From bd9363f6fc6b12ee8953683d6e53cead89fc99e7 Mon Sep 17 00:00:00 2001 From: Guofu Li Date: Mon, 22 Aug 2022 11:23:57 +0800 Subject: [PATCH] 1. Reorganized the directory structure. 2. Add `DDBBase` class to decrease repeated class-level constant definitino. 3. Add `DDBExpression` class for single quarter calculation and yoy claculation. --- .../scripts/{pit2.dos => single_quarter.dos} | 0 ipynb/ddb.ipynb | 86 +- ipynb/ddb_pit.ipynb | 870 +++++++++++++++++- src/DDBBase.py | 18 + src/DDBExpression.py | 365 ++++++++ src/DDBFactor.py | 14 +- src/DDBLoader.py | 23 +- .../HFDataTableMessage_pb2.py | 0 .../IndexFutureKLineMessage_pb2.py | 0 .../IndexFutureL1TickMessage_pb2.py | 0 .../IndexKLineMessage_pb2.py | 0 .../ProtoBuffEntitys}/IndexTickMessage_pb2.py | 0 .../ProtoBuffEntitys}/KLineMessage_pb2.py | 0 .../ProtoBuffEntitys}/OrderMessage_pb2.py | 0 .../ProtoBuffEntitys}/TickMessage_pb2.py | 0 .../ProtoBuffEntitys}/TickQueueMessage_pb2.py | 0 .../ProtoBuffEntitys}/TranseMessage_pb2.py | 0 17 files changed, 1312 insertions(+), 64 deletions(-) rename dolphindb/project/scripts/{pit2.dos => single_quarter.dos} (100%) create mode 100644 src/DDBBase.py create mode 100644 src/DDBExpression.py rename {ProtoBuffEntitys => src/ProtoBuffEntitys}/HFDataTableMessage_pb2.py (100%) rename {ProtoBuffEntitys => src/ProtoBuffEntitys}/IndexFutureKLineMessage_pb2.py (100%) rename {ProtoBuffEntitys => src/ProtoBuffEntitys}/IndexFutureL1TickMessage_pb2.py (100%) rename {ProtoBuffEntitys => src/ProtoBuffEntitys}/IndexKLineMessage_pb2.py (100%) rename {ProtoBuffEntitys => src/ProtoBuffEntitys}/IndexTickMessage_pb2.py (100%) rename {ProtoBuffEntitys => src/ProtoBuffEntitys}/KLineMessage_pb2.py (100%) rename {ProtoBuffEntitys => src/ProtoBuffEntitys}/OrderMessage_pb2.py (100%) rename {ProtoBuffEntitys => src/ProtoBuffEntitys}/TickMessage_pb2.py (100%) rename {ProtoBuffEntitys => src/ProtoBuffEntitys}/TickQueueMessage_pb2.py (100%) rename {ProtoBuffEntitys => src/ProtoBuffEntitys}/TranseMessage_pb2.py (100%) diff --git a/dolphindb/project/scripts/pit2.dos b/dolphindb/project/scripts/single_quarter.dos similarity index 100% rename from dolphindb/project/scripts/pit2.dos rename to dolphindb/project/scripts/single_quarter.dos diff --git a/ipynb/ddb.ipynb b/ipynb/ddb.ipynb index 028dbd2..e04a63e 100644 --- a/ipynb/ddb.ipynb +++ b/ipynb/ddb.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "7246e0c8-61cd-4cbf-a978-aa0dc0172d6d", "metadata": {}, "outputs": [], @@ -12,15 +12,27 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 3, "id": "5d0f471e-682e-43cc-abdb-7e52f3bbd707", "metadata": {}, "outputs": [], "source": [ - "sess = ddb.session('192.168.1.7', 8848)\n", + "sess = ddb.session('localhost', 8848)\n", "sess.login('admin', '123456')" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "fca58bdc-2aa1-4610-9a94-67d55f97a6e1", + "metadata": {}, + "outputs": [], + "source": [ + "sess.run(\"\"\"\n", + " migrate('/data/dolphindb/backup/', \"dfs://hft_stock_ts\", \"OrderPartitioned\")\n", + "\"\"\")" + ] + }, { "cell_type": "code", "execution_count": 67, @@ -1101,7 +1113,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 27, "id": "d68ea326-82c3-4a7c-97cf-c04dd8aee56b", "metadata": {}, "outputs": [ @@ -1126,44 +1138,24 @@ " \n", " \n", " \n", - " code\n", - " m_nDate\n", - " m_nTime\n", - " m_nPrice\n", - " m_iVolume\n", - " m_iTurover\n", - " m_nMatchItems\n", - " m_chTradeFlag\n", - " m_chBSFlag\n", - " 
m_iAccVolume\n", - " ...\n", - " m_nOpen\n", - " m_nPreClose\n", - " m_nAskPrice\n", - " m_nAskVolume\n", - " m_nBidPrice\n", - " m_nBidVolume\n", - " m_nAskAvPrice\n", - " m_nBidAvPrice\n", - " m_iTotalAskVolume\n", - " m_iTotalBidVolume\n", + " sum_cnt\n", " \n", " \n", " \n", + " \n", + " 0\n", + " 0\n", + " \n", " \n", "\n", - "

0 rows × 23 columns

\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: [code, m_nDate, m_nTime, m_nPrice, m_iVolume, m_iTurover, m_nMatchItems, m_chTradeFlag, m_chBSFlag, m_iAccVolume, m_iAccTurover, m_nHigh, m_nLow, m_nOpen, m_nPreClose, m_nAskPrice, m_nAskVolume, m_nBidPrice, m_nBidVolume, m_nAskAvPrice, m_nBidAvPrice, m_iTotalAskVolume, m_iTotalBidVolume]\n", - "Index: []\n", - "\n", - "[0 rows x 23 columns]" + " sum_cnt\n", + "0 0" ] }, - "execution_count": 33, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1173,7 +1165,35 @@ "sess.login('admin', '123456')\n", "sess.run(\"\"\"\n", "tbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\n", - "select * from tbl where code='000666.SZ', m_nDate <= 2013.02.26;\n", + "select sum(cnt) from (select count(*) as cnt from tbl map);\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "7cbc4906-7756-424a-9ce5-9d2b6d1bab4b", + "metadata": {}, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": " in run: Server response: 'tbl = loadTable(\"dfs://hft_stock_ts\", \"TickPartitioned\") => getFileBlocksMeta on path '/hft_stock_ts/TickPartitioned.tbl' failed, reason: path does not exist' script: '\ntbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\nselect sum(cnt) from (select count(*) as cnt from tbl map);\n'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [28]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m sess \u001b[38;5;241m=\u001b[39m ddb\u001b[38;5;241m.\u001b[39msession(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m192.168.1.167\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;241m8848\u001b[39m)\n\u001b[1;32m 2\u001b[0m sess\u001b[38;5;241m.\u001b[39mlogin(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madmin\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m123456\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43msess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;43mtbl = loadTable(\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdfs://hft_stock_ts\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mTickPartitioned\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m);\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43mselect sum(cnt) from (select count(*) as cnt from tbl map);\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.venv/tinysoft/lib/python3.8/site-packages/dolphindb/session.py:161\u001b[0m, in \u001b[0;36msession.run\u001b[0;34m(self, script, *args, **kwargs)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfetchSize\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m BlockReader(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcpp\u001b[38;5;241m.\u001b[39mrunBlock(script, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs))\n\u001b[0;32m--> 161\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcpp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mscript\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mRuntimeError\u001b[0m: in run: Server response: 'tbl = loadTable(\"dfs://hft_stock_ts\", \"TickPartitioned\") => getFileBlocksMeta on path '/hft_stock_ts/TickPartitioned.tbl' failed, reason: path does not exist' script: '\ntbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\nselect sum(cnt) from (select count(*) as cnt from tbl map);\n'" + ] + } + ], + "source": [ + "sess = ddb.session('192.168.1.167', 8848)\n", + "sess.login('admin', '123456')\n", + "sess.run(\"\"\"\n", + "tbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\n", + "select sum(cnt) from (select count(*) as cnt from tbl map);\n", "\"\"\")" ] }, diff --git a/ipynb/ddb_pit.ipynb b/ipynb/ddb_pit.ipynb index 74a3632..5a8a333 100644 --- a/ipynb/ddb_pit.ipynb +++ b/ipynb/ddb_pit.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "139fd1cb-aedf-4186-8408-4d630ba69599", "metadata": {}, "outputs": [], @@ -639,7 +639,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 70, "id": "4cd4cd6e-f34e-43a8-98de-c468a54d8081", "metadata": {}, "outputs": [], @@ -656,7 +656,10 @@ "\treport_period = report_period_list;\n", "\t\n", "\tquery_table = table(report_period, m_nDate);\n", - "\tquery_table_exp = select * from cj(query_table, select code from source_table where partition(code, code_partition_id) group by code map);\n", + "\tquery_table_exp = select * from cj(\n", + " query_table, \n", + " select code from source_table where partition(code, code_partition_id\n", + " ) group by code map);\n", "\t\n", "\tcol_list = sqlCol(['code', 'report_period', 'appear_at_date', col_name]);\n", " from_tbl = ;\n", @@ -672,7 +675,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 71, "id": "943b760a-ab39-4291-8a93-81b3e38a70b7", "metadata": {}, "outputs": [ @@ -798,7 +801,7 @@ "[91 rows x 1 columns]" ] }, - "execution_count": 69, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } @@ -978,10 +981,865 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 181, "id": "3c246940-1ad6-414f-b461-2d8ca7cd87f1", "metadata": {}, "outputs": [], + "source": [ + "sess.run(\"\"\"\n", + " def single_quarter(table_name, col_list) {\n", + " \n", + " sel_col = array(ANY, 4 + size(col_list));\n", + " sel_col[0] = sqlCol('code');\n", + " sel_col[2] = sqlColAlias(, 'year');\n", + " \n", + " for (i in 0..(size(col_list) - 1)) {\n", + " sel_col[i + 4] = sqlCol(col_list[i]);\n", + " }\n", + "\n", + " // 当季累计数据,作为下一季基准\n", + " sel_col[1] = sqlColAlias(sqlCol('report_period'), 'base_report_period');\n", + " sel_col[3] = sqlColAlias(, 'quarter_of_year');\n", + " tbl_quarter_accum_base = sql(\n", + " select = sel_col,\n", + " from = table_name,\n", + " where = \n", + " ).eval();\n", + "\n", + " // 从第二季开始,需要去匹配前一季累计基数\n", + " sel_col[1] = sqlColAlias(sqlCol('report_period'), 'current_report_period');\n", + " sel_col[3] = sqlColAlias(, 'quarter_of_year');\n", + " tbl_quarter_accum = sql(\n", + " select = sel_col,\n", + " 
from = table_name,\n", + " where = 1>\n", + " ).eval();\n", + "\n", + " // 单季流量,把非第一季的季报都要扣除前一季度的基数\n", + " sel_col[1] = sqlColAlias(sqlCol('current_report_period'), 'report_period');\n", + " sel_col[2] = sqlCol('year');\n", + " sel_col[3] = sqlCol('quarter_of_year');\n", + " for (i in 0..(size(col_list) - 1)) {\n", + " sel_col[i + 4] = sqlColAlias(\n", + " expr(\n", + " sqlCol('tbl_quarter_accum_' + col_list[i]),\n", + " -,\n", + " sqlCol(col_list[i])\n", + " )\n", + " , col_list[i] + '_flux');\n", + " }\n", + " from_obj = ej(\n", + " tbl_quarter_accum_base, \n", + " tbl_quarter_accum, \n", + " `code`year`quarter_of_year, \n", + " `code`year`quarter_of_year);\n", + " tbl_quarter_flux = sql(\n", + " select = sel_col, \n", + " from = from_obj\n", + " ).eval();\n", + "\n", + " // 每年第一个季度\n", + " sel_col[1] = sqlCol('report_period');\n", + " sel_col[2] = sqlColAlias(, 'year');\n", + " sel_col[3] = sqlColAlias(, 'quarter_of_year');\n", + " for (i in 0..(size(col_list) - 1)) {\n", + " sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_flux');\n", + " }\n", + " tbl_quarter1 = sql(\n", + " select = sel_col,\n", + " from = table_name,\n", + " where = \n", + " ).eval();\n", + " \n", + " // 再拼接回第一季度(无需扣除基数的数据)\n", + " tbl_quarter_flux = unionAll(\n", + " tbl_quarter1, \n", + " tbl_quarter_flux\n", + " );\n", + " \n", + " return tbl_quarter_flux;\n", + " }\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "id": "981c8b85-a750-4d6b-bed9-c2567a95b2a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
codereport_periodyearquarter_of_yearTOT_OPER_REV_flux
0000400.SZ2019-03-31201911.032565e+09
82000400.SZ2019-06-30201922.021923e+09
81000400.SZ2019-09-30201932.183729e+09
80000400.SZ2019-12-31201944.917866e+09
1000558.SZ2019-03-31201912.361816e+07
..................
321688396.SH2019-12-31201941.610869e+09
79833874.NE2019-03-31201913.720237e+07
325833874.NE2019-06-30201924.538674e+07
324833874.NE2019-09-30201934.776034e+07
323833874.NE2019-12-31201944.816188e+07
\n", + "

326 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " code report_period year quarter_of_year TOT_OPER_REV_flux\n", + "0 000400.SZ 2019-03-31 2019 1 1.032565e+09\n", + "82 000400.SZ 2019-06-30 2019 2 2.021923e+09\n", + "81 000400.SZ 2019-09-30 2019 3 2.183729e+09\n", + "80 000400.SZ 2019-12-31 2019 4 4.917866e+09\n", + "1 000558.SZ 2019-03-31 2019 1 2.361816e+07\n", + ".. ... ... ... ... ...\n", + "321 688396.SH 2019-12-31 2019 4 1.610869e+09\n", + "79 833874.NE 2019-03-31 2019 1 3.720237e+07\n", + "325 833874.NE 2019-06-30 2019 2 4.538674e+07\n", + "324 833874.NE 2019-09-30 2019 3 4.776034e+07\n", + "323 833874.NE 2019-12-31 2019 4 4.816188e+07\n", + "\n", + "[326 rows x 5 columns]" + ] + }, + "execution_count": 182, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sess.run(\"single_quarter(tmp_table, ['TOT_OPER_REV'])\").sort_values(['code', 'report_period'])" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "1ceea0a7-7218-4aa7-be8a-ebb655dcbc93", + "metadata": {}, + "outputs": [], + "source": [ + "sess.run(\"\"\"\n", + " tmp_table = select * from pit_col_at_date(\n", + " \"is_common_ori\", \n", + " \"TOT_OPER_REV\", \n", + " 2021.03.14, \n", + " [2019.12.31, 2019.09.30, 2019.06.30, 2019.03.31], \n", + " 0) order by code\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "id": "d01d6fc2-15ed-4e40-9181-0f3e3a96d2cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
codereport_periodappear_at_dateTOT_OPER_REV
0000400.SZ2019-12-312020-04-101.015608e+10
1000400.SZ2019-09-302019-10-125.238217e+09
2000400.SZ2019-06-302019-08-243.054488e+09
3000400.SZ2019-03-312019-04-261.032565e+09
4000558.SZ2019-12-312020-08-251.378479e+08
...............
333831010.NE2019-06-302019-08-091.970813e+08
334833874.NE2019-12-312020-06-291.785113e+08
335833874.NE2019-09-302019-10-221.303494e+08
336833874.NE2019-06-302019-08-168.258911e+07
337833874.NE2019-03-312019-04-303.720237e+07
\n", + "

338 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " code report_period appear_at_date TOT_OPER_REV\n", + "0 000400.SZ 2019-12-31 2020-04-10 1.015608e+10\n", + "1 000400.SZ 2019-09-30 2019-10-12 5.238217e+09\n", + "2 000400.SZ 2019-06-30 2019-08-24 3.054488e+09\n", + "3 000400.SZ 2019-03-31 2019-04-26 1.032565e+09\n", + "4 000558.SZ 2019-12-31 2020-08-25 1.378479e+08\n", + ".. ... ... ... ...\n", + "333 831010.NE 2019-06-30 2019-08-09 1.970813e+08\n", + "334 833874.NE 2019-12-31 2020-06-29 1.785113e+08\n", + "335 833874.NE 2019-09-30 2019-10-22 1.303494e+08\n", + "336 833874.NE 2019-06-30 2019-08-16 8.258911e+07\n", + "337 833874.NE 2019-03-31 2019-04-30 3.720237e+07\n", + "\n", + "[338 rows x 4 columns]" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sess.run(\"tmp_table\")" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "5ac22809-9ba9-4a01-b76a-591558c4cc52", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'< select symbol,date,cumsum(volume) as cumVol from tmp_tablef0e858a3837f0000 >'" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sess.run(\"\"\"\n", + "sql(select=sqlCol(['symbol', 'date', 'cumsum(volume) as cumVol']), from = tmp_table)\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "dc36ca56-07e0-4912-b5d2-da192a1c3c62", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'< select symbol,date,cumsum(volume) as cumVol from tmp_tablef0e858a3837f0000 >'" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sess.run(\"\"\"\n", + "sql(select=(sqlCol(`symbol),sqlCol(`date),sqlColAlias(, `cumVol)), from=tmp_table)\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 211, + "id": "c3be05b5-c9b9-4df5-8481-b3092c25125e", + "metadata": {}, + "outputs": [], + "source": [ + "sess.run(\"\"\"\n", + " def yoy(table_name, col_list) {\n", + " sel_col = array(ANY, 4 + size(col_list));\n", + " sel_col[0] = sqlCol('code');\n", + " sel_col[3] = sqlColAlias(, 'quarter_of_year');\n", + " \n", + " // 上一年数据\n", + " for (i in 0..(size(col_list) - 1)) {\n", + " sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_base');\n", + " }\n", + " sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_last_year');\n", + " sel_col[2] = sqlColAlias(, 'year');\n", + " tbl_last_year = sql(\n", + " select = sel_col,\n", + " from = table_name\n", + " ).eval();\n", + "\n", + " // 本年度数据\n", + " for (i in 0..(size(col_list) - 1)) {\n", + " sel_col[i + 4] = sqlCol(col_list[i]);\n", + " }\n", + " sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_current_year');\n", + " sel_col[2] = sqlColAlias(, 'year');\n", + " tbl_this_year = sql(\n", + " select = sel_col,\n", + " from = table_name\n", + " ).eval();\n", + " \n", + " // 计算同比增长率\n", + " sel_col = array(ANY, 4 + (3 * size(col_list)));\n", + "\n", + " sel_col[0] = sqlCol('code');\n", + " sel_col[1] = sqlColAlias(sqlCol('report_period_current_year'), 'report_period');\n", + " sel_col[2] = sqlCol('year');\n", + " sel_col[3] = sqlCol('quarter_of_year');\n", + " \n", + " for (i in 0..(size(col_list) - 1)) {\n", + " sel_col[3 * i + 4] = sqlCol(col_list[i] + '_base');\n", + " sel_col[3 * i + 5] = sqlCol(col_list[i]);\n", + " sel_col[3 * i + 6] = sqlColAlias(\n", + " expr(expr(sqlCol(col_list[i]), -, sqlCol(col_list[i] + '_base')), /, expr(abs, sqlCol(col_list[i] + 
'_base'))), \n", + " col_list[i] + '_yoy');\n", + " }\n", + " from_obj = ej(\n", + " tbl_last_year, \n", + " tbl_this_year, \n", + " `code`year`quarter_of_year, \n", + " `code`year`quarter_of_year);\n", + " \n", + " return sql(\n", + " select = sel_col,\n", + " from = from_obj).eval();\n", + " }\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 212, + "id": "63492d0c-c348-4e8b-a08b-5feb279cee5e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
codereport_periodyearquarter_of_yearTOT_OPER_REV_baseTOT_OPER_REVTOT_OPER_REV_yoy
0000400.SZ2019-12-31201948.216559e+091.015608e+100.236051
1000400.SZ2018-12-31201841.033072e+108.216559e+09-0.204648
2000558.SZ2019-12-31201947.024741e+081.378479e+08-0.803768
3000558.SZ2018-12-31201841.324249e+097.024741e+08-0.469530
4000677.SZ2019-12-31201947.068198e+087.906742e+080.118636
........................
177688396.SH2018-12-31201845.875590e+096.270797e+090.067262
178831010.NE2019-12-31201942.936331e+083.552313e+080.209780
179831010.NE2018-12-31201842.209252e+082.936331e+080.329106
180833874.NE2019-12-31201941.974978e+081.785113e+08-0.096135
181833874.NE2018-12-31201841.986075e+081.974978e+08-0.005588
\n", + "

182 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " code report_period year quarter_of_year TOT_OPER_REV_base \\\n", + "0 000400.SZ 2019-12-31 2019 4 8.216559e+09 \n", + "1 000400.SZ 2018-12-31 2018 4 1.033072e+10 \n", + "2 000558.SZ 2019-12-31 2019 4 7.024741e+08 \n", + "3 000558.SZ 2018-12-31 2018 4 1.324249e+09 \n", + "4 000677.SZ 2019-12-31 2019 4 7.068198e+08 \n", + ".. ... ... ... ... ... \n", + "177 688396.SH 2018-12-31 2018 4 5.875590e+09 \n", + "178 831010.NE 2019-12-31 2019 4 2.936331e+08 \n", + "179 831010.NE 2018-12-31 2018 4 2.209252e+08 \n", + "180 833874.NE 2019-12-31 2019 4 1.974978e+08 \n", + "181 833874.NE 2018-12-31 2018 4 1.986075e+08 \n", + "\n", + " TOT_OPER_REV TOT_OPER_REV_yoy \n", + "0 1.015608e+10 0.236051 \n", + "1 8.216559e+09 -0.204648 \n", + "2 1.378479e+08 -0.803768 \n", + "3 7.024741e+08 -0.469530 \n", + "4 7.906742e+08 0.118636 \n", + ".. ... ... \n", + "177 6.270797e+09 0.067262 \n", + "178 3.552313e+08 0.209780 \n", + "179 2.936331e+08 0.329106 \n", + "180 1.785113e+08 -0.096135 \n", + "181 1.974978e+08 -0.005588 \n", + "\n", + "[182 rows x 7 columns]" + ] + }, + "execution_count": 212, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sess.run(\"yoy(tmp_table, ['TOT_OPER_REV'])\")" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "id": "5273ebfc-25bd-4b38-9605-d3109cf2280f", + "metadata": {}, + "outputs": [], + "source": [ + "sess.run(\"\"\"\n", + " tmp_table = select * from pit_col_at_date(\n", + " \"is_common_ori\", \n", + " \"TOT_OPER_REV\", \n", + " 2021.03.14, \n", + " [2019.12.31, 2018.12.31, 2017.12.31], \n", + " 0) order by code\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "id": "99af13cd-5918-4d21-9067-d3da86d19c15", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
codereport_periodappear_at_dateTOT_OPER_REV
0000400.SZ2019-12-312020-04-101.015608e+10
1000400.SZ2018-12-312019-03-298.216559e+09
2000400.SZ2017-12-312018-03-241.033072e+10
3000558.SZ2019-12-312020-08-251.378479e+08
4000558.SZ2018-12-312019-04-267.024741e+08
...............
268831010.NE2018-12-312020-07-012.936331e+08
269831010.NE2017-12-312020-07-012.209252e+08
270833874.NE2019-12-312020-06-291.785113e+08
271833874.NE2018-12-312020-06-291.974978e+08
272833874.NE2017-12-312020-06-291.986075e+08
\n", + "

273 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " code report_period appear_at_date TOT_OPER_REV\n", + "0 000400.SZ 2019-12-31 2020-04-10 1.015608e+10\n", + "1 000400.SZ 2018-12-31 2019-03-29 8.216559e+09\n", + "2 000400.SZ 2017-12-31 2018-03-24 1.033072e+10\n", + "3 000558.SZ 2019-12-31 2020-08-25 1.378479e+08\n", + "4 000558.SZ 2018-12-31 2019-04-26 7.024741e+08\n", + ".. ... ... ... ...\n", + "268 831010.NE 2018-12-31 2020-07-01 2.936331e+08\n", + "269 831010.NE 2017-12-31 2020-07-01 2.209252e+08\n", + "270 833874.NE 2019-12-31 2020-06-29 1.785113e+08\n", + "271 833874.NE 2018-12-31 2020-06-29 1.974978e+08\n", + "272 833874.NE 2017-12-31 2020-06-29 1.986075e+08\n", + "\n", + "[273 rows x 4 columns]" + ] + }, + "execution_count": 184, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sess.run(\"tmp_table\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87820a2a-1ddf-4950-b0d4-a3e7d4a61b16", + "metadata": {}, + "outputs": [], "source": [] } ], diff --git a/src/DDBBase.py b/src/DDBBase.py new file mode 100644 index 0000000..04f35df --- /dev/null +++ b/src/DDBBase.py @@ -0,0 +1,18 @@ + +import dolphindb as ddb + + +class DDBBase(object): + + ddb_config = { + 'host' : '192.168.1.167', + 'username' : 'admin', + 'password' : '123456', + } + + + def __init__(self): + self.ddb_sess = ddb.session(self.ddb_config['host'], 8848) + self.login(self.ddb_config['username'], self.ddb_config['password']) + + diff --git a/src/DDBExpression.py b/src/DDBExpression.py new file mode 100644 index 0000000..5c728e9 --- /dev/null +++ b/src/DDBExpression.py @@ -0,0 +1,365 @@ +from tqdm import tqdm + +import numpy as np +import pandas as pd +import dolphindb as ddb + +from DDBBase import DDBBase +from DDBLoader import DDBPITLoader + + +class DDBExpression(DDBBase): + """ + 调用事先定义好的DolphinDB的Function或者FunctionView来进行表达式计算。 + + 定义DolphinDB中的Function或者FunctionView的具体逻辑在`DDBExpreCreator`中。 + """ + + def pit_at_date(self, table_name, date, report_period_list): + """ + 获得任何时间点上,能看到的某个财报最新可见数据。 + + 实现方式: + 通过调用事先在DolpinDB中创建的Function View。 + + Args: + table_name: 财务报表名字,包含是否为调整报表的后缀名(非stem名) + date: 观察日期,使用DolphinDB的日期格式,即YYYY.MM.DD,无需加引号 + report_period_list: 报告期列表,列表中每一个日期为一个DolphinDb的日期格式 + + Return: + DataFrame: 在观察日期`date`日能看到的,所有公司最近能看到的,报告期在`report_period_list`中的,财务报告表`table_name`的截面数据。 + """ + df_list = [] + for i in range(DDBPITLoader.num_code_partition): + _df = self.ddb_sess.run(""" + select * from pit_at_date("{table_name}", {date}, [{report_period_list}], {code_partition_id}); + """.format( + table_name = table_name, + date = date, + report_period_list = ','.join(report_period_list), + code_parition_id = i + )) + _df.set_index(['code', 'report_period', 'appear_at_date'], inplace=True) + df_list.append(_df) + return pd.concat(df_list, axis=1) + + + def pit_col_at_date(self, table_name, col_name, date, report_period_list): + """ + 获得任何时间点上,能看到的某个财报最新可见数据。 + + 实现方式: + 通过调用事先在DolpinDB中创建的Function View。 + + Args: + table_name: 财务报表名字,包含是否为调整报表的后缀名(非stem名) + col_name: 列名,一般为具体财务科目名字。 + date: 观察日期,使用DolphinDB的日期格式,即YYYY.MM.DD,无需加引号 + report_period_list: 报告期列表,列表中每一个日期为一个DolphinDb的日期格式 + + Return: + DataFrame: 在观察日期`date`日能看到的,所有公司最近能看到的,报告期在`report_period_list`中的,财务报告表`table_name`中``col_name`指标的截面数据。 + + """ + df_list = [] + for i in range(DDBPITLoader.num_code_partition): + _df = self.ddb_sess.run(""" + select * from pit_col_at_date("{table_name}", "{col_name}", {date}, [{report_period_list}], {code_partition_id}); + """.format( + table_name = table_name, + col_name = col_name, + 
date = date, + report_period_list = ','.join(report_period_list), + code_parition_id = i + )) + _df.set_index(['code', 'report_period', 'appear_at_date'], inplace=True) + df_list.append(_df) + return pd.concat(df_list, axis=1) + + + def yoy(self, dataframe, col_list): + self.ddb_sess.upload({'table_yoy' : dataframe }) + self.ddb_sess.run(""" + yoy({table_name}, [{col_list}]); + """.format( + table_name = 'table_yoy', + col_list = ','.join([f"'{col_name}'" for col_name in col_list]) + )) + + + def single_quarter(self, dataframe, col_list): + self.ddb_sess.upload({'table_single_quarter' : dataframe}) + self.ddb_sess.run(""" + single_quarter({table_name}, [{col_list}]) + """.format( + table_name = table_name, + col_list = ','.join([f"'{col_name}'" for col_name in col_list]) + )) + + + +class DDBExpreCreator(DDBBase): + """ + 定义DolphinDB中的各种基础操作,因此这个类中的大部分实质性代码都是DolphinDB自己的语言,而不是Python语言编写的。 + + `Expre`为`Expression`的简写。 + """ + pit_dbpath = "dfs://pit_stock_ts" + pit_dbname = "db_pit_stock" + + daily_dbpath = "dfs://daily_stock_ts" + daily_dbname = "db_daily_stock" + + + def create_function_views(self): + self._func_view_pit_at_date() + self._func_view_pit_col_at_date() + + + def create_functions(self): + self._fun_yoy(); + self._func_single_quarter() + + + def _func_single_quarter(self): + code = """ + /** + * Args: + * table: DolphinDB的表,可以是一个内存表,也可以是一个分区表。 + * col_list: 需要进行单季转换的字段名列表。 + */ + def single_quarter(table_name, col_list) { + + sel_col = array(ANY, 4 + size(col_list)); + sel_col[0] = sqlCol('code'); + sel_col[2] = sqlColAlias(, 'year'); + + for (i in 0..(size(col_list) - 1)) { + sel_col[i + 4] = sqlCol(col_list[i]); + } + + // 当季累计数据,作为下一季基准 + sel_col[1] = sqlColAlias(sqlCol('report_period'), 'base_report_period'); + sel_col[3] = sqlColAlias(, 'quarter_of_year'); + tbl_quarter_accum_base = sql( + select = sel_col, + from = table_name, + where = + ).eval(); + + // 从第二季开始,需要去匹配前一季累计基数 + sel_col[1] = sqlColAlias(sqlCol('report_period'), 'current_report_period'); + sel_col[3] = sqlColAlias(, 'quarter_of_year'); + tbl_quarter_accum = sql( + select = sel_col, + from = table_name, + where = 1> + ).eval(); + + // 单季流量,把非第一季的季报都要扣除前一季度的基数 + sel_col[1] = sqlColAlias(sqlCol('current_report_period'), 'report_period'); + sel_col[2] = sqlCol('year'); + sel_col[3] = sqlCol('quarter_of_year'); + for (i in 0..(size(col_list) - 1)) { + sel_col[i + 4] = sqlColAlias( + expr( + sqlCol('tbl_quarter_accum_' + col_list[i]), + -, + sqlCol(col_list[i]) + ) + , col_list[i] + '_flux'); + } + from_obj = ej( + tbl_quarter_accum_base, + tbl_quarter_accum, + `code`year`quarter_of_year, + `code`year`quarter_of_year); + tbl_quarter_flux = sql( + select = sel_col, + from = from_obj + ).eval(); + + // 每年第一个季度 + sel_col[1] = sqlCol('report_period'); + sel_col[2] = sqlColAlias(, 'year'); + sel_col[3] = sqlColAlias(, 'quarter_of_year'); + for (i in 0..(size(col_list) - 1)) { + sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_flux'); + } + tbl_quarter1 = sql( + select = sel_col, + from = table_name, + where = + ).eval(); + + // 再拼接回第一季度(无需扣除基数的数据) + tbl_quarter_flux = unionAll( + tbl_quarter1, + tbl_quarter_flux + ); + + return tbl_quarter_flux; + } + """ + self.ddb_sess.run(code) + + + def _func_yoy(self): + code = """ + /** + * Args: + * table: DolphinDB的表,可以是一个内存表,也可以是一个分区表。 + * col_list: 需要进行同比转换的字段名。 + */ + def yoy(table_name, col_list) { + sel_col = array(ANY, 4 + size(col_list)); + sel_col[0] = sqlCol('code'); + sel_col[3] = sqlColAlias(, 'quarter_of_year'); + + // 上一年数据 + for (i in 
0..(size(col_list) - 1)) { + sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_base'); + } + sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_last_year'); + sel_col[2] = sqlColAlias(, 'year'); + tbl_last_year = sql( + select = sel_col, + from = table_name + ).eval(); + + // 本年度数据 + for (i in 0..(size(col_list) - 1)) { + sel_col[i + 4] = sqlCol(col_list[i]); + } + sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_current_year'); + sel_col[2] = sqlColAlias(, 'year'); + tbl_this_year = sql( + select = sel_col, + from = table_name + ).eval(); + + // 计算同比增长率 + sel_col = array(ANY, 4 + (3 * size(col_list))); + + sel_col[0] = sqlCol('code'); + sel_col[1] = sqlColAlias(sqlCol('report_period_current_year'), 'report_period'); + sel_col[2] = sqlCol('year'); + sel_col[3] = sqlCol('quarter_of_year'); + + for (i in 0..(size(col_list) - 1)) { + sel_col[3 * i + 4] = sqlCol(col_list[i] + '_base'); + sel_col[3 * i + 5] = sqlCol(col_list[i]); + sel_col[3 * i + 6] = sqlColAlias( + expr(expr(sqlCol(col_list[i]), -, sqlCol(col_list[i] + '_base')), /, expr(abs, sqlCol(col_list[i] + '_base'))), + col_list[i] + '_yoy'); + } + from_obj = ej( + tbl_last_year, + tbl_this_year, + `code`year`quarter_of_year, + `code`year`quarter_of_year); + + return sql( + select = sel_col, + from = from_obj).eval(); + } + """ + self.ddb_sess.run(code) + + + def _func_view_pit_at_date(self): + """ + 创建`pit_at_date`,获得任何时间点上,能看到的某个财报最新可见数据。 + """ + self.ddb_sess.run(""" + dropFunctionView('pit_at_date'); + """) + + self.ddb_sess.run(""" + /** + * Example of use: + * pit_at_date("is_common_ori", "TOT_OPER_REV", 2021.03.14, [2019.12.31, 2019.09.30, 2019.06.30], 0) + */ + def pit_at_date(table_name, date, report_period_list, code_partition_id) { + source_table = loadTable("{pit_dbpath}", table_name); + + m_nDate = take(date, size(report_period_list)); + report_period = report_period_list; + + query_table = table(report_period, m_nDate); + query_table_exp = select * from cj( + query_table, + select code + from source_table + where partition(code, code_partition_id) + group by code map + ); + + source_table_part = select source_table.* from ej( + source_table, query_table_exp, `code`report_period + ) where partition(code, code_partition_id); + + return select source_table_part.* from aj( + query_table_exp, + source_table_part, + `code`report_period`m_nDate, + `code`report_period`appear_at_date + ) where not isNull(source_table_part.code); + } + + addFunctionView(pit_at_date); + """.format( + pit_dbpath = pit_dbpath + )) + + + def _func_view_pit_col_at_date(self): + """ + 创建`pit_col_at_date`,获得任何日期点上,能看到的财报中某一个指标的最新可见数据。 + 与`pit_at_date`的区别在于,`pit_col_at_date`返回返回一个序列,而`pit_at_date`返回一整张表。 + """ + # 先把原有的View Function删除 + self.ddb_sess.run(""" + dropFunctionView('pit_col_at_date'); + """) + + self.ddb_sess.run(""" + /** + * Example of use: + * pit_col_at_date("is_common_ori", "TOT_OPER_REV", 2021.03.14, [2019.12.31, 2019.09.30, 2019.06.30], 0) + */ + def pit_col_at_date(table_name, col_name, date, report_period_list, code_partition_id) { + source_table = loadTable("{pit_dbpath}", table_name); + + m_nDate = take(date, size(report_period_list)); + report_period = report_period_list; + + query_table = table(report_period, m_nDate); + query_table_exp = select * from cj( + query_table, + select code + from source_table + where partition(code, code_partition_id) + group by code map + ); + + col_list = sqlCol(['code', 'report_period', 'appear_at_date', col_name]); + from_tbl = ; + where_conditions = []; 
+ source_table_part = sql(select=col_list, from=from_tbl, where=where_conditions).eval(); + + return select source_table_part.* from aj( + query_table_exp, + source_table_part, + `code`report_period`m_nDate, + `code`report_period`appear_at_date + ) where not isNull(source_table_part.code) + } + + addFunctionView(pit_col_at_date); + """.format( + pit_dbpath = self.pit_dbpath + )) + + diff --git a/src/DDBFactor.py b/src/DDBFactor.py index c0470cb..0a7b456 100644 --- a/src/DDBFactor.py +++ b/src/DDBFactor.py @@ -5,10 +5,10 @@ import functools import dolphindb as ddb +from DDBBase import DDBBase from DDBLoader import DDBLoader - def load_ddb_table(hft_tbl_name): """ 这是一个用来简化载入分区表过程的语法糖,但似乎需要预先调用这个函数的场景并不多,简化效果不是很明显。 @@ -30,27 +30,17 @@ def load_ddb_table(hft_tbl_name): return decorator -class DailyFactor(object): +class DDBDailyFactor(DDBBase): #ddb_hft_path = "dfs://hft_stock_ts" #ddb_hft_dbname = "db_hft_stock" ddb_daily_path = "dfs://daily_stock_ts" ddb_daily_dbname = "db_daily_stock" - ddb_config = { - 'host' : '192.168.1.167', - 'username' : 'admin', - 'password' : '123456', - } - # 这里的partition数量未必需要和hft表的一致 # 当读取hft表的时候,需要使用DDBLoader中的`num_code_partition`,而不是此字段 num_code_partition = 50 - def __init__(self): - self.ddb_sess = ddb.session(self.ddb_config['host'], 8848) - self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password']) - def create_ddb_database(self): """ diff --git a/src/DDBLoader.py b/src/DDBLoader.py index e26c465..84d6c48 100644 --- a/src/DDBLoader.py +++ b/src/DDBLoader.py @@ -23,9 +23,10 @@ import sqlalchemy as sa import ProtoBuffEntitys +from DDBBase import DDBBase -class DDBLoader(object): +class DDBLoader(DDBBase): """ - 放了几个公用的配置字段,包括: 1. SQL-Server的链接参数 @@ -42,14 +43,8 @@ class DDBLoader(object): 'password' : 'passw0rd!' 
} - ddb_config = { - 'host' : '192.168.1.167', - 'username' : 'admin', - 'password' : '123456' - } - - def __init__(self): + super().__init__() self.mssql_engine = sa.create_engine( "mssql+pyodbc://{username}:{password}@{host}/master?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config), connect_args = { @@ -57,9 +52,6 @@ class DDBLoader(object): }, echo=False ) - self.ddb_sess = ddb.session(self.ddb_config['host'], 8848) - self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password']) - @abc.abstractmethod def create_ddb_database(self, *args, **kwargs): @@ -552,7 +544,7 @@ class DDBHFTLoader(DDBLoader): num_workers = 8 default_table_capacity = 10000 - ddb_dump_journal_fname = 'ddb_dump_journal.csv' + ddb_dump_journal_fname = '../assets/ddb_dump_journal.csv' def init_ddb_database(self, df_calendar): @@ -913,7 +905,9 @@ class DDBHFTLoader(DDBLoader): ddb_sess.login(DDBLoader.ddb_config['username'], DDBLoader.ddb_config['password']) ddb_sess.upload({df_table_name : df}) - ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format( + # 因为在做Tick数据的时候,偶然发生'CHUNK[xxx] does not exist.',所以在这里使用`append!`函数代换一下试试 + ddb_sess.run("append!(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format( + #ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format( dbPath = DDBLoader.ddb_path, partitioned_table_name = type_name + DDBLoader.ddb_partition_table_suffix, df_table_name = df_table_name @@ -923,6 +917,9 @@ class DDBHFTLoader(DDBLoader): def main(): + # TODO: + # 可以使用`Fire`库,对函数调用再做一次封装,就可以避免每次运行不同参数时候需要修改内部多处的代码。 + # PIT基本面数据 loader = DDBPITLoader() loader.create_ddb_database() diff --git a/ProtoBuffEntitys/HFDataTableMessage_pb2.py b/src/ProtoBuffEntitys/HFDataTableMessage_pb2.py similarity index 100% rename from ProtoBuffEntitys/HFDataTableMessage_pb2.py rename to src/ProtoBuffEntitys/HFDataTableMessage_pb2.py diff --git a/ProtoBuffEntitys/IndexFutureKLineMessage_pb2.py b/src/ProtoBuffEntitys/IndexFutureKLineMessage_pb2.py similarity index 100% rename from ProtoBuffEntitys/IndexFutureKLineMessage_pb2.py rename to src/ProtoBuffEntitys/IndexFutureKLineMessage_pb2.py diff --git a/ProtoBuffEntitys/IndexFutureL1TickMessage_pb2.py b/src/ProtoBuffEntitys/IndexFutureL1TickMessage_pb2.py similarity index 100% rename from ProtoBuffEntitys/IndexFutureL1TickMessage_pb2.py rename to src/ProtoBuffEntitys/IndexFutureL1TickMessage_pb2.py diff --git a/ProtoBuffEntitys/IndexKLineMessage_pb2.py b/src/ProtoBuffEntitys/IndexKLineMessage_pb2.py similarity index 100% rename from ProtoBuffEntitys/IndexKLineMessage_pb2.py rename to src/ProtoBuffEntitys/IndexKLineMessage_pb2.py diff --git a/ProtoBuffEntitys/IndexTickMessage_pb2.py b/src/ProtoBuffEntitys/IndexTickMessage_pb2.py similarity index 100% rename from ProtoBuffEntitys/IndexTickMessage_pb2.py rename to src/ProtoBuffEntitys/IndexTickMessage_pb2.py diff --git a/ProtoBuffEntitys/KLineMessage_pb2.py b/src/ProtoBuffEntitys/KLineMessage_pb2.py similarity index 100% rename from ProtoBuffEntitys/KLineMessage_pb2.py rename to src/ProtoBuffEntitys/KLineMessage_pb2.py diff --git a/ProtoBuffEntitys/OrderMessage_pb2.py b/src/ProtoBuffEntitys/OrderMessage_pb2.py similarity index 100% rename from ProtoBuffEntitys/OrderMessage_pb2.py rename to src/ProtoBuffEntitys/OrderMessage_pb2.py diff --git a/ProtoBuffEntitys/TickMessage_pb2.py b/src/ProtoBuffEntitys/TickMessage_pb2.py similarity index 100% rename from ProtoBuffEntitys/TickMessage_pb2.py rename to 
src/ProtoBuffEntitys/TickMessage_pb2.py diff --git a/ProtoBuffEntitys/TickQueueMessage_pb2.py b/src/ProtoBuffEntitys/TickQueueMessage_pb2.py similarity index 100% rename from ProtoBuffEntitys/TickQueueMessage_pb2.py rename to src/ProtoBuffEntitys/TickQueueMessage_pb2.py diff --git a/ProtoBuffEntitys/TranseMessage_pb2.py b/src/ProtoBuffEntitys/TranseMessage_pb2.py similarity index 100% rename from ProtoBuffEntitys/TranseMessage_pb2.py rename to src/ProtoBuffEntitys/TranseMessage_pb2.py
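
Reviewer note: below is a minimal usage sketch, not part of the patch, showing how the new DDBExpreCreator/DDBExpression classes in src/DDBExpression.py are intended to be wired together. The table and column names (is_common_ori, TOT_OPER_REV) and the dates are the ones used in ipynb/ddb_pit.ipynb. The sketch assumes three small typos in the committed code are fixed first: self.login -> self.ddb_sess.login in DDBBase.__init__, _fun_yoy -> _func_yoy in DDBExpreCreator.create_functions(), and the code_parition_id keyword in the str.format calls -> code_partition_id.

    # Usage sketch only; assumes the typo fixes noted above and a reachable DolphinDB server.
    from DDBExpression import DDBExpreCreator, DDBExpression

    # One-off server-side setup: register the DolphinDB functions and function views.
    creator = DDBExpreCreator()
    creator.create_function_views()   # pit_at_date, pit_col_at_date
    creator.create_functions()        # single_quarter, yoy

    # Point-in-time lookup: TOT_OPER_REV as visible on 2021.03.14 for four report periods.
    expr = DDBExpression()
    snapshot = expr.pit_col_at_date(
        'is_common_ori',                                         # statement table name
        'TOT_OPER_REV',                                          # column of interest
        '2021.03.14',                                            # observation date (DolphinDB date literal)
        ['2019.12.31', '2019.09.30', '2019.06.30', '2019.03.31'],
    )
    print(snapshot.head())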