diff --git a/dolphindb/project/scripts/pit2.dos b/dolphindb/project/scripts/single_quarter.dos
similarity index 100%
rename from dolphindb/project/scripts/pit2.dos
rename to dolphindb/project/scripts/single_quarter.dos
diff --git a/ipynb/ddb.ipynb b/ipynb/ddb.ipynb
index 028dbd2..e04a63e 100644
--- a/ipynb/ddb.ipynb
+++ b/ipynb/ddb.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
"id": "7246e0c8-61cd-4cbf-a978-aa0dc0172d6d",
"metadata": {},
"outputs": [],
@@ -12,15 +12,27 @@
},
{
"cell_type": "code",
- "execution_count": 66,
+ "execution_count": 3,
"id": "5d0f471e-682e-43cc-abdb-7e52f3bbd707",
"metadata": {},
"outputs": [],
"source": [
- "sess = ddb.session('192.168.1.7', 8848)\n",
+ "sess = ddb.session('localhost', 8848)\n",
"sess.login('admin', '123456')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fca58bdc-2aa1-4610-9a94-67d55f97a6e1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sess.run(\"\"\"\n",
+ " migrate('/data/dolphindb/backup/', \"dfs://hft_stock_ts\", \"OrderPartitioned\")\n",
+ "\"\"\")"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 67,
@@ -1101,7 +1113,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 27,
"id": "d68ea326-82c3-4a7c-97cf-c04dd8aee56b",
"metadata": {},
"outputs": [
@@ -1126,44 +1138,24 @@
" \n",
" \n",
" | \n",
- " code | \n",
- " m_nDate | \n",
- " m_nTime | \n",
- " m_nPrice | \n",
- " m_iVolume | \n",
- " m_iTurover | \n",
- " m_nMatchItems | \n",
- " m_chTradeFlag | \n",
- " m_chBSFlag | \n",
- " m_iAccVolume | \n",
- " ... | \n",
- " m_nOpen | \n",
- " m_nPreClose | \n",
- " m_nAskPrice | \n",
- " m_nAskVolume | \n",
- " m_nBidPrice | \n",
- " m_nBidVolume | \n",
- " m_nAskAvPrice | \n",
- " m_nBidAvPrice | \n",
- " m_iTotalAskVolume | \n",
- " m_iTotalBidVolume | \n",
+ " sum_cnt | \n",
"
\n",
" \n",
"
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
" \n",
"\n",
- "0 rows × 23 columns
\n",
""
],
"text/plain": [
- "Empty DataFrame\n",
- "Columns: [code, m_nDate, m_nTime, m_nPrice, m_iVolume, m_iTurover, m_nMatchItems, m_chTradeFlag, m_chBSFlag, m_iAccVolume, m_iAccTurover, m_nHigh, m_nLow, m_nOpen, m_nPreClose, m_nAskPrice, m_nAskVolume, m_nBidPrice, m_nBidVolume, m_nAskAvPrice, m_nBidAvPrice, m_iTotalAskVolume, m_iTotalBidVolume]\n",
- "Index: []\n",
- "\n",
- "[0 rows x 23 columns]"
+ " sum_cnt\n",
+ "0 0"
]
},
- "execution_count": 33,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -1173,7 +1165,35 @@
"sess.login('admin', '123456')\n",
"sess.run(\"\"\"\n",
"tbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\n",
- "select * from tbl where code='000666.SZ', m_nDate <= 2013.02.26;\n",
+ "select sum(cnt) from (select count(*) as cnt from tbl map);\n",
+ "\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "7cbc4906-7756-424a-9ce5-9d2b6d1bab4b",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "RuntimeError",
+ "evalue": " in run: Server response: 'tbl = loadTable(\"dfs://hft_stock_ts\", \"TickPartitioned\") => getFileBlocksMeta on path '/hft_stock_ts/TickPartitioned.tbl' failed, reason: path does not exist' script: '\ntbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\nselect sum(cnt) from (select count(*) as cnt from tbl map);\n'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
+ "Input \u001b[0;32mIn [28]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m sess \u001b[38;5;241m=\u001b[39m ddb\u001b[38;5;241m.\u001b[39msession(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m192.168.1.167\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;241m8848\u001b[39m)\n\u001b[1;32m 2\u001b[0m sess\u001b[38;5;241m.\u001b[39mlogin(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madmin\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m123456\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43msess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;43mtbl = loadTable(\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdfs://hft_stock_ts\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mTickPartitioned\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m);\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43mselect sum(cnt) from (select count(*) as cnt from tbl map);\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/.venv/tinysoft/lib/python3.8/site-packages/dolphindb/session.py:161\u001b[0m, in \u001b[0;36msession.run\u001b[0;34m(self, script, *args, **kwargs)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfetchSize\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m BlockReader(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcpp\u001b[38;5;241m.\u001b[39mrunBlock(script, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs))\n\u001b[0;32m--> 161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcpp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mscript\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "\u001b[0;31mRuntimeError\u001b[0m: in run: Server response: 'tbl = loadTable(\"dfs://hft_stock_ts\", \"TickPartitioned\") => getFileBlocksMeta on path '/hft_stock_ts/TickPartitioned.tbl' failed, reason: path does not exist' script: '\ntbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\nselect sum(cnt) from (select count(*) as cnt from tbl map);\n'"
+ ]
+ }
+ ],
+ "source": [
+ "sess = ddb.session('192.168.1.167', 8848)\n",
+ "sess.login('admin', '123456')\n",
+ "sess.run(\"\"\"\n",
+ "tbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\n",
+ "select sum(cnt) from (select count(*) as cnt from tbl map);\n",
"\"\"\")"
]
},
diff --git a/ipynb/ddb_pit.ipynb b/ipynb/ddb_pit.ipynb
index 74a3632..5a8a333 100644
--- a/ipynb/ddb_pit.ipynb
+++ b/ipynb/ddb_pit.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 3,
"id": "139fd1cb-aedf-4186-8408-4d630ba69599",
"metadata": {},
"outputs": [],
@@ -639,7 +639,7 @@
},
{
"cell_type": "code",
- "execution_count": 67,
+ "execution_count": 70,
"id": "4cd4cd6e-f34e-43a8-98de-c468a54d8081",
"metadata": {},
"outputs": [],
@@ -656,7 +656,10 @@
"\treport_period = report_period_list;\n",
"\t\n",
"\tquery_table = table(report_period, m_nDate);\n",
- "\tquery_table_exp = select * from cj(query_table, select code from source_table where partition(code, code_partition_id) group by code map);\n",
+ "\tquery_table_exp = select * from cj(\n",
+ " query_table, \n",
+ " select code from source_table where partition(code, code_partition_id\n",
+ " ) group by code map);\n",
"\t\n",
"\tcol_list = sqlCol(['code', 'report_period', 'appear_at_date', col_name]);\n",
" from_tbl = ;\n",
@@ -672,7 +675,7 @@
},
{
"cell_type": "code",
- "execution_count": 69,
+ "execution_count": 71,
"id": "943b760a-ab39-4291-8a93-81b3e38a70b7",
"metadata": {},
"outputs": [
@@ -798,7 +801,7 @@
"[91 rows x 1 columns]"
]
},
- "execution_count": 69,
+ "execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
@@ -978,10 +981,865 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 181,
"id": "3c246940-1ad6-414f-b461-2d8ca7cd87f1",
"metadata": {},
"outputs": [],
+ "source": [
+ "sess.run(\"\"\"\n",
+ " def single_quarter(table_name, col_list) {\n",
+ " \n",
+ " sel_col = array(ANY, 4 + size(col_list));\n",
+ " sel_col[0] = sqlCol('code');\n",
+ " sel_col[2] = sqlColAlias(, 'year');\n",
+ " \n",
+ " for (i in 0..(size(col_list) - 1)) {\n",
+ " sel_col[i + 4] = sqlCol(col_list[i]);\n",
+ " }\n",
+ "\n",
+ " // 当季累计数据,作为下一季基准\n",
+ " sel_col[1] = sqlColAlias(sqlCol('report_period'), 'base_report_period');\n",
+ " sel_col[3] = sqlColAlias(, 'quarter_of_year');\n",
+ " tbl_quarter_accum_base = sql(\n",
+ " select = sel_col,\n",
+ " from = table_name,\n",
+ " where = \n",
+ " ).eval();\n",
+ "\n",
+ " // 从第二季开始,需要去匹配前一季累计基数\n",
+ " sel_col[1] = sqlColAlias(sqlCol('report_period'), 'current_report_period');\n",
+ " sel_col[3] = sqlColAlias(, 'quarter_of_year');\n",
+ " tbl_quarter_accum = sql(\n",
+ " select = sel_col,\n",
+ " from = table_name,\n",
+ " where = 1>\n",
+ " ).eval();\n",
+ "\n",
+ " // 单季流量,把非第一季的季报都要扣除前一季度的基数\n",
+ " sel_col[1] = sqlColAlias(sqlCol('current_report_period'), 'report_period');\n",
+ " sel_col[2] = sqlCol('year');\n",
+ " sel_col[3] = sqlCol('quarter_of_year');\n",
+ " for (i in 0..(size(col_list) - 1)) {\n",
+ " sel_col[i + 4] = sqlColAlias(\n",
+ " expr(\n",
+ " sqlCol('tbl_quarter_accum_' + col_list[i]),\n",
+ " -,\n",
+ " sqlCol(col_list[i])\n",
+ " )\n",
+ " , col_list[i] + '_flux');\n",
+ " }\n",
+ " from_obj = ej(\n",
+ " tbl_quarter_accum_base, \n",
+ " tbl_quarter_accum, \n",
+ " `code`year`quarter_of_year, \n",
+ " `code`year`quarter_of_year);\n",
+ " tbl_quarter_flux = sql(\n",
+ " select = sel_col, \n",
+ " from = from_obj\n",
+ " ).eval();\n",
+ "\n",
+ " // 每年第一个季度\n",
+ " sel_col[1] = sqlCol('report_period');\n",
+ " sel_col[2] = sqlColAlias(, 'year');\n",
+ " sel_col[3] = sqlColAlias(, 'quarter_of_year');\n",
+ " for (i in 0..(size(col_list) - 1)) {\n",
+ " sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_flux');\n",
+ " }\n",
+ " tbl_quarter1 = sql(\n",
+ " select = sel_col,\n",
+ " from = table_name,\n",
+ " where = \n",
+ " ).eval();\n",
+ " \n",
+ " // 再拼接回第一季度(无需扣除基数的数据)\n",
+ " tbl_quarter_flux = unionAll(\n",
+ " tbl_quarter1, \n",
+ " tbl_quarter_flux\n",
+ " );\n",
+ " \n",
+ " return tbl_quarter_flux;\n",
+ " }\n",
+ "\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 182,
+ "id": "981c8b85-a750-4d6b-bed9-c2567a95b2a3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " code | \n",
+ " report_period | \n",
+ " year | \n",
+ " quarter_of_year | \n",
+ " TOT_OPER_REV_flux | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 000400.SZ | \n",
+ " 2019-03-31 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " 1.032565e+09 | \n",
+ " \n",
+ " \n",
+ " 82 | \n",
+ " 000400.SZ | \n",
+ " 2019-06-30 | \n",
+ " 2019 | \n",
+ " 2 | \n",
+ " 2.021923e+09 | \n",
+ " \n",
+ " \n",
+ " 81 | \n",
+ " 000400.SZ | \n",
+ " 2019-09-30 | \n",
+ " 2019 | \n",
+ " 3 | \n",
+ " 2.183729e+09 | \n",
+ " \n",
+ " \n",
+ " 80 | \n",
+ " 000400.SZ | \n",
+ " 2019-12-31 | \n",
+ " 2019 | \n",
+ " 4 | \n",
+ " 4.917866e+09 | \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 000558.SZ | \n",
+ " 2019-03-31 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " 2.361816e+07 | \n",
+ " \n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " \n",
+ " \n",
+ " 321 | \n",
+ " 688396.SH | \n",
+ " 2019-12-31 | \n",
+ " 2019 | \n",
+ " 4 | \n",
+ " 1.610869e+09 | \n",
+ " \n",
+ " \n",
+ " 79 | \n",
+ " 833874.NE | \n",
+ " 2019-03-31 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " 3.720237e+07 | \n",
+ " \n",
+ " \n",
+ " 325 | \n",
+ " 833874.NE | \n",
+ " 2019-06-30 | \n",
+ " 2019 | \n",
+ " 2 | \n",
+ " 4.538674e+07 | \n",
+ " \n",
+ " \n",
+ " 324 | \n",
+ " 833874.NE | \n",
+ " 2019-09-30 | \n",
+ " 2019 | \n",
+ " 3 | \n",
+ " 4.776034e+07 | \n",
+ " \n",
+ " \n",
+ " 323 | \n",
+ " 833874.NE | \n",
+ " 2019-12-31 | \n",
+ " 2019 | \n",
+ " 4 | \n",
+ " 4.816188e+07 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " 326 rows × 5 columns \n",
+ " "
+ ],
+ "text/plain": [
+ " code report_period year quarter_of_year TOT_OPER_REV_flux\n",
+ "0 000400.SZ 2019-03-31 2019 1 1.032565e+09\n",
+ "82 000400.SZ 2019-06-30 2019 2 2.021923e+09\n",
+ "81 000400.SZ 2019-09-30 2019 3 2.183729e+09\n",
+ "80 000400.SZ 2019-12-31 2019 4 4.917866e+09\n",
+ "1 000558.SZ 2019-03-31 2019 1 2.361816e+07\n",
+ ".. ... ... ... ... ...\n",
+ "321 688396.SH 2019-12-31 2019 4 1.610869e+09\n",
+ "79 833874.NE 2019-03-31 2019 1 3.720237e+07\n",
+ "325 833874.NE 2019-06-30 2019 2 4.538674e+07\n",
+ "324 833874.NE 2019-09-30 2019 3 4.776034e+07\n",
+ "323 833874.NE 2019-12-31 2019 4 4.816188e+07\n",
+ "\n",
+ "[326 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 182,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sess.run(\"single_quarter(tmp_table, ['TOT_OPER_REV'])\").sort_values(['code', 'report_period'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 144,
+ "id": "1ceea0a7-7218-4aa7-be8a-ebb655dcbc93",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sess.run(\"\"\"\n",
+ " tmp_table = select * from pit_col_at_date(\n",
+ " \"is_common_ori\", \n",
+ " \"TOT_OPER_REV\", \n",
+ " 2021.03.14, \n",
+ " [2019.12.31, 2019.09.30, 2019.06.30, 2019.03.31], \n",
+ " 0) order by code\n",
+ "\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 145,
+ "id": "d01d6fc2-15ed-4e40-9181-0f3e3a96d2cb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " code | \n",
+ " report_period | \n",
+ " appear_at_date | \n",
+ " TOT_OPER_REV | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 000400.SZ | \n",
+ " 2019-12-31 | \n",
+ " 2020-04-10 | \n",
+ " 1.015608e+10 | \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 000400.SZ | \n",
+ " 2019-09-30 | \n",
+ " 2019-10-12 | \n",
+ " 5.238217e+09 | \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " 000400.SZ | \n",
+ " 2019-06-30 | \n",
+ " 2019-08-24 | \n",
+ " 3.054488e+09 | \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 000400.SZ | \n",
+ " 2019-03-31 | \n",
+ " 2019-04-26 | \n",
+ " 1.032565e+09 | \n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " 000558.SZ | \n",
+ " 2019-12-31 | \n",
+ " 2020-08-25 | \n",
+ " 1.378479e+08 | \n",
+ " \n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " \n",
+ " \n",
+ " 333 | \n",
+ " 831010.NE | \n",
+ " 2019-06-30 | \n",
+ " 2019-08-09 | \n",
+ " 1.970813e+08 | \n",
+ " \n",
+ " \n",
+ " 334 | \n",
+ " 833874.NE | \n",
+ " 2019-12-31 | \n",
+ " 2020-06-29 | \n",
+ " 1.785113e+08 | \n",
+ " \n",
+ " \n",
+ " 335 | \n",
+ " 833874.NE | \n",
+ " 2019-09-30 | \n",
+ " 2019-10-22 | \n",
+ " 1.303494e+08 | \n",
+ " \n",
+ " \n",
+ " 336 | \n",
+ " 833874.NE | \n",
+ " 2019-06-30 | \n",
+ " 2019-08-16 | \n",
+ " 8.258911e+07 | \n",
+ " \n",
+ " \n",
+ " 337 | \n",
+ " 833874.NE | \n",
+ " 2019-03-31 | \n",
+ " 2019-04-30 | \n",
+ " 3.720237e+07 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " 338 rows × 4 columns \n",
+ " "
+ ],
+ "text/plain": [
+ " code report_period appear_at_date TOT_OPER_REV\n",
+ "0 000400.SZ 2019-12-31 2020-04-10 1.015608e+10\n",
+ "1 000400.SZ 2019-09-30 2019-10-12 5.238217e+09\n",
+ "2 000400.SZ 2019-06-30 2019-08-24 3.054488e+09\n",
+ "3 000400.SZ 2019-03-31 2019-04-26 1.032565e+09\n",
+ "4 000558.SZ 2019-12-31 2020-08-25 1.378479e+08\n",
+ ".. ... ... ... ...\n",
+ "333 831010.NE 2019-06-30 2019-08-09 1.970813e+08\n",
+ "334 833874.NE 2019-12-31 2020-06-29 1.785113e+08\n",
+ "335 833874.NE 2019-09-30 2019-10-22 1.303494e+08\n",
+ "336 833874.NE 2019-06-30 2019-08-16 8.258911e+07\n",
+ "337 833874.NE 2019-03-31 2019-04-30 3.720237e+07\n",
+ "\n",
+ "[338 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 145,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sess.run(\"tmp_table\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "5ac22809-9ba9-4a01-b76a-591558c4cc52",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'< select symbol,date,cumsum(volume) as cumVol from tmp_tablef0e858a3837f0000 >'"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sess.run(\"\"\"\n",
+ "sql(select=sqlCol(['symbol', 'date', 'cumsum(volume) as cumVol']), from = tmp_table)\n",
+ "\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "dc36ca56-07e0-4912-b5d2-da192a1c3c62",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'< select symbol,date,cumsum(volume) as cumVol from tmp_tablef0e858a3837f0000 >'"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sess.run(\"\"\"\n",
+ "sql(select=(sqlCol(`symbol),sqlCol(`date),sqlColAlias(, `cumVol)), from=tmp_table)\n",
+ "\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 211,
+ "id": "c3be05b5-c9b9-4df5-8481-b3092c25125e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sess.run(\"\"\"\n",
+ " def yoy(table_name, col_list) {\n",
+ " sel_col = array(ANY, 4 + size(col_list));\n",
+ " sel_col[0] = sqlCol('code');\n",
+ " sel_col[3] = sqlColAlias(, 'quarter_of_year');\n",
+ " \n",
+ " // 上一年数据\n",
+ " for (i in 0..(size(col_list) - 1)) {\n",
+ " sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_base');\n",
+ " }\n",
+ " sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_last_year');\n",
+ " sel_col[2] = sqlColAlias(, 'year');\n",
+ " tbl_last_year = sql(\n",
+ " select = sel_col,\n",
+ " from = table_name\n",
+ " ).eval();\n",
+ "\n",
+ " // 本年度数据\n",
+ " for (i in 0..(size(col_list) - 1)) {\n",
+ " sel_col[i + 4] = sqlCol(col_list[i]);\n",
+ " }\n",
+ " sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_current_year');\n",
+ " sel_col[2] = sqlColAlias(, 'year');\n",
+ " tbl_this_year = sql(\n",
+ " select = sel_col,\n",
+ " from = table_name\n",
+ " ).eval();\n",
+ " \n",
+ " // 计算同比增长率\n",
+ " sel_col = array(ANY, 4 + (3 * size(col_list)));\n",
+ "\n",
+ " sel_col[0] = sqlCol('code');\n",
+ " sel_col[1] = sqlColAlias(sqlCol('report_period_current_year'), 'report_period');\n",
+ " sel_col[2] = sqlCol('year');\n",
+ " sel_col[3] = sqlCol('quarter_of_year');\n",
+ " \n",
+ " for (i in 0..(size(col_list) - 1)) {\n",
+ " sel_col[3 * i + 4] = sqlCol(col_list[i] + '_base');\n",
+ " sel_col[3 * i + 5] = sqlCol(col_list[i]);\n",
+ " sel_col[3 * i + 6] = sqlColAlias(\n",
+ " expr(expr(sqlCol(col_list[i]), -, sqlCol(col_list[i] + '_base')), /, expr(abs, sqlCol(col_list[i] + '_base'))), \n",
+ " col_list[i] + '_yoy');\n",
+ " }\n",
+ " from_obj = ej(\n",
+ " tbl_last_year, \n",
+ " tbl_this_year, \n",
+ " `code`year`quarter_of_year, \n",
+ " `code`year`quarter_of_year);\n",
+ " \n",
+ " return sql(\n",
+ " select = sel_col,\n",
+ " from = from_obj).eval();\n",
+ " }\n",
+ "\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 212,
+ "id": "63492d0c-c348-4e8b-a08b-5feb279cee5e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " code | \n",
+ " report_period | \n",
+ " year | \n",
+ " quarter_of_year | \n",
+ " TOT_OPER_REV_base | \n",
+ " TOT_OPER_REV | \n",
+ " TOT_OPER_REV_yoy | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 000400.SZ | \n",
+ " 2019-12-31 | \n",
+ " 2019 | \n",
+ " 4 | \n",
+ " 8.216559e+09 | \n",
+ " 1.015608e+10 | \n",
+ " 0.236051 | \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 000400.SZ | \n",
+ " 2018-12-31 | \n",
+ " 2018 | \n",
+ " 4 | \n",
+ " 1.033072e+10 | \n",
+ " 8.216559e+09 | \n",
+ " -0.204648 | \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " 000558.SZ | \n",
+ " 2019-12-31 | \n",
+ " 2019 | \n",
+ " 4 | \n",
+ " 7.024741e+08 | \n",
+ " 1.378479e+08 | \n",
+ " -0.803768 | \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 000558.SZ | \n",
+ " 2018-12-31 | \n",
+ " 2018 | \n",
+ " 4 | \n",
+ " 1.324249e+09 | \n",
+ " 7.024741e+08 | \n",
+ " -0.469530 | \n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " 000677.SZ | \n",
+ " 2019-12-31 | \n",
+ " 2019 | \n",
+ " 4 | \n",
+ " 7.068198e+08 | \n",
+ " 7.906742e+08 | \n",
+ " 0.118636 | \n",
+ " \n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " \n",
+ " \n",
+ " 177 | \n",
+ " 688396.SH | \n",
+ " 2018-12-31 | \n",
+ " 2018 | \n",
+ " 4 | \n",
+ " 5.875590e+09 | \n",
+ " 6.270797e+09 | \n",
+ " 0.067262 | \n",
+ " \n",
+ " \n",
+ " 178 | \n",
+ " 831010.NE | \n",
+ " 2019-12-31 | \n",
+ " 2019 | \n",
+ " 4 | \n",
+ " 2.936331e+08 | \n",
+ " 3.552313e+08 | \n",
+ " 0.209780 | \n",
+ " \n",
+ " \n",
+ " 179 | \n",
+ " 831010.NE | \n",
+ " 2018-12-31 | \n",
+ " 2018 | \n",
+ " 4 | \n",
+ " 2.209252e+08 | \n",
+ " 2.936331e+08 | \n",
+ " 0.329106 | \n",
+ " \n",
+ " \n",
+ " 180 | \n",
+ " 833874.NE | \n",
+ " 2019-12-31 | \n",
+ " 2019 | \n",
+ " 4 | \n",
+ " 1.974978e+08 | \n",
+ " 1.785113e+08 | \n",
+ " -0.096135 | \n",
+ " \n",
+ " \n",
+ " 181 | \n",
+ " 833874.NE | \n",
+ " 2018-12-31 | \n",
+ " 2018 | \n",
+ " 4 | \n",
+ " 1.986075e+08 | \n",
+ " 1.974978e+08 | \n",
+ " -0.005588 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " 182 rows × 7 columns \n",
+ " "
+ ],
+ "text/plain": [
+ " code report_period year quarter_of_year TOT_OPER_REV_base \\\n",
+ "0 000400.SZ 2019-12-31 2019 4 8.216559e+09 \n",
+ "1 000400.SZ 2018-12-31 2018 4 1.033072e+10 \n",
+ "2 000558.SZ 2019-12-31 2019 4 7.024741e+08 \n",
+ "3 000558.SZ 2018-12-31 2018 4 1.324249e+09 \n",
+ "4 000677.SZ 2019-12-31 2019 4 7.068198e+08 \n",
+ ".. ... ... ... ... ... \n",
+ "177 688396.SH 2018-12-31 2018 4 5.875590e+09 \n",
+ "178 831010.NE 2019-12-31 2019 4 2.936331e+08 \n",
+ "179 831010.NE 2018-12-31 2018 4 2.209252e+08 \n",
+ "180 833874.NE 2019-12-31 2019 4 1.974978e+08 \n",
+ "181 833874.NE 2018-12-31 2018 4 1.986075e+08 \n",
+ "\n",
+ " TOT_OPER_REV TOT_OPER_REV_yoy \n",
+ "0 1.015608e+10 0.236051 \n",
+ "1 8.216559e+09 -0.204648 \n",
+ "2 1.378479e+08 -0.803768 \n",
+ "3 7.024741e+08 -0.469530 \n",
+ "4 7.906742e+08 0.118636 \n",
+ ".. ... ... \n",
+ "177 6.270797e+09 0.067262 \n",
+ "178 3.552313e+08 0.209780 \n",
+ "179 2.936331e+08 0.329106 \n",
+ "180 1.785113e+08 -0.096135 \n",
+ "181 1.974978e+08 -0.005588 \n",
+ "\n",
+ "[182 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 212,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sess.run(\"yoy(tmp_table, ['TOT_OPER_REV'])\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 183,
+ "id": "5273ebfc-25bd-4b38-9605-d3109cf2280f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sess.run(\"\"\"\n",
+ " tmp_table = select * from pit_col_at_date(\n",
+ " \"is_common_ori\", \n",
+ " \"TOT_OPER_REV\", \n",
+ " 2021.03.14, \n",
+ " [2019.12.31, 2018.12.31, 2017.12.31], \n",
+ " 0) order by code\n",
+ "\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 184,
+ "id": "99af13cd-5918-4d21-9067-d3da86d19c15",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " code | \n",
+ " report_period | \n",
+ " appear_at_date | \n",
+ " TOT_OPER_REV | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 000400.SZ | \n",
+ " 2019-12-31 | \n",
+ " 2020-04-10 | \n",
+ " 1.015608e+10 | \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 000400.SZ | \n",
+ " 2018-12-31 | \n",
+ " 2019-03-29 | \n",
+ " 8.216559e+09 | \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " 000400.SZ | \n",
+ " 2017-12-31 | \n",
+ " 2018-03-24 | \n",
+ " 1.033072e+10 | \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 000558.SZ | \n",
+ " 2019-12-31 | \n",
+ " 2020-08-25 | \n",
+ " 1.378479e+08 | \n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " 000558.SZ | \n",
+ " 2018-12-31 | \n",
+ " 2019-04-26 | \n",
+ " 7.024741e+08 | \n",
+ " \n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " \n",
+ " \n",
+ " 268 | \n",
+ " 831010.NE | \n",
+ " 2018-12-31 | \n",
+ " 2020-07-01 | \n",
+ " 2.936331e+08 | \n",
+ " \n",
+ " \n",
+ " 269 | \n",
+ " 831010.NE | \n",
+ " 2017-12-31 | \n",
+ " 2020-07-01 | \n",
+ " 2.209252e+08 | \n",
+ " \n",
+ " \n",
+ " 270 | \n",
+ " 833874.NE | \n",
+ " 2019-12-31 | \n",
+ " 2020-06-29 | \n",
+ " 1.785113e+08 | \n",
+ " \n",
+ " \n",
+ " 271 | \n",
+ " 833874.NE | \n",
+ " 2018-12-31 | \n",
+ " 2020-06-29 | \n",
+ " 1.974978e+08 | \n",
+ " \n",
+ " \n",
+ " 272 | \n",
+ " 833874.NE | \n",
+ " 2017-12-31 | \n",
+ " 2020-06-29 | \n",
+ " 1.986075e+08 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " 273 rows × 4 columns \n",
+ " "
+ ],
+ "text/plain": [
+ " code report_period appear_at_date TOT_OPER_REV\n",
+ "0 000400.SZ 2019-12-31 2020-04-10 1.015608e+10\n",
+ "1 000400.SZ 2018-12-31 2019-03-29 8.216559e+09\n",
+ "2 000400.SZ 2017-12-31 2018-03-24 1.033072e+10\n",
+ "3 000558.SZ 2019-12-31 2020-08-25 1.378479e+08\n",
+ "4 000558.SZ 2018-12-31 2019-04-26 7.024741e+08\n",
+ ".. ... ... ... ...\n",
+ "268 831010.NE 2018-12-31 2020-07-01 2.936331e+08\n",
+ "269 831010.NE 2017-12-31 2020-07-01 2.209252e+08\n",
+ "270 833874.NE 2019-12-31 2020-06-29 1.785113e+08\n",
+ "271 833874.NE 2018-12-31 2020-06-29 1.974978e+08\n",
+ "272 833874.NE 2017-12-31 2020-06-29 1.986075e+08\n",
+ "\n",
+ "[273 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 184,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sess.run(\"tmp_table\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "87820a2a-1ddf-4950-b0d4-a3e7d4a61b16",
+ "metadata": {},
+ "outputs": [],
"source": []
}
],
diff --git a/src/DDBBase.py b/src/DDBBase.py
new file mode 100644
index 0000000..04f35df
--- /dev/null
+++ b/src/DDBBase.py
@@ -0,0 +1,18 @@
+
+import dolphindb as ddb
+
+
+class DDBBase(object):
+
+ ddb_config = {
+ 'host' : '192.168.1.167',
+ 'username' : 'admin',
+ 'password' : '123456',
+ }
+
+
+ def __init__(self):
+ self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
+        self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])
+
+
diff --git a/src/DDBExpression.py b/src/DDBExpression.py
new file mode 100644
index 0000000..5c728e9
--- /dev/null
+++ b/src/DDBExpression.py
@@ -0,0 +1,365 @@
+from tqdm import tqdm
+
+import numpy as np
+import pandas as pd
+import dolphindb as ddb
+
+from DDBBase import DDBBase
+from DDBLoader import DDBPITLoader
+
+
+class DDBExpression(DDBBase):
+ """
+ 调用事先定义好的DolphinDB的Function或者FunctionView来进行表达式计算。
+
+ 定义DolphinDB中的Function或者FunctionView的具体逻辑在`DDBExpreCreator`中。
+ """
+
+ def pit_at_date(self, table_name, date, report_period_list):
+ """
+ 获得任何时间点上,能看到的某个财报最新可见数据。
+
+ 实现方式:
+ 通过调用事先在DolpinDB中创建的Function View。
+
+ Args:
+ table_name: 财务报表名字,包含是否为调整报表的后缀名(非stem名)
+ date: 观察日期,使用DolphinDB的日期格式,即YYYY.MM.DD,无需加引号
+ report_period_list: 报告期列表,列表中每一个日期为一个DolphinDb的日期格式
+
+ Return:
+ DataFrame: 在观察日期`date`日能看到的,所有公司最近能看到的,报告期在`report_period_list`中的,财务报告表`table_name`的截面数据。
+ """
+ df_list = []
+ for i in range(DDBPITLoader.num_code_partition):
+ _df = self.ddb_sess.run("""
+ select * from pit_at_date("{table_name}", {date}, [{report_period_list}], {code_partition_id});
+ """.format(
+ table_name = table_name,
+ date = date,
+ report_period_list = ','.join(report_period_list),
+                code_partition_id = i
+ ))
+ _df.set_index(['code', 'report_period', 'appear_at_date'], inplace=True)
+ df_list.append(_df)
+        return pd.concat(df_list)
+
+
+ def pit_col_at_date(self, table_name, col_name, date, report_period_list):
+ """
+ 获得任何时间点上,能看到的某个财报最新可见数据。
+
+ 实现方式:
+ 通过调用事先在DolpinDB中创建的Function View。
+
+ Args:
+ table_name: 财务报表名字,包含是否为调整报表的后缀名(非stem名)
+ col_name: 列名,一般为具体财务科目名字。
+ date: 观察日期,使用DolphinDB的日期格式,即YYYY.MM.DD,无需加引号
+ report_period_list: 报告期列表,列表中每一个日期为一个DolphinDb的日期格式
+
+ Return:
+ DataFrame: 在观察日期`date`日能看到的,所有公司最近能看到的,报告期在`report_period_list`中的,财务报告表`table_name`中``col_name`指标的截面数据。
+
+ """
+ df_list = []
+ for i in range(DDBPITLoader.num_code_partition):
+ _df = self.ddb_sess.run("""
+ select * from pit_col_at_date("{table_name}", "{col_name}", {date}, [{report_period_list}], {code_partition_id});
+ """.format(
+ table_name = table_name,
+ col_name = col_name,
+ date = date,
+ report_period_list = ','.join(report_period_list),
+                code_partition_id = i
+ ))
+ _df.set_index(['code', 'report_period', 'appear_at_date'], inplace=True)
+ df_list.append(_df)
+        return pd.concat(df_list)
+
+
+ def yoy(self, dataframe, col_list):
+ self.ddb_sess.upload({'table_yoy' : dataframe })
+        return self.ddb_sess.run("""
+ yoy({table_name}, [{col_list}]);
+ """.format(
+ table_name = 'table_yoy',
+ col_list = ','.join([f"'{col_name}'" for col_name in col_list])
+ ))
+
+
+ def single_quarter(self, dataframe, col_list):
+ self.ddb_sess.upload({'table_single_quarter' : dataframe})
+        return self.ddb_sess.run("""
+ single_quarter({table_name}, [{col_list}])
+ """.format(
+            table_name = 'table_single_quarter',
+ col_list = ','.join([f"'{col_name}'" for col_name in col_list])
+ ))
+
+
+
+class DDBExpreCreator(DDBBase):
+ """
+ 定义DolphinDB中的各种基础操作,因此这个类中的大部分实质性代码都是DolphinDB自己的语言,而不是Python语言编写的。
+
+ `Expre`为`Expression`的简写。
+ """
+ pit_dbpath = "dfs://pit_stock_ts"
+ pit_dbname = "db_pit_stock"
+
+ daily_dbpath = "dfs://daily_stock_ts"
+ daily_dbname = "db_daily_stock"
+
+
+ def create_function_views(self):
+ self._func_view_pit_at_date()
+ self._func_view_pit_col_at_date()
+
+
+ def create_functions(self):
+        self._func_yoy()
+ self._func_single_quarter()
+
+
+ def _func_single_quarter(self):
+ code = """
+ /**
+ * Args:
+ * table: DolphinDB的表,可以是一个内存表,也可以是一个分区表。
+ * col_list: 需要进行单季转换的字段名列表。
+ */
+ def single_quarter(table_name, col_list) {
+
+ sel_col = array(ANY, 4 + size(col_list));
+ sel_col[0] = sqlCol('code');
+ sel_col[2] = sqlColAlias(, 'year');
+
+ for (i in 0..(size(col_list) - 1)) {
+ sel_col[i + 4] = sqlCol(col_list[i]);
+ }
+
+ // 当季累计数据,作为下一季基准
+ sel_col[1] = sqlColAlias(sqlCol('report_period'), 'base_report_period');
+ sel_col[3] = sqlColAlias(, 'quarter_of_year');
+ tbl_quarter_accum_base = sql(
+ select = sel_col,
+ from = table_name,
+ where =
+ ).eval();
+
+ // 从第二季开始,需要去匹配前一季累计基数
+ sel_col[1] = sqlColAlias(sqlCol('report_period'), 'current_report_period');
+ sel_col[3] = sqlColAlias(, 'quarter_of_year');
+ tbl_quarter_accum = sql(
+ select = sel_col,
+ from = table_name,
+ where = 1>
+ ).eval();
+
+ // 单季流量,把非第一季的季报都要扣除前一季度的基数
+ sel_col[1] = sqlColAlias(sqlCol('current_report_period'), 'report_period');
+ sel_col[2] = sqlCol('year');
+ sel_col[3] = sqlCol('quarter_of_year');
+ for (i in 0..(size(col_list) - 1)) {
+ sel_col[i + 4] = sqlColAlias(
+ expr(
+ sqlCol('tbl_quarter_accum_' + col_list[i]),
+ -,
+ sqlCol(col_list[i])
+ )
+ , col_list[i] + '_flux');
+ }
+ from_obj = ej(
+ tbl_quarter_accum_base,
+ tbl_quarter_accum,
+ `code`year`quarter_of_year,
+ `code`year`quarter_of_year);
+ tbl_quarter_flux = sql(
+ select = sel_col,
+ from = from_obj
+ ).eval();
+
+ // 每年第一个季度
+ sel_col[1] = sqlCol('report_period');
+ sel_col[2] = sqlColAlias(, 'year');
+ sel_col[3] = sqlColAlias(, 'quarter_of_year');
+ for (i in 0..(size(col_list) - 1)) {
+ sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_flux');
+ }
+ tbl_quarter1 = sql(
+ select = sel_col,
+ from = table_name,
+ where =
+ ).eval();
+
+ // 再拼接回第一季度(无需扣除基数的数据)
+ tbl_quarter_flux = unionAll(
+ tbl_quarter1,
+ tbl_quarter_flux
+ );
+
+ return tbl_quarter_flux;
+ }
+ """
+ self.ddb_sess.run(code)
+
+
+ def _func_yoy(self):
+ code = """
+ /**
+ * Args:
+ * table: DolphinDB的表,可以是一个内存表,也可以是一个分区表。
+ * col_list: 需要进行同比转换的字段名。
+ */
+ def yoy(table_name, col_list) {
+ sel_col = array(ANY, 4 + size(col_list));
+ sel_col[0] = sqlCol('code');
+ sel_col[3] = sqlColAlias(, 'quarter_of_year');
+
+ // 上一年数据
+ for (i in 0..(size(col_list) - 1)) {
+ sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_base');
+ }
+ sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_last_year');
+ sel_col[2] = sqlColAlias(, 'year');
+ tbl_last_year = sql(
+ select = sel_col,
+ from = table_name
+ ).eval();
+
+ // 本年度数据
+ for (i in 0..(size(col_list) - 1)) {
+ sel_col[i + 4] = sqlCol(col_list[i]);
+ }
+ sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_current_year');
+ sel_col[2] = sqlColAlias(, 'year');
+ tbl_this_year = sql(
+ select = sel_col,
+ from = table_name
+ ).eval();
+
+ // 计算同比增长率
+ sel_col = array(ANY, 4 + (3 * size(col_list)));
+
+ sel_col[0] = sqlCol('code');
+ sel_col[1] = sqlColAlias(sqlCol('report_period_current_year'), 'report_period');
+ sel_col[2] = sqlCol('year');
+ sel_col[3] = sqlCol('quarter_of_year');
+
+ for (i in 0..(size(col_list) - 1)) {
+ sel_col[3 * i + 4] = sqlCol(col_list[i] + '_base');
+ sel_col[3 * i + 5] = sqlCol(col_list[i]);
+ sel_col[3 * i + 6] = sqlColAlias(
+ expr(expr(sqlCol(col_list[i]), -, sqlCol(col_list[i] + '_base')), /, expr(abs, sqlCol(col_list[i] + '_base'))),
+ col_list[i] + '_yoy');
+ }
+ from_obj = ej(
+ tbl_last_year,
+ tbl_this_year,
+ `code`year`quarter_of_year,
+ `code`year`quarter_of_year);
+
+ return sql(
+ select = sel_col,
+ from = from_obj).eval();
+ }
+ """
+ self.ddb_sess.run(code)
+
+
+ def _func_view_pit_at_date(self):
+ """
+ 创建`pit_at_date`,获得任何时间点上,能看到的某个财报最新可见数据。
+ """
+ self.ddb_sess.run("""
+ dropFunctionView('pit_at_date');
+ """)
+
+ self.ddb_sess.run("""
+ /**
+ * Example of use:
+ * pit_at_date("is_common_ori", "TOT_OPER_REV", 2021.03.14, [2019.12.31, 2019.09.30, 2019.06.30], 0)
+ */
+ def pit_at_date(table_name, date, report_period_list, code_partition_id) {
+ source_table = loadTable("{pit_dbpath}", table_name);
+
+ m_nDate = take(date, size(report_period_list));
+ report_period = report_period_list;
+
+ query_table = table(report_period, m_nDate);
+ query_table_exp = select * from cj(
+ query_table,
+ select code
+ from source_table
+ where partition(code, code_partition_id)
+ group by code map
+ );
+
+ source_table_part = select source_table.* from ej(
+ source_table, query_table_exp, `code`report_period
+ ) where partition(code, code_partition_id);
+
+ return select source_table_part.* from aj(
+ query_table_exp,
+ source_table_part,
+ `code`report_period`m_nDate,
+ `code`report_period`appear_at_date
+ ) where not isNull(source_table_part.code);
+ }
+
+ addFunctionView(pit_at_date);
+ """.format(
+            pit_dbpath = self.pit_dbpath
+ ))
+
+
+ def _func_view_pit_col_at_date(self):
+ """
+ 创建`pit_col_at_date`,获得任何日期点上,能看到的财报中某一个指标的最新可见数据。
+ 与`pit_at_date`的区别在于,`pit_col_at_date`返回返回一个序列,而`pit_at_date`返回一整张表。
+ """
+ # 先把原有的View Function删除
+ self.ddb_sess.run("""
+ dropFunctionView('pit_col_at_date');
+ """)
+
+ self.ddb_sess.run("""
+ /**
+ * Example of use:
+ * pit_col_at_date("is_common_ori", "TOT_OPER_REV", 2021.03.14, [2019.12.31, 2019.09.30, 2019.06.30], 0)
+ */
+ def pit_col_at_date(table_name, col_name, date, report_period_list, code_partition_id) {
+ source_table = loadTable("{pit_dbpath}", table_name);
+
+ m_nDate = take(date, size(report_period_list));
+ report_period = report_period_list;
+
+ query_table = table(report_period, m_nDate);
+ query_table_exp = select * from cj(
+ query_table,
+ select code
+ from source_table
+ where partition(code, code_partition_id)
+ group by code map
+ );
+
+ col_list = sqlCol(['code', 'report_period', 'appear_at_date', col_name]);
+ from_tbl = ;
+ where_conditions = [];
+ source_table_part = sql(select=col_list, from=from_tbl, where=where_conditions).eval();
+
+ return select source_table_part.* from aj(
+ query_table_exp,
+ source_table_part,
+ `code`report_period`m_nDate,
+ `code`report_period`appear_at_date
+ ) where not isNull(source_table_part.code)
+ }
+
+ addFunctionView(pit_col_at_date);
+ """.format(
+ pit_dbpath = self.pit_dbpath
+ ))
+
+
diff --git a/src/DDBFactor.py b/src/DDBFactor.py
index c0470cb..0a7b456 100644
--- a/src/DDBFactor.py
+++ b/src/DDBFactor.py
@@ -5,10 +5,10 @@ import functools
import dolphindb as ddb
+from DDBBase import DDBBase
from DDBLoader import DDBLoader
-
def load_ddb_table(hft_tbl_name):
"""
这是一个用来简化载入分区表过程的语法糖,但似乎需要预先调用这个函数的场景并不多,简化效果不是很明显。
@@ -30,27 +30,17 @@ def load_ddb_table(hft_tbl_name):
return decorator
-class DailyFactor(object):
+class DDBDailyFactor(DDBBase):
#ddb_hft_path = "dfs://hft_stock_ts"
#ddb_hft_dbname = "db_hft_stock"
ddb_daily_path = "dfs://daily_stock_ts"
ddb_daily_dbname = "db_daily_stock"
- ddb_config = {
- 'host' : '192.168.1.167',
- 'username' : 'admin',
- 'password' : '123456',
- }
-
# 这里的partition数量未必需要和hft表的一致
# 当读取hft表的时候,需要使用DDBLoader中的`num_code_partition`,而不是此字段
num_code_partition = 50
- def __init__(self):
- self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
- self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])
-
def create_ddb_database(self):
"""
diff --git a/src/DDBLoader.py b/src/DDBLoader.py
index e26c465..84d6c48 100644
--- a/src/DDBLoader.py
+++ b/src/DDBLoader.py
@@ -23,9 +23,10 @@ import sqlalchemy as sa
import ProtoBuffEntitys
+from DDBBase import DDBBase
-class DDBLoader(object):
+class DDBLoader(DDBBase):
"""
- 放了几个公用的配置字段,包括:
1. SQL-Server的链接参数
@@ -42,14 +43,8 @@ class DDBLoader(object):
'password' : 'passw0rd!'
}
- ddb_config = {
- 'host' : '192.168.1.167',
- 'username' : 'admin',
- 'password' : '123456'
- }
-
-
def __init__(self):
+ super().__init__()
self.mssql_engine = sa.create_engine(
"mssql+pyodbc://{username}:{password}@{host}/master?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config),
connect_args = {
@@ -57,9 +52,6 @@ class DDBLoader(object):
}, echo=False
)
- self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
- self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])
-
@abc.abstractmethod
def create_ddb_database(self, *args, **kwargs):
@@ -552,7 +544,7 @@ class DDBHFTLoader(DDBLoader):
num_workers = 8
default_table_capacity = 10000
- ddb_dump_journal_fname = 'ddb_dump_journal.csv'
+ ddb_dump_journal_fname = '../assets/ddb_dump_journal.csv'
def init_ddb_database(self, df_calendar):
@@ -913,7 +905,9 @@ class DDBHFTLoader(DDBLoader):
ddb_sess.login(DDBLoader.ddb_config['username'], DDBLoader.ddb_config['password'])
ddb_sess.upload({df_table_name : df})
- ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
+ # 因为在做Tick数据的时候,偶然发生'CHUNK[xxx] does not exist.',所以在这里使用`append!`函数代换一下试试
+ ddb_sess.run("append!(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
+ #ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
dbPath = DDBLoader.ddb_path,
partitioned_table_name = type_name + DDBLoader.ddb_partition_table_suffix,
df_table_name = df_table_name
@@ -923,6 +917,9 @@ class DDBHFTLoader(DDBLoader):
def main():
+ # TODO:
+ # 可以使用`Fire`库,对函数调用再做一次封装,就可以避免每次运行不同参数时候需要修改内部多处的代码。
+
# PIT基本面数据
loader = DDBPITLoader()
loader.create_ddb_database()
diff --git a/ProtoBuffEntitys/HFDataTableMessage_pb2.py b/src/ProtoBuffEntitys/HFDataTableMessage_pb2.py
similarity index 100%
rename from ProtoBuffEntitys/HFDataTableMessage_pb2.py
rename to src/ProtoBuffEntitys/HFDataTableMessage_pb2.py
diff --git a/ProtoBuffEntitys/IndexFutureKLineMessage_pb2.py b/src/ProtoBuffEntitys/IndexFutureKLineMessage_pb2.py
similarity index 100%
rename from ProtoBuffEntitys/IndexFutureKLineMessage_pb2.py
rename to src/ProtoBuffEntitys/IndexFutureKLineMessage_pb2.py
diff --git a/ProtoBuffEntitys/IndexFutureL1TickMessage_pb2.py b/src/ProtoBuffEntitys/IndexFutureL1TickMessage_pb2.py
similarity index 100%
rename from ProtoBuffEntitys/IndexFutureL1TickMessage_pb2.py
rename to src/ProtoBuffEntitys/IndexFutureL1TickMessage_pb2.py
diff --git a/ProtoBuffEntitys/IndexKLineMessage_pb2.py b/src/ProtoBuffEntitys/IndexKLineMessage_pb2.py
similarity index 100%
rename from ProtoBuffEntitys/IndexKLineMessage_pb2.py
rename to src/ProtoBuffEntitys/IndexKLineMessage_pb2.py
diff --git a/ProtoBuffEntitys/IndexTickMessage_pb2.py b/src/ProtoBuffEntitys/IndexTickMessage_pb2.py
similarity index 100%
rename from ProtoBuffEntitys/IndexTickMessage_pb2.py
rename to src/ProtoBuffEntitys/IndexTickMessage_pb2.py
diff --git a/ProtoBuffEntitys/KLineMessage_pb2.py b/src/ProtoBuffEntitys/KLineMessage_pb2.py
similarity index 100%
rename from ProtoBuffEntitys/KLineMessage_pb2.py
rename to src/ProtoBuffEntitys/KLineMessage_pb2.py
diff --git a/ProtoBuffEntitys/OrderMessage_pb2.py b/src/ProtoBuffEntitys/OrderMessage_pb2.py
similarity index 100%
rename from ProtoBuffEntitys/OrderMessage_pb2.py
rename to src/ProtoBuffEntitys/OrderMessage_pb2.py
diff --git a/ProtoBuffEntitys/TickMessage_pb2.py b/src/ProtoBuffEntitys/TickMessage_pb2.py
similarity index 100%
rename from ProtoBuffEntitys/TickMessage_pb2.py
rename to src/ProtoBuffEntitys/TickMessage_pb2.py
diff --git a/ProtoBuffEntitys/TickQueueMessage_pb2.py b/src/ProtoBuffEntitys/TickQueueMessage_pb2.py
similarity index 100%
rename from ProtoBuffEntitys/TickQueueMessage_pb2.py
rename to src/ProtoBuffEntitys/TickQueueMessage_pb2.py
diff --git a/ProtoBuffEntitys/TranseMessage_pb2.py b/src/ProtoBuffEntitys/TranseMessage_pb2.py
similarity index 100%
rename from ProtoBuffEntitys/TranseMessage_pb2.py
rename to src/ProtoBuffEntitys/TranseMessage_pb2.py
|