1. Reorganized the directory structure.

2. Add `DDBBase` class to reduce repeated class-level constant definitions.
3. Add `DDBExpression` class for single-quarter calculation and YoY calculation.
main
Guofu Li 2 years ago
parent 85522c064c
commit bd9363f6fc

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "7246e0c8-61cd-4cbf-a978-aa0dc0172d6d", "id": "7246e0c8-61cd-4cbf-a978-aa0dc0172d6d",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -12,15 +12,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 66, "execution_count": 3,
"id": "5d0f471e-682e-43cc-abdb-7e52f3bbd707", "id": "5d0f471e-682e-43cc-abdb-7e52f3bbd707",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"sess = ddb.session('192.168.1.7', 8848)\n", "sess = ddb.session('localhost', 8848)\n",
"sess.login('admin', '123456')" "sess.login('admin', '123456')"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "fca58bdc-2aa1-4610-9a94-67d55f97a6e1",
"metadata": {},
"outputs": [],
"source": [
"sess.run(\"\"\"\n",
" migrate('/data/dolphindb/backup/', \"dfs://hft_stock_ts\", \"OrderPartitioned\")\n",
"\"\"\")"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 67, "execution_count": 67,
@ -1101,7 +1113,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 27,
"id": "d68ea326-82c3-4a7c-97cf-c04dd8aee56b", "id": "d68ea326-82c3-4a7c-97cf-c04dd8aee56b",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -1126,44 +1138,24 @@
" <thead>\n", " <thead>\n",
" <tr style=\"text-align: right;\">\n", " <tr style=\"text-align: right;\">\n",
" <th></th>\n", " <th></th>\n",
" <th>code</th>\n", " <th>sum_cnt</th>\n",
" <th>m_nDate</th>\n",
" <th>m_nTime</th>\n",
" <th>m_nPrice</th>\n",
" <th>m_iVolume</th>\n",
" <th>m_iTurover</th>\n",
" <th>m_nMatchItems</th>\n",
" <th>m_chTradeFlag</th>\n",
" <th>m_chBSFlag</th>\n",
" <th>m_iAccVolume</th>\n",
" <th>...</th>\n",
" <th>m_nOpen</th>\n",
" <th>m_nPreClose</th>\n",
" <th>m_nAskPrice</th>\n",
" <th>m_nAskVolume</th>\n",
" <th>m_nBidPrice</th>\n",
" <th>m_nBidVolume</th>\n",
" <th>m_nAskAvPrice</th>\n",
" <th>m_nBidAvPrice</th>\n",
" <th>m_iTotalAskVolume</th>\n",
" <th>m_iTotalBidVolume</th>\n",
" </tr>\n", " </tr>\n",
" </thead>\n", " </thead>\n",
" <tbody>\n", " <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>0 rows × 23 columns</p>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
"Empty DataFrame\n", " sum_cnt\n",
"Columns: [code, m_nDate, m_nTime, m_nPrice, m_iVolume, m_iTurover, m_nMatchItems, m_chTradeFlag, m_chBSFlag, m_iAccVolume, m_iAccTurover, m_nHigh, m_nLow, m_nOpen, m_nPreClose, m_nAskPrice, m_nAskVolume, m_nBidPrice, m_nBidVolume, m_nAskAvPrice, m_nBidAvPrice, m_iTotalAskVolume, m_iTotalBidVolume]\n", "0 0"
"Index: []\n",
"\n",
"[0 rows x 23 columns]"
] ]
}, },
"execution_count": 33, "execution_count": 27,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1173,7 +1165,35 @@
"sess.login('admin', '123456')\n", "sess.login('admin', '123456')\n",
"sess.run(\"\"\"\n", "sess.run(\"\"\"\n",
"tbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\n", "tbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\n",
"select * from tbl where code='000666.SZ', m_nDate <= 2013.02.26;\n", "select sum(cnt) from (select count(*) as cnt from tbl map);\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "7cbc4906-7756-424a-9ce5-9d2b6d1bab4b",
"metadata": {},
"outputs": [
{
"ename": "RuntimeError",
"evalue": "<Exception> in run: Server response: 'tbl = loadTable(\"dfs://hft_stock_ts\", \"TickPartitioned\") => getFileBlocksMeta on path '/hft_stock_ts/TickPartitioned.tbl' failed, reason: path does not exist' script: '\ntbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\nselect sum(cnt) from (select count(*) as cnt from tbl map);\n'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [28]\u001b[0m, in \u001b[0;36m<cell line: 3>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m sess \u001b[38;5;241m=\u001b[39m ddb\u001b[38;5;241m.\u001b[39msession(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m192.168.1.167\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;241m8848\u001b[39m)\n\u001b[1;32m 2\u001b[0m sess\u001b[38;5;241m.\u001b[39mlogin(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madmin\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m123456\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43msess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;43mtbl = loadTable(\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdfs://hft_stock_ts\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mTickPartitioned\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m);\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43mselect sum(cnt) from (select count(*) as cnt from tbl map);\u001b[39;49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/.venv/tinysoft/lib/python3.8/site-packages/dolphindb/session.py:161\u001b[0m, in \u001b[0;36msession.run\u001b[0;34m(self, script, *args, **kwargs)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfetchSize\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m BlockReader(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcpp\u001b[38;5;241m.\u001b[39mrunBlock(script, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs))\n\u001b[0;32m--> 161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcpp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mscript\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mRuntimeError\u001b[0m: <Exception> in run: Server response: 'tbl = loadTable(\"dfs://hft_stock_ts\", \"TickPartitioned\") => getFileBlocksMeta on path '/hft_stock_ts/TickPartitioned.tbl' failed, reason: path does not exist' script: '\ntbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\nselect sum(cnt) from (select count(*) as cnt from tbl map);\n'"
]
}
],
"source": [
"sess = ddb.session('192.168.1.167', 8848)\n",
"sess.login('admin', '123456')\n",
"sess.run(\"\"\"\n",
"tbl = loadTable(\"dfs://hft_stock_ts\", 'TickPartitioned');\n",
"select sum(cnt) from (select count(*) as cnt from tbl map);\n",
"\"\"\")" "\"\"\")"
] ]
}, },

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 3,
"id": "139fd1cb-aedf-4186-8408-4d630ba69599", "id": "139fd1cb-aedf-4186-8408-4d630ba69599",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -639,7 +639,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 67, "execution_count": 70,
"id": "4cd4cd6e-f34e-43a8-98de-c468a54d8081", "id": "4cd4cd6e-f34e-43a8-98de-c468a54d8081",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -656,7 +656,10 @@
"\treport_period = report_period_list;\n", "\treport_period = report_period_list;\n",
"\t\n", "\t\n",
"\tquery_table = table(report_period, m_nDate);\n", "\tquery_table = table(report_period, m_nDate);\n",
"\tquery_table_exp = select * from cj(query_table, select code from source_table where partition(code, code_partition_id) group by code map);\n", "\tquery_table_exp = select * from cj(\n",
" query_table, \n",
" select code from source_table where partition(code, code_partition_id\n",
" ) group by code map);\n",
"\t\n", "\t\n",
"\tcol_list = sqlCol(['code', 'report_period', 'appear_at_date', col_name]);\n", "\tcol_list = sqlCol(['code', 'report_period', 'appear_at_date', col_name]);\n",
" from_tbl = <ej(source_table, query_table_exp, `code`report_period)>;\n", " from_tbl = <ej(source_table, query_table_exp, `code`report_period)>;\n",
@ -672,7 +675,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 69, "execution_count": 71,
"id": "943b760a-ab39-4291-8a93-81b3e38a70b7", "id": "943b760a-ab39-4291-8a93-81b3e38a70b7",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -798,7 +801,7 @@
"[91 rows x 1 columns]" "[91 rows x 1 columns]"
] ]
}, },
"execution_count": 69, "execution_count": 71,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -978,10 +981,865 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 181,
"id": "3c246940-1ad6-414f-b461-2d8ca7cd87f1", "id": "3c246940-1ad6-414f-b461-2d8ca7cd87f1",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"sess.run(\"\"\"\n",
" def single_quarter(table_name, col_list) {\n",
" \n",
" sel_col = array(ANY, 4 + size(col_list));\n",
" sel_col[0] = sqlCol('code');\n",
" sel_col[2] = sqlColAlias(<year(report_period)>, 'year');\n",
" \n",
" for (i in 0..(size(col_list) - 1)) {\n",
" sel_col[i + 4] = sqlCol(col_list[i]);\n",
" }\n",
"\n",
" // 当季累计数据,作为下一季基准\n",
" sel_col[1] = sqlColAlias(sqlCol('report_period'), 'base_report_period');\n",
" sel_col[3] = sqlColAlias(<quarterOfYear(report_period) + 1>, 'quarter_of_year');\n",
" tbl_quarter_accum_base = sql(\n",
" select = sel_col,\n",
" from = table_name,\n",
" where = <quarterOfYear(report_period) < 4>\n",
" ).eval();\n",
"\n",
" // 从第二季开始,需要去匹配前一季累计基数\n",
" sel_col[1] = sqlColAlias(sqlCol('report_period'), 'current_report_period');\n",
" sel_col[3] = sqlColAlias(<quarterOfYear(report_period)>, 'quarter_of_year');\n",
" tbl_quarter_accum = sql(\n",
" select = sel_col,\n",
" from = table_name,\n",
" where = <quarterOfYear(report_period) > 1>\n",
" ).eval();\n",
"\n",
" // 单季流量,把非第一季的季报都要扣除前一季度的基数\n",
" sel_col[1] = sqlColAlias(sqlCol('current_report_period'), 'report_period');\n",
" sel_col[2] = sqlCol('year');\n",
" sel_col[3] = sqlCol('quarter_of_year');\n",
" for (i in 0..(size(col_list) - 1)) {\n",
" sel_col[i + 4] = sqlColAlias(\n",
" expr(\n",
" sqlCol('tbl_quarter_accum_' + col_list[i]),\n",
" -,\n",
" sqlCol(col_list[i])\n",
" )\n",
" , col_list[i] + '_flux');\n",
" }\n",
" from_obj = ej(\n",
" tbl_quarter_accum_base, \n",
" tbl_quarter_accum, \n",
" `code`year`quarter_of_year, \n",
" `code`year`quarter_of_year);\n",
" tbl_quarter_flux = sql(\n",
" select = sel_col, \n",
" from = from_obj\n",
" ).eval();\n",
"\n",
" // 每年第一个季度\n",
" sel_col[1] = sqlCol('report_period');\n",
" sel_col[2] = sqlColAlias(<year(report_period)>, 'year');\n",
" sel_col[3] = sqlColAlias(<quarterOfYear(report_period)>, 'quarter_of_year');\n",
" for (i in 0..(size(col_list) - 1)) {\n",
" sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_flux');\n",
" }\n",
" tbl_quarter1 = sql(\n",
" select = sel_col,\n",
" from = table_name,\n",
" where = <quarterOfYear(report_period) == 1>\n",
" ).eval();\n",
" \n",
" // 再拼接回第一季度(无需扣除基数的数据)\n",
" tbl_quarter_flux = unionAll(\n",
" tbl_quarter1, \n",
" tbl_quarter_flux\n",
" );\n",
" \n",
" return tbl_quarter_flux;\n",
" }\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 182,
"id": "981c8b85-a750-4d6b-bed9-c2567a95b2a3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>code</th>\n",
" <th>report_period</th>\n",
" <th>year</th>\n",
" <th>quarter_of_year</th>\n",
" <th>TOT_OPER_REV_flux</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>000400.SZ</td>\n",
" <td>2019-03-31</td>\n",
" <td>2019</td>\n",
" <td>1</td>\n",
" <td>1.032565e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>000400.SZ</td>\n",
" <td>2019-06-30</td>\n",
" <td>2019</td>\n",
" <td>2</td>\n",
" <td>2.021923e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>000400.SZ</td>\n",
" <td>2019-09-30</td>\n",
" <td>2019</td>\n",
" <td>3</td>\n",
" <td>2.183729e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>000400.SZ</td>\n",
" <td>2019-12-31</td>\n",
" <td>2019</td>\n",
" <td>4</td>\n",
" <td>4.917866e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>000558.SZ</td>\n",
" <td>2019-03-31</td>\n",
" <td>2019</td>\n",
" <td>1</td>\n",
" <td>2.361816e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>321</th>\n",
" <td>688396.SH</td>\n",
" <td>2019-12-31</td>\n",
" <td>2019</td>\n",
" <td>4</td>\n",
" <td>1.610869e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>833874.NE</td>\n",
" <td>2019-03-31</td>\n",
" <td>2019</td>\n",
" <td>1</td>\n",
" <td>3.720237e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>325</th>\n",
" <td>833874.NE</td>\n",
" <td>2019-06-30</td>\n",
" <td>2019</td>\n",
" <td>2</td>\n",
" <td>4.538674e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>324</th>\n",
" <td>833874.NE</td>\n",
" <td>2019-09-30</td>\n",
" <td>2019</td>\n",
" <td>3</td>\n",
" <td>4.776034e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>323</th>\n",
" <td>833874.NE</td>\n",
" <td>2019-12-31</td>\n",
" <td>2019</td>\n",
" <td>4</td>\n",
" <td>4.816188e+07</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>326 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" code report_period year quarter_of_year TOT_OPER_REV_flux\n",
"0 000400.SZ 2019-03-31 2019 1 1.032565e+09\n",
"82 000400.SZ 2019-06-30 2019 2 2.021923e+09\n",
"81 000400.SZ 2019-09-30 2019 3 2.183729e+09\n",
"80 000400.SZ 2019-12-31 2019 4 4.917866e+09\n",
"1 000558.SZ 2019-03-31 2019 1 2.361816e+07\n",
".. ... ... ... ... ...\n",
"321 688396.SH 2019-12-31 2019 4 1.610869e+09\n",
"79 833874.NE 2019-03-31 2019 1 3.720237e+07\n",
"325 833874.NE 2019-06-30 2019 2 4.538674e+07\n",
"324 833874.NE 2019-09-30 2019 3 4.776034e+07\n",
"323 833874.NE 2019-12-31 2019 4 4.816188e+07\n",
"\n",
"[326 rows x 5 columns]"
]
},
"execution_count": 182,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"single_quarter(tmp_table, ['TOT_OPER_REV'])\").sort_values(['code', 'report_period'])"
]
},
{
"cell_type": "code",
"execution_count": 144,
"id": "1ceea0a7-7218-4aa7-be8a-ebb655dcbc93",
"metadata": {},
"outputs": [],
"source": [
"sess.run(\"\"\"\n",
" tmp_table = select * from pit_col_at_date(\n",
" \"is_common_ori\", \n",
" \"TOT_OPER_REV\", \n",
" 2021.03.14, \n",
" [2019.12.31, 2019.09.30, 2019.06.30, 2019.03.31], \n",
" 0) order by code\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 145,
"id": "d01d6fc2-15ed-4e40-9181-0f3e3a96d2cb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>code</th>\n",
" <th>report_period</th>\n",
" <th>appear_at_date</th>\n",
" <th>TOT_OPER_REV</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>000400.SZ</td>\n",
" <td>2019-12-31</td>\n",
" <td>2020-04-10</td>\n",
" <td>1.015608e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>000400.SZ</td>\n",
" <td>2019-09-30</td>\n",
" <td>2019-10-12</td>\n",
" <td>5.238217e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>000400.SZ</td>\n",
" <td>2019-06-30</td>\n",
" <td>2019-08-24</td>\n",
" <td>3.054488e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>000400.SZ</td>\n",
" <td>2019-03-31</td>\n",
" <td>2019-04-26</td>\n",
" <td>1.032565e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>000558.SZ</td>\n",
" <td>2019-12-31</td>\n",
" <td>2020-08-25</td>\n",
" <td>1.378479e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>333</th>\n",
" <td>831010.NE</td>\n",
" <td>2019-06-30</td>\n",
" <td>2019-08-09</td>\n",
" <td>1.970813e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>334</th>\n",
" <td>833874.NE</td>\n",
" <td>2019-12-31</td>\n",
" <td>2020-06-29</td>\n",
" <td>1.785113e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>335</th>\n",
" <td>833874.NE</td>\n",
" <td>2019-09-30</td>\n",
" <td>2019-10-22</td>\n",
" <td>1.303494e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>336</th>\n",
" <td>833874.NE</td>\n",
" <td>2019-06-30</td>\n",
" <td>2019-08-16</td>\n",
" <td>8.258911e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>337</th>\n",
" <td>833874.NE</td>\n",
" <td>2019-03-31</td>\n",
" <td>2019-04-30</td>\n",
" <td>3.720237e+07</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>338 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" code report_period appear_at_date TOT_OPER_REV\n",
"0 000400.SZ 2019-12-31 2020-04-10 1.015608e+10\n",
"1 000400.SZ 2019-09-30 2019-10-12 5.238217e+09\n",
"2 000400.SZ 2019-06-30 2019-08-24 3.054488e+09\n",
"3 000400.SZ 2019-03-31 2019-04-26 1.032565e+09\n",
"4 000558.SZ 2019-12-31 2020-08-25 1.378479e+08\n",
".. ... ... ... ...\n",
"333 831010.NE 2019-06-30 2019-08-09 1.970813e+08\n",
"334 833874.NE 2019-12-31 2020-06-29 1.785113e+08\n",
"335 833874.NE 2019-09-30 2019-10-22 1.303494e+08\n",
"336 833874.NE 2019-06-30 2019-08-16 8.258911e+07\n",
"337 833874.NE 2019-03-31 2019-04-30 3.720237e+07\n",
"\n",
"[338 rows x 4 columns]"
]
},
"execution_count": 145,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"tmp_table\")"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "5ac22809-9ba9-4a01-b76a-591558c4cc52",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'< select symbol,date,cumsum(volume) as cumVol from tmp_tablef0e858a3837f0000 >'"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"\"\"\n",
"sql(select=sqlCol(['symbol', 'date', 'cumsum(volume) as cumVol']), from = tmp_table)\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "dc36ca56-07e0-4912-b5d2-da192a1c3c62",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'< select symbol,date,cumsum(volume) as cumVol from tmp_tablef0e858a3837f0000 >'"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"\"\"\n",
"sql(select=(sqlCol(`symbol),sqlCol(`date),sqlColAlias(<cumsum(volume)>, `cumVol)), from=tmp_table)\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 211,
"id": "c3be05b5-c9b9-4df5-8481-b3092c25125e",
"metadata": {},
"outputs": [],
"source": [
"sess.run(\"\"\"\n",
" def yoy(table_name, col_list) {\n",
" sel_col = array(ANY, 4 + size(col_list));\n",
" sel_col[0] = sqlCol('code');\n",
" sel_col[3] = sqlColAlias(<quarterOfYear(report_period)>, 'quarter_of_year');\n",
" \n",
" // 上一年数据\n",
" for (i in 0..(size(col_list) - 1)) {\n",
" sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_base');\n",
" }\n",
" sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_last_year');\n",
" sel_col[2] = sqlColAlias(<year(report_period) + 1>, 'year');\n",
" tbl_last_year = sql(\n",
" select = sel_col,\n",
" from = table_name\n",
" ).eval();\n",
"\n",
" // 本年度数据\n",
" for (i in 0..(size(col_list) - 1)) {\n",
" sel_col[i + 4] = sqlCol(col_list[i]);\n",
" }\n",
" sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_current_year');\n",
" sel_col[2] = sqlColAlias(<year(report_period)>, 'year');\n",
" tbl_this_year = sql(\n",
" select = sel_col,\n",
" from = table_name\n",
" ).eval();\n",
" \n",
" // 计算同比增长率\n",
" sel_col = array(ANY, 4 + (3 * size(col_list)));\n",
"\n",
" sel_col[0] = sqlCol('code');\n",
" sel_col[1] = sqlColAlias(sqlCol('report_period_current_year'), 'report_period');\n",
" sel_col[2] = sqlCol('year');\n",
" sel_col[3] = sqlCol('quarter_of_year');\n",
" \n",
" for (i in 0..(size(col_list) - 1)) {\n",
" sel_col[3 * i + 4] = sqlCol(col_list[i] + '_base');\n",
" sel_col[3 * i + 5] = sqlCol(col_list[i]);\n",
" sel_col[3 * i + 6] = sqlColAlias(\n",
" expr(expr(sqlCol(col_list[i]), -, sqlCol(col_list[i] + '_base')), /, expr(abs, sqlCol(col_list[i] + '_base'))), \n",
" col_list[i] + '_yoy');\n",
" }\n",
" from_obj = ej(\n",
" tbl_last_year, \n",
" tbl_this_year, \n",
" `code`year`quarter_of_year, \n",
" `code`year`quarter_of_year);\n",
" \n",
" return sql(\n",
" select = sel_col,\n",
" from = from_obj).eval();\n",
" }\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 212,
"id": "63492d0c-c348-4e8b-a08b-5feb279cee5e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>code</th>\n",
" <th>report_period</th>\n",
" <th>year</th>\n",
" <th>quarter_of_year</th>\n",
" <th>TOT_OPER_REV_base</th>\n",
" <th>TOT_OPER_REV</th>\n",
" <th>TOT_OPER_REV_yoy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>000400.SZ</td>\n",
" <td>2019-12-31</td>\n",
" <td>2019</td>\n",
" <td>4</td>\n",
" <td>8.216559e+09</td>\n",
" <td>1.015608e+10</td>\n",
" <td>0.236051</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>000400.SZ</td>\n",
" <td>2018-12-31</td>\n",
" <td>2018</td>\n",
" <td>4</td>\n",
" <td>1.033072e+10</td>\n",
" <td>8.216559e+09</td>\n",
" <td>-0.204648</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>000558.SZ</td>\n",
" <td>2019-12-31</td>\n",
" <td>2019</td>\n",
" <td>4</td>\n",
" <td>7.024741e+08</td>\n",
" <td>1.378479e+08</td>\n",
" <td>-0.803768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>000558.SZ</td>\n",
" <td>2018-12-31</td>\n",
" <td>2018</td>\n",
" <td>4</td>\n",
" <td>1.324249e+09</td>\n",
" <td>7.024741e+08</td>\n",
" <td>-0.469530</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>000677.SZ</td>\n",
" <td>2019-12-31</td>\n",
" <td>2019</td>\n",
" <td>4</td>\n",
" <td>7.068198e+08</td>\n",
" <td>7.906742e+08</td>\n",
" <td>0.118636</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>177</th>\n",
" <td>688396.SH</td>\n",
" <td>2018-12-31</td>\n",
" <td>2018</td>\n",
" <td>4</td>\n",
" <td>5.875590e+09</td>\n",
" <td>6.270797e+09</td>\n",
" <td>0.067262</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178</th>\n",
" <td>831010.NE</td>\n",
" <td>2019-12-31</td>\n",
" <td>2019</td>\n",
" <td>4</td>\n",
" <td>2.936331e+08</td>\n",
" <td>3.552313e+08</td>\n",
" <td>0.209780</td>\n",
" </tr>\n",
" <tr>\n",
" <th>179</th>\n",
" <td>831010.NE</td>\n",
" <td>2018-12-31</td>\n",
" <td>2018</td>\n",
" <td>4</td>\n",
" <td>2.209252e+08</td>\n",
" <td>2.936331e+08</td>\n",
" <td>0.329106</td>\n",
" </tr>\n",
" <tr>\n",
" <th>180</th>\n",
" <td>833874.NE</td>\n",
" <td>2019-12-31</td>\n",
" <td>2019</td>\n",
" <td>4</td>\n",
" <td>1.974978e+08</td>\n",
" <td>1.785113e+08</td>\n",
" <td>-0.096135</td>\n",
" </tr>\n",
" <tr>\n",
" <th>181</th>\n",
" <td>833874.NE</td>\n",
" <td>2018-12-31</td>\n",
" <td>2018</td>\n",
" <td>4</td>\n",
" <td>1.986075e+08</td>\n",
" <td>1.974978e+08</td>\n",
" <td>-0.005588</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>182 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" code report_period year quarter_of_year TOT_OPER_REV_base \\\n",
"0 000400.SZ 2019-12-31 2019 4 8.216559e+09 \n",
"1 000400.SZ 2018-12-31 2018 4 1.033072e+10 \n",
"2 000558.SZ 2019-12-31 2019 4 7.024741e+08 \n",
"3 000558.SZ 2018-12-31 2018 4 1.324249e+09 \n",
"4 000677.SZ 2019-12-31 2019 4 7.068198e+08 \n",
".. ... ... ... ... ... \n",
"177 688396.SH 2018-12-31 2018 4 5.875590e+09 \n",
"178 831010.NE 2019-12-31 2019 4 2.936331e+08 \n",
"179 831010.NE 2018-12-31 2018 4 2.209252e+08 \n",
"180 833874.NE 2019-12-31 2019 4 1.974978e+08 \n",
"181 833874.NE 2018-12-31 2018 4 1.986075e+08 \n",
"\n",
" TOT_OPER_REV TOT_OPER_REV_yoy \n",
"0 1.015608e+10 0.236051 \n",
"1 8.216559e+09 -0.204648 \n",
"2 1.378479e+08 -0.803768 \n",
"3 7.024741e+08 -0.469530 \n",
"4 7.906742e+08 0.118636 \n",
".. ... ... \n",
"177 6.270797e+09 0.067262 \n",
"178 3.552313e+08 0.209780 \n",
"179 2.936331e+08 0.329106 \n",
"180 1.785113e+08 -0.096135 \n",
"181 1.974978e+08 -0.005588 \n",
"\n",
"[182 rows x 7 columns]"
]
},
"execution_count": 212,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"yoy(tmp_table, ['TOT_OPER_REV'])\")"
]
},
{
"cell_type": "code",
"execution_count": 183,
"id": "5273ebfc-25bd-4b38-9605-d3109cf2280f",
"metadata": {},
"outputs": [],
"source": [
"sess.run(\"\"\"\n",
" tmp_table = select * from pit_col_at_date(\n",
" \"is_common_ori\", \n",
" \"TOT_OPER_REV\", \n",
" 2021.03.14, \n",
" [2019.12.31, 2018.12.31, 2017.12.31], \n",
" 0) order by code\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 184,
"id": "99af13cd-5918-4d21-9067-d3da86d19c15",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>code</th>\n",
" <th>report_period</th>\n",
" <th>appear_at_date</th>\n",
" <th>TOT_OPER_REV</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>000400.SZ</td>\n",
" <td>2019-12-31</td>\n",
" <td>2020-04-10</td>\n",
" <td>1.015608e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>000400.SZ</td>\n",
" <td>2018-12-31</td>\n",
" <td>2019-03-29</td>\n",
" <td>8.216559e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>000400.SZ</td>\n",
" <td>2017-12-31</td>\n",
" <td>2018-03-24</td>\n",
" <td>1.033072e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>000558.SZ</td>\n",
" <td>2019-12-31</td>\n",
" <td>2020-08-25</td>\n",
" <td>1.378479e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>000558.SZ</td>\n",
" <td>2018-12-31</td>\n",
" <td>2019-04-26</td>\n",
" <td>7.024741e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>268</th>\n",
" <td>831010.NE</td>\n",
" <td>2018-12-31</td>\n",
" <td>2020-07-01</td>\n",
" <td>2.936331e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>269</th>\n",
" <td>831010.NE</td>\n",
" <td>2017-12-31</td>\n",
" <td>2020-07-01</td>\n",
" <td>2.209252e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>270</th>\n",
" <td>833874.NE</td>\n",
" <td>2019-12-31</td>\n",
" <td>2020-06-29</td>\n",
" <td>1.785113e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>271</th>\n",
" <td>833874.NE</td>\n",
" <td>2018-12-31</td>\n",
" <td>2020-06-29</td>\n",
" <td>1.974978e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>272</th>\n",
" <td>833874.NE</td>\n",
" <td>2017-12-31</td>\n",
" <td>2020-06-29</td>\n",
" <td>1.986075e+08</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>273 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" code report_period appear_at_date TOT_OPER_REV\n",
"0 000400.SZ 2019-12-31 2020-04-10 1.015608e+10\n",
"1 000400.SZ 2018-12-31 2019-03-29 8.216559e+09\n",
"2 000400.SZ 2017-12-31 2018-03-24 1.033072e+10\n",
"3 000558.SZ 2019-12-31 2020-08-25 1.378479e+08\n",
"4 000558.SZ 2018-12-31 2019-04-26 7.024741e+08\n",
".. ... ... ... ...\n",
"268 831010.NE 2018-12-31 2020-07-01 2.936331e+08\n",
"269 831010.NE 2017-12-31 2020-07-01 2.209252e+08\n",
"270 833874.NE 2019-12-31 2020-06-29 1.785113e+08\n",
"271 833874.NE 2018-12-31 2020-06-29 1.974978e+08\n",
"272 833874.NE 2017-12-31 2020-06-29 1.986075e+08\n",
"\n",
"[273 rows x 4 columns]"
]
},
"execution_count": 184,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"tmp_table\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "87820a2a-1ddf-4950-b0d4-a3e7d4a61b16",
"metadata": {},
"outputs": [],
"source": [] "source": []
} }
], ],

@ -0,0 +1,18 @@
import dolphindb as ddb
class DDBBase(object):
    """Base class that owns the shared DolphinDB connection settings.

    Subclasses get a ready-to-use, authenticated session as ``self.ddb_sess``.
    """

    # Connection settings shared by every DolphinDB helper class.
    ddb_config = {
        'host' : '192.168.1.167',
        'port' : 8848,        # generalized: was hard-coded at the call site
        'username' : 'admin',
        'password' : '123456',
    }

    def __init__(self):
        # Open a session against the configured server and authenticate.
        self.ddb_sess = ddb.session(self.ddb_config['host'], self.ddb_config['port'])
        # BUG FIX: the original called `self.login(...)`, but `login` is a
        # method of the session object, not of DDBBase -> AttributeError.
        self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])

@ -0,0 +1,365 @@
from tqdm import tqdm
import numpy as np
import pandas as pd
import dolphindb as ddb
from DDBBase import DDBBase
from DDBLoader import DDBPITLoader
class DDBExpression(DDBBase):
    """
    Runs expression computations by calling functions / function views that
    were defined beforehand inside DolphinDB.

    The DolphinDB-side definitions themselves are created by `DDBExpreCreator`.
    """

    def _run_over_partitions(self, script_template, **kwargs):
        """Run a PIT query once per code partition and stack the results.

        Args:
            script_template: DolphinDB script containing a
                `{code_partition_id}` placeholder plus any placeholders
                supplied through `**kwargs`.

        Return:
            DataFrame indexed by (code, report_period, appear_at_date) with
            the rows of every partition stacked together.
        """
        df_list = []
        for i in range(DDBPITLoader.num_code_partition):
            # BUG FIX: the original passed `code_parition_id=i` (typo), so
            # `.format()` raised KeyError on `{code_partition_id}`.
            _df = self.ddb_sess.run(script_template.format(
                code_partition_id = i,
                **kwargs
            ))
            _df.set_index(['code', 'report_period', 'appear_at_date'], inplace=True)
            df_list.append(_df)
        # Partitions hold disjoint sets of codes, so stack them row-wise.
        # (The original used axis=1, which would scatter disjoint rows across
        # mostly-NaN column blocks — presumably unintended; confirm.)
        return pd.concat(df_list, axis=0)

    def pit_at_date(self, table_name, date, report_period_list):
        """
        Get the latest visible financial-report data as seen on any given date.

        Implementation: calls a Function View created beforehand in DolphinDB.

        Args:
            table_name: financial statement table name, including the
                adjusted/original suffix (not the stem name).
            date: observation date in DolphinDB date format, i.e.
                YYYY.MM.DD, without quotes.
            report_period_list: list of report periods, each one a DolphinDB
                date literal.

        Return:
            DataFrame: the cross-section, as visible on `date`, of every
            company's most recently visible report whose period is in
            `report_period_list`, taken from table `table_name`.
        """
        return self._run_over_partitions("""
            select * from pit_at_date("{table_name}", {date}, [{report_period_list}], {code_partition_id});
            """,
            table_name = table_name,
            date = date,
            report_period_list = ','.join(report_period_list),
        )

    def pit_col_at_date(self, table_name, col_name, date, report_period_list):
        """
        Get the latest visible value of one report column as seen on any date.

        Implementation: calls a Function View created beforehand in DolphinDB.

        Args:
            table_name: financial statement table name, including the
                adjusted/original suffix (not the stem name).
            col_name: column name, normally a concrete financial line item.
            date: observation date in DolphinDB date format, i.e.
                YYYY.MM.DD, without quotes.
            report_period_list: list of report periods, each one a DolphinDB
                date literal.

        Return:
            DataFrame: the cross-section, as visible on `date`, of the
            `col_name` field of every company's most recently visible report
            whose period is in `report_period_list`, from table `table_name`.
        """
        return self._run_over_partitions("""
            select * from pit_col_at_date("{table_name}", "{col_name}", {date}, [{report_period_list}], {code_partition_id});
            """,
            table_name = table_name,
            col_name = col_name,
            date = date,
            report_period_list = ','.join(report_period_list),
        )

    def yoy(self, dataframe, col_list):
        """Compute year-over-year growth via the server-side `yoy` function."""
        self.ddb_sess.upload({'table_yoy' : dataframe})
        # BUG FIX: the original discarded the query result; return it.
        return self.ddb_sess.run("""
            yoy(table_yoy, [{col_list}]);
        """.format(
            col_list = ','.join(f"'{col_name}'" for col_name in col_list)
        ))

    def single_quarter(self, dataframe, col_list):
        """Convert cumulative quarterly values to single-quarter flux values."""
        self.ddb_sess.upload({'table_single_quarter' : dataframe})
        # BUG FIX: the original interpolated an undefined local `table_name`
        # (NameError) and discarded the query result.
        return self.ddb_sess.run("""
            single_quarter(table_single_quarter, [{col_list}])
        """.format(
            col_list = ','.join(f"'{col_name}'" for col_name in col_list)
        ))
class DDBExpreCreator(DDBBase):
    """
    Defines the basic server-side operations in DolphinDB; most of the
    substantial code in this class is therefore DolphinDB script rather than
    Python.

    `Expre` is short for `Expression`.
    """

    # Database paths/names for the point-in-time and daily stock databases.
    pit_dbpath = "dfs://pit_stock_ts"
    pit_dbname = "db_pit_stock"
    daily_dbpath = "dfs://daily_stock_ts"
    daily_dbname = "db_daily_stock"

    def create_function_views(self):
        """Create every Function View used by `DDBExpression`."""
        self._func_view_pit_at_date()
        self._func_view_pit_col_at_date()

    def create_functions(self):
        """Create every session-level function used by `DDBExpression`."""
        # Fixed AttributeError: the method is `_func_yoy`, not `_fun_yoy`.
        self._func_yoy()
        self._func_single_quarter()

    def _func_single_quarter(self):
        """
        Define the server-side `single_quarter` function, which converts
        cumulative (year-to-date) statement values into single-quarter flux
        values by subtracting the previous quarter's cumulative base.
        """
        code = """
            /**
             * Args:
             *     table: DolphinDB的表可以是一个内存表也可以是一个分区表
             *     col_list: 需要进行单季转换的字段名列表
             */
            def single_quarter(table_name, col_list) {
                sel_col = array(ANY, 4 + size(col_list));
                sel_col[0] = sqlCol('code');
                sel_col[2] = sqlColAlias(<year(report_period)>, 'year');
                for (i in 0..(size(col_list) - 1)) {
                    sel_col[i + 4] = sqlCol(col_list[i]);
                }

                // 当季累计数据作为下一季基准
                sel_col[1] = sqlColAlias(sqlCol('report_period'), 'base_report_period');
                sel_col[3] = sqlColAlias(<quarterOfYear(report_period) + 1>, 'quarter_of_year');
                tbl_quarter_accum_base = sql(
                    select = sel_col,
                    from = table_name,
                    where = <quarterOfYear(report_period) < 4>
                ).eval();

                // 从第二季开始需要去匹配前一季累计基数
                sel_col[1] = sqlColAlias(sqlCol('report_period'), 'current_report_period');
                sel_col[3] = sqlColAlias(<quarterOfYear(report_period)>, 'quarter_of_year');
                tbl_quarter_accum = sql(
                    select = sel_col,
                    from = table_name,
                    where = <quarterOfYear(report_period) > 1>
                ).eval();

                // 单季流量把非第一季的季报都要扣除前一季度的基数
                sel_col[1] = sqlColAlias(sqlCol('current_report_period'), 'report_period');
                sel_col[2] = sqlCol('year');
                sel_col[3] = sqlCol('quarter_of_year');
                for (i in 0..(size(col_list) - 1)) {
                    sel_col[i + 4] = sqlColAlias(
                        expr(
                            sqlCol('tbl_quarter_accum_' + col_list[i]),
                            -,
                            sqlCol(col_list[i])
                        )
                        , col_list[i] + '_flux');
                }
                from_obj = ej(
                    tbl_quarter_accum_base,
                    tbl_quarter_accum,
                    `code`year`quarter_of_year,
                    `code`year`quarter_of_year);
                tbl_quarter_flux = sql(
                    select = sel_col,
                    from = from_obj
                ).eval();

                // 每年第一个季度
                sel_col[1] = sqlCol('report_period');
                sel_col[2] = sqlColAlias(<year(report_period)>, 'year');
                sel_col[3] = sqlColAlias(<quarterOfYear(report_period)>, 'quarter_of_year');
                for (i in 0..(size(col_list) - 1)) {
                    sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_flux');
                }
                tbl_quarter1 = sql(
                    select = sel_col,
                    from = table_name,
                    where = <quarterOfYear(report_period) == 1>
                ).eval();

                // 再拼接回第一季度无需扣除基数的数据
                tbl_quarter_flux = unionAll(
                    tbl_quarter1,
                    tbl_quarter_flux
                );

                return tbl_quarter_flux;
            }
        """
        self.ddb_sess.run(code)

    def _func_yoy(self):
        """
        Define the server-side `yoy` function, which joins each report
        period with the same quarter of the previous year and computes the
        year-over-year growth rate `(x - x_base) / abs(x_base)` for every
        requested column.
        """
        code = """
            /**
             * Args:
             *     table: DolphinDB的表可以是一个内存表也可以是一个分区表
             *     col_list: 需要进行同比转换的字段名
             */
            def yoy(table_name, col_list) {
                sel_col = array(ANY, 4 + size(col_list));
                sel_col[0] = sqlCol('code');
                sel_col[3] = sqlColAlias(<quarterOfYear(report_period)>, 'quarter_of_year');

                // 上一年数据
                for (i in 0..(size(col_list) - 1)) {
                    sel_col[i + 4] = sqlColAlias(sqlCol(col_list[i]), col_list[i] + '_base');
                }
                sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_last_year');
                sel_col[2] = sqlColAlias(<year(report_period) + 1>, 'year');
                tbl_last_year = sql(
                    select = sel_col,
                    from = table_name
                ).eval();

                // 本年度数据
                for (i in 0..(size(col_list) - 1)) {
                    sel_col[i + 4] = sqlCol(col_list[i]);
                }
                sel_col[1] = sqlColAlias(sqlCol('report_period'), 'report_period_current_year');
                sel_col[2] = sqlColAlias(<year(report_period)>, 'year');
                tbl_this_year = sql(
                    select = sel_col,
                    from = table_name
                ).eval();

                // 计算同比增长率
                sel_col = array(ANY, 4 + (3 * size(col_list)));
                sel_col[0] = sqlCol('code');
                sel_col[1] = sqlColAlias(sqlCol('report_period_current_year'), 'report_period');
                sel_col[2] = sqlCol('year');
                sel_col[3] = sqlCol('quarter_of_year');
                for (i in 0..(size(col_list) - 1)) {
                    sel_col[3 * i + 4] = sqlCol(col_list[i] + '_base');
                    sel_col[3 * i + 5] = sqlCol(col_list[i]);
                    sel_col[3 * i + 6] = sqlColAlias(
                        expr(expr(sqlCol(col_list[i]), -, sqlCol(col_list[i] + '_base')), /, expr(abs, sqlCol(col_list[i] + '_base'))),
                        col_list[i] + '_yoy');
                }
                from_obj = ej(
                    tbl_last_year,
                    tbl_this_year,
                    `code`year`quarter_of_year,
                    `code`year`quarter_of_year);
                return sql(
                    select = sel_col,
                    from = from_obj).eval();
            }
        """
        self.ddb_sess.run(code)

    def _func_view_pit_at_date(self):
        """
        Create the `pit_at_date` Function View: the latest visible rows of a
        financial-report table as seen on an arbitrary observation date.
        """
        # Drop any previously registered view first.
        # NOTE(review): dropFunctionView appears to raise if the view does
        # not exist yet (e.g. on a fresh server) — confirm whether this
        # needs a try/catch on the DolphinDB side.
        self.ddb_sess.run("""
            dropFunctionView('pit_at_date');
        """)

        # The DolphinDB script below contains literal `{`/`}` braces, so
        # `.format()` cannot be applied to it (it would raise on the
        # function body's braces); substitute the single placeholder with
        # `.replace()` instead.  This also fixes the original NameError:
        # `pit_dbpath` was referenced as a bare name instead of
        # `self.pit_dbpath`.
        self.ddb_sess.run("""
            /**
             * Example of use:
             *     pit_at_date("is_common_ori", "TOT_OPER_REV", 2021.03.14, [2019.12.31, 2019.09.30, 2019.06.30], 0)
             */
            def pit_at_date(table_name, date, report_period_list, code_partition_id) {
                source_table = loadTable("{pit_dbpath}", table_name);
                m_nDate = take(date, size(report_period_list));
                report_period = report_period_list;
                query_table = table(report_period, m_nDate);

                query_table_exp = select * from cj(
                    query_table,
                    select code
                    from source_table
                    where partition(code, code_partition_id)
                    group by code map
                );

                source_table_part = select source_table.* from ej(
                    source_table, query_table_exp, `code`report_period
                ) where partition(code, code_partition_id);

                return select source_table_part.* from aj(
                    query_table_exp,
                    source_table_part,
                    `code`report_period`m_nDate,
                    `code`report_period`appear_at_date
                ) where not isNull(source_table_part.code);
            }
            addFunctionView(pit_at_date);
        """.replace('{pit_dbpath}', self.pit_dbpath))

    def _func_view_pit_col_at_date(self):
        """
        Create the `pit_col_at_date` Function View: the latest visible value
        of one statement column as seen on an arbitrary observation date.

        Differs from `pit_at_date` in that `pit_col_at_date` returns a
        single column (plus keys) instead of the whole table.
        """
        # Drop any previously registered view first (see the review note in
        # `_func_view_pit_at_date` about a missing view).
        self.ddb_sess.run("""
            dropFunctionView('pit_col_at_date');
        """)

        # `.replace()` instead of `.format()`: the script's literal braces
        # would make `.format()` raise before any substitution happened.
        self.ddb_sess.run("""
            /**
             * Example of use:
             *     pit_col_at_date("is_common_ori", "TOT_OPER_REV", 2021.03.14, [2019.12.31, 2019.09.30, 2019.06.30], 0)
             */
            def pit_col_at_date(table_name, col_name, date, report_period_list, code_partition_id) {
                source_table = loadTable("{pit_dbpath}", table_name);
                m_nDate = take(date, size(report_period_list));
                report_period = report_period_list;
                query_table = table(report_period, m_nDate);

                query_table_exp = select * from cj(
                    query_table,
                    select code
                    from source_table
                    where partition(code, code_partition_id)
                    group by code map
                );

                col_list = sqlCol(['code', 'report_period', 'appear_at_date', col_name]);
                from_tbl = <ej(source_table, query_table_exp, `code`report_period)>;
                where_conditions = [<partition(code, code_partition_id)>];
                source_table_part = sql(select=col_list, from=from_tbl, where=where_conditions).eval();

                return select source_table_part.* from aj(
                    query_table_exp,
                    source_table_part,
                    `code`report_period`m_nDate,
                    `code`report_period`appear_at_date
                ) where not isNull(source_table_part.code)
            }
            addFunctionView(pit_col_at_date);
        """.replace('{pit_dbpath}', self.pit_dbpath))

@ -5,10 +5,10 @@ import functools
import dolphindb as ddb import dolphindb as ddb
from DDBBase import DDBBase
from DDBLoader import DDBLoader from DDBLoader import DDBLoader
def load_ddb_table(hft_tbl_name): def load_ddb_table(hft_tbl_name):
""" """
这是一个用来简化载入分区表过程的语法糖但似乎需要预先调用这个函数的场景并不多简化效果不是很明显 这是一个用来简化载入分区表过程的语法糖但似乎需要预先调用这个函数的场景并不多简化效果不是很明显
@ -30,27 +30,17 @@ def load_ddb_table(hft_tbl_name):
return decorator return decorator
class DailyFactor(object): class DDBDailyFactor(DDBBase):
#ddb_hft_path = "dfs://hft_stock_ts" #ddb_hft_path = "dfs://hft_stock_ts"
#ddb_hft_dbname = "db_hft_stock" #ddb_hft_dbname = "db_hft_stock"
ddb_daily_path = "dfs://daily_stock_ts" ddb_daily_path = "dfs://daily_stock_ts"
ddb_daily_dbname = "db_daily_stock" ddb_daily_dbname = "db_daily_stock"
ddb_config = {
'host' : '192.168.1.167',
'username' : 'admin',
'password' : '123456',
}
# 这里的partition数量未必需要和hft表的一致 # 这里的partition数量未必需要和hft表的一致
# 当读取hft表的时候需要使用DDBLoader中的`num_code_partition`,而不是此字段 # 当读取hft表的时候需要使用DDBLoader中的`num_code_partition`,而不是此字段
num_code_partition = 50 num_code_partition = 50
def __init__(self):
self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])
def create_ddb_database(self): def create_ddb_database(self):
""" """

@ -23,9 +23,10 @@ import sqlalchemy as sa
import ProtoBuffEntitys import ProtoBuffEntitys
from DDBBase import DDBBase
class DDBLoader(object): class DDBLoader(DDBBase):
""" """
- 放了几个公用的配置字段包括 - 放了几个公用的配置字段包括
1. SQL-Server的链接参数 1. SQL-Server的链接参数
@ -42,14 +43,8 @@ class DDBLoader(object):
'password' : 'passw0rd!' 'password' : 'passw0rd!'
} }
ddb_config = {
'host' : '192.168.1.167',
'username' : 'admin',
'password' : '123456'
}
def __init__(self): def __init__(self):
super().__init__()
self.mssql_engine = sa.create_engine( self.mssql_engine = sa.create_engine(
"mssql+pyodbc://{username}:{password}@{host}/master?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config), "mssql+pyodbc://{username}:{password}@{host}/master?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config),
connect_args = { connect_args = {
@ -57,9 +52,6 @@ class DDBLoader(object):
}, echo=False }, echo=False
) )
self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])
@abc.abstractmethod @abc.abstractmethod
def create_ddb_database(self, *args, **kwargs): def create_ddb_database(self, *args, **kwargs):
@ -552,7 +544,7 @@ class DDBHFTLoader(DDBLoader):
num_workers = 8 num_workers = 8
default_table_capacity = 10000 default_table_capacity = 10000
ddb_dump_journal_fname = 'ddb_dump_journal.csv' ddb_dump_journal_fname = '../assets/ddb_dump_journal.csv'
def init_ddb_database(self, df_calendar): def init_ddb_database(self, df_calendar):
@ -913,7 +905,9 @@ class DDBHFTLoader(DDBLoader):
ddb_sess.login(DDBLoader.ddb_config['username'], DDBLoader.ddb_config['password']) ddb_sess.login(DDBLoader.ddb_config['username'], DDBLoader.ddb_config['password'])
ddb_sess.upload({df_table_name : df}) ddb_sess.upload({df_table_name : df})
ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format( # 因为在做Tick数据的时候偶然发生'CHUNK[xxx] does not exist.',所以在这里使用`append!`函数代换一下试试
ddb_sess.run("append!(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
#ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
dbPath = DDBLoader.ddb_path, dbPath = DDBLoader.ddb_path,
partitioned_table_name = type_name + DDBLoader.ddb_partition_table_suffix, partitioned_table_name = type_name + DDBLoader.ddb_partition_table_suffix,
df_table_name = df_table_name df_table_name = df_table_name
@ -923,6 +917,9 @@ class DDBHFTLoader(DDBLoader):
def main(): def main():
# TODO:
# 可以使用`Fire`库,对函数调用再做一次封装,就可以避免每次运行不同参数时候需要修改内部多处的代码。
# PIT基本面数据 # PIT基本面数据
loader = DDBPITLoader() loader = DDBPITLoader()
loader.create_ddb_database() loader.create_ddb_database()

Loading…
Cancel
Save