Guofu Li 2 years ago
commit 99798262f3

@ -12,28 +12,28 @@
│   └── scripts │   └── scripts
├── ipynb ├── ipynb
│   ├── ddb.ipynb │   ├── ddb.ipynb
│   ├── ddb_pe.ipynb
│   ├── ddb_pit.ipynb │   ├── ddb_pit.ipynb
│   ├── dos.ipynb │   ├── dos.ipynb
│   └── mssql.ipynb │   └── mssql.ipynb
├── Readme.txt ├── README.md
└── src └── src
├── DDBBase.py ├── DDBBase.py
├── DDBExpression.py ├── expr
├── DDBFactor.py │   └── DDBExpression.py
├── DDBLoader.py ├── factor
├── make_hft.py │   └── DDBFactor.py
├── ProtoBuffEntitys ├── __init__.py
│   ├── HFDataTableMessage_pb2.py ├── loader
│   ├── IndexFutureKLineMessage_pb2.py │   ├── DDBBasicInfoLoader.py
│   ├── IndexFutureL1TickMessage_pb2.py │   ├── DDBDailyLoader.py
│   ├── IndexKLineMessage_pb2.py │   ├── DDBHFTLoader.py
│   ├── IndexTickMessage_pb2.py │   ├── DDBLoader.py
│   ├── KLineMessage_pb2.py │   ├── DDBPITLoader.py
│   ├── OrderMessage_pb2.py │   ├── make_hft.py
│   ├── TickMessage_pb2.py │   └── ProtoBuffEntitys
│   ├── TickQueueMessage_pb2.py
│   └── TranseMessage_pb2.py
├── ReadTickFromDB.py ├── ReadTickFromDB.py
├── run.py
└── script.py └── script.py
``` ```
@ -51,21 +51,28 @@
- `src` - `src`
- Python源代码目录,是工作的主要目录 - Python源代码目录,是工作的主要目录
- 大部分源码文件以`DDB`开头,表示与DolphinDB相关;继承和代码复用关系可以进一步优化 - 大部分源码文件以`DDB`开头,表示与DolphinDB相关;继承和代码复用关系可以进一步优化
- `DDBBase`是`DDB*`的基类,包含了一些常用常量 - `DDBBase.py`
- `DDBLoader`专门用于向DolphinDB写入数据(似乎称为Dumper更合适),目前包括: - `DDB*`的基类,包含了一些常用常量
- `DDBHFTLoader`,用以导入高频数据(海通高频数据) - `run.py`
- `DDBDailyLoader`:用以导入日频数据(市场日K线数据) - 用于执行具体操作任务的入口脚本
- `DDBPITLoader`:用以导入低频PIT数据(主要是财报数据) - TODO:需要支持命令行参数来选择执行功能和配套参数
- `DDBFactor`用于生成因子,目前只有生成日频因子 - `loader`目录:代码专门用于向DolphinDB写入数据(似乎称为Dumper更合适),目前包括:
- `DDBExpression`提供了基本的操作表达式,目前包括: - `DDBLoader.py`:其他Loader类的基础类
- `DDBHFTLoader.py`:用以导入高频数据(海通高频数据)
- `DDBDailyLoader.py`:用以导入日频数据(市场日K线数据)
- `DDBPITLoader.py`:用以导入低频PIT数据(主要是财报数据)
- `loader/ProtoBuffEntitys`目录:
- 用以解析海通高频数据中protobuf字节流的Python辅助代码
- `factor`目录:用于生成因子
- `DDBFactor.py`:目前只有生成日频因子
- `expr`目录:用于支持基于DDB的各种表达式操作
- `DDBExpression.py`:提供了基本的操作表达式,目前包括:
- 通过Function View读取DolphinDB的分布式存储表 - 通过Function View读取DolphinDB的分布式存储表
- 在任意时间点上,获取所有公司某张财报、指定财报期的最新可见快照 - 在任意时间点上,获取所有公司某张财报、指定财报期的最新可见快照
- 在任意时间点上,获取所有公司某张财报中某科目、指定财报期的最新可见快照 - 在任意时间点上,获取所有公司某张财报中某科目、指定财报期的最新可见快照
- 通过一般Function对内存表进行操作 - 通过一般Function对内存表进行操作
- 流量表累计数值转单季值 - 流量表累计数值转单季值
- 跨年同比指标计算(分母为去年基数取绝对值,函数会同时返回当年值和去年基数,以便进一步调整) - 跨年同比指标计算(分母为去年基数取绝对值,函数会同时返回当年值和去年基数,以便进一步调整)
- `src/ProtoBuffEntitys`
- 用以解析海通高频数据中protobuf字节流的Python辅助代码
## 海通高频数据录入说明 ## 海通高频数据录入说明

@ -35,15 +35,31 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 67, "execution_count": 60,
"id": "ed17fd0b-9b36-47e4-9ab6-11459a3621fb", "id": "ed17fd0b-9b36-47e4-9ab6-11459a3621fb",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"115548"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"sess = ddb.session('192.168.1.7', 8848)\n",
"sess.login('admin', '123456')\n",
"\n",
"# backup(backup_path, sql_obj, force, parallel)\n", "# backup(backup_path, sql_obj, force, parallel)\n",
"code = \"\"\"\n", "code = \"\"\"\n",
" backup('/data/dolphindb/backup/', <select * from loadTable(\"dfs://hft_stock_ts\", \"OrderPartitioned\")>, true, true)\n", " backup('/data/dolphindb/backup/', <select * from loadTable(\"dfs://hft_stock_ts\", \"TickPartitioned\")>, true, true)\n",
"\"\"\"" "\"\"\"\n",
"\n",
"sess.run(code)"
] ]
}, },
{ {
@ -1389,7 +1405,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 43, "execution_count": 100,
"id": "4ba45027-bbb5-4b27-99da-3452cc8d2f1c", "id": "4ba45027-bbb5-4b27-99da-3452cc8d2f1c",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -1420,7 +1436,7 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>2298</td>\n", " <td>2227</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
@ -1428,10 +1444,10 @@
], ],
"text/plain": [ "text/plain": [
" count\n", " count\n",
"0 2298" "0 2227"
] ]
}, },
"execution_count": 43, "execution_count": 100,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1439,14 +1455,14 @@
"source": [ "source": [
"sess.run(\"\"\"\n", "sess.run(\"\"\"\n",
" select count(*) from (\n", " select count(*) from (\n",
" select code, m_nDate, count(*) as cnt from tbl where code='002182.SZ' group by code, m_nDate map\n", " select code, m_nDate, count(*) as cnt from tbl where code='002459.SZ' group by code, m_nDate map\n",
" );\n", " );\n",
"\"\"\")" "\"\"\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 57, "execution_count": 103,
"id": "29ab8af5-e571-4064-b691-a186d9fb4d08", "id": "29ab8af5-e571-4064-b691-a186d9fb4d08",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -1471,66 +1487,32 @@
" <thead>\n", " <thead>\n",
" <tr style=\"text-align: right;\">\n", " <tr style=\"text-align: right;\">\n",
" <th></th>\n", " <th></th>\n",
" <th>count</th>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>m_nDate</th>\n", " <th>m_nDate</th>\n",
" <th></th>\n",
" </tr>\n", " </tr>\n",
" </thead>\n", " </thead>\n",
" <tbody>\n", " <tbody>\n",
" <tr>\n",
" <th>2013-01-04</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-07</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-08</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-09</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2013-01-10</th>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-07-04</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-07-05</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-07-06</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-07-07</th>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-07-08</th>\n",
" </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>2298 rows × 0 columns</p>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
"Empty DataFrame\n", "Empty DataFrame\n",
"Columns: []\n", "Columns: [count]\n",
"Index: [2013-01-04 00:00:00, 2013-01-07 00:00:00, 2013-01-08 00:00:00, 2013-01-09 00:00:00, 2013-01-10 00:00:00, 2013-01-11 00:00:00, 2013-01-14 00:00:00, 2013-01-15 00:00:00, 2013-01-16 00:00:00, 2013-01-17 00:00:00, 2013-01-18 00:00:00, 2013-01-21 00:00:00, 2013-01-22 00:00:00, 2013-01-23 00:00:00, 2013-01-24 00:00:00, 2013-01-25 00:00:00, 2013-01-28 00:00:00, 2013-01-29 00:00:00, 2013-01-30 00:00:00, 2013-01-31 00:00:00, 2013-02-01 00:00:00, 2013-02-04 00:00:00, 2013-02-05 00:00:00, 2013-02-06 00:00:00, 2013-02-07 00:00:00, 2013-02-08 00:00:00, 2013-02-18 00:00:00, 2013-02-19 00:00:00, 2013-02-20 00:00:00, 2013-02-21 00:00:00, 2013-02-22 00:00:00, 2013-02-25 00:00:00, 2013-02-26 00:00:00, 2013-02-27 00:00:00, 2013-02-28 00:00:00, 2013-03-01 00:00:00, 2013-03-04 00:00:00, 2013-03-05 00:00:00, 2013-03-06 00:00:00, 2013-03-07 00:00:00, 2013-03-08 00:00:00, 2013-03-11 00:00:00, 2013-03-12 00:00:00, 2013-03-13 00:00:00, 2013-03-14 00:00:00, 2013-03-15 00:00:00, 2013-03-18 00:00:00, 2013-03-19 00:00:00, 2013-03-20 00:00:00, 2013-03-21 00:00:00, 2013-03-22 00:00:00, 2013-03-25 00:00:00, 2013-03-26 00:00:00, 2013-03-27 00:00:00, 2013-03-28 00:00:00, 2013-03-29 00:00:00, 2013-04-01 00:00:00, 2013-04-02 00:00:00, 2013-04-03 00:00:00, 2013-04-08 00:00:00, 2013-04-09 00:00:00, 2013-04-10 00:00:00, 2013-04-11 00:00:00, 2013-04-12 00:00:00, 2013-04-15 00:00:00, 2013-04-16 00:00:00, 2013-04-17 00:00:00, 2013-04-18 00:00:00, 2013-04-19 00:00:00, 2013-04-22 00:00:00, 2013-04-23 00:00:00, 2013-04-24 00:00:00, 2013-04-25 00:00:00, 2013-04-26 00:00:00, 2013-05-02 00:00:00, 2013-05-03 00:00:00, 2013-05-06 00:00:00, 2013-05-07 00:00:00, 2013-05-08 00:00:00, 2013-05-09 00:00:00, 2013-05-10 00:00:00, 2013-05-13 00:00:00, 2013-05-14 00:00:00, 2013-05-15 00:00:00, 2013-05-16 00:00:00, 2013-05-17 00:00:00, 2013-05-20 00:00:00, 2013-05-21 00:00:00, 2013-05-22 00:00:00, 2013-05-23 00:00:00, 2013-05-24 00:00:00, 2013-05-27 00:00:00, 2013-05-28 00:00:00, 2013-05-29 00:00:00, 2013-05-30 
00:00:00, 2013-05-31 00:00:00, 2013-06-03 00:00:00, 2013-06-04 00:00:00, 2013-06-05 00:00:00, 2013-06-06 00:00:00, ...]\n", "Index: []"
"\n",
"[2298 rows x 0 columns]"
] ]
}, },
"execution_count": 57, "execution_count": 103,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"df = sess.run(\"\"\"\n", "df = sess.run(\"\"\"\n",
" select m_nDate from tbl where code='002182.SZ' group by m_nDate map;\n", " select m_nDate, count(*) from tbl where code='002458.SZ' group by m_nDate having count(*) > 5000 map;\n",
"\"\"\").set_index('m_nDate')\n", "\"\"\").set_index('m_nDate')\n",
"df" "df"
] ]

Loading…
Cancel
Save