diff --git a/README.md b/README.md index 6c15ca7..eb6f162 100644 --- a/README.md +++ b/README.md @@ -12,28 +12,28 @@ │   └── scripts ├── ipynb │   ├── ddb.ipynb +│   ├── ddb_pe.ipynb │   ├── ddb_pit.ipynb │   ├── dos.ipynb │   └── mssql.ipynb -├── Readme.txt +├── README.md └── src ├── DDBBase.py - ├── DDBExpression.py - ├── DDBFactor.py - ├── DDBLoader.py - ├── make_hft.py - ├── ProtoBuffEntitys - │   ├── HFDataTableMessage_pb2.py - │   ├── IndexFutureKLineMessage_pb2.py - │   ├── IndexFutureL1TickMessage_pb2.py - │   ├── IndexKLineMessage_pb2.py - │   ├── IndexTickMessage_pb2.py - │   ├── KLineMessage_pb2.py - │   ├── OrderMessage_pb2.py - │   ├── TickMessage_pb2.py - │   ├── TickQueueMessage_pb2.py - │   └── TranseMessage_pb2.py + ├── expr + │   └── DDBExpression.py + ├── factor + │   └── DDBFactor.py + ├── __init__.py + ├── loader + │   ├── DDBBasicInfoLoader.py + │   ├── DDBDailyLoader.py + │   ├── DDBHFTLoader.py + │   ├── DDBLoader.py + │   ├── DDBPITLoader.py + │   ├── make_hft.py + │   └── ProtoBuffEntitys ├── ReadTickFromDB.py + ├── run.py └── script.py ``` @@ -51,21 +51,28 @@ - `src` - Python源代码目录,是工作的主要目录 - 大部分源码文件以`DDB`开头,表示与DolphinDB相关,继承和代码复用关系可以进一步优化 - - `DDBBase`是`DDB*`的基类,包含了一些常用常量 - - `DDBLoader`专门用于向DolphinDB写入数据(似乎称为Dumper更合适),目前包括: - - `DDBHFTLoader`,用以导入高频数据(海通高频数据) - - `DDBDailyLoader`,用以导入日频数据(市场日K线数据) - - `DDBPITLoader`,用以导入低频PIT数据,主要是财报数据 - - `DDBFactor`用于生成因子,目前只有生成日频因子 - - `DDBExpression`提供了基本的操作表达式,目前包括: + - `DDBBase.py`: + - `DDB*`的基类,包含了一些常用常量 + - `run.py`: + - 用于执行具体操作任务的入口脚本 + - TODO:需要支持命令行参数来选择执行功能和配套参数 + - `loader`目录:代码专门用于向DolphinDB写入数据(似乎称为Dumper更合适),目前包括: + - `DDBLoader.py`:其他Loader类的基础类 + - `DDBHFTLoader.py`:用以导入高频数据(海通高频数据) + - `DDBDailyLoader.py`:用以导入日频数据(市场日K线数据) + - `DDBPITLoader.py`:用以导入低频PIT数据,主要是财报数据 + - `loader/ProtoBuffEntitys`目录: + - 用以解析海通高频数据中,protobuf字节流的Python辅助代码 + - `factor`目录:用于生成因子 + - `DDBFactor.py`:目前只有生成日频因子 + - `expr`目录:用于支持基于DDB的各种表达式操作 + - `DDBExpression.py`:提供了基本的操作表达式,目前包括: - 通过Function View,读取DolphinDB的分布式存储表: - 在任意时间点上,获取所有公司某张财报、指定财报期的最新可见快照 - 在任意时间点上,获取所有公司某张财报中某科目、指定财报期的最新可见快照 - 通过一般Function,对内存表进行操作: - 流量表累计数值转单季值 - 跨年同比指标计算(分母为去年基数取绝对值,函数会同时返回当年值和去年基数,以便进一步调整) -- `src/ProtoBuffEntitys` - - 用以解析海通高频数据中,protobuf字节流的Python辅助代码 ## 海通高频数据录入说明 diff --git a/ipynb/ddb.ipynb b/ipynb/ddb.ipynb index e7669e7..4c0de16 100644 --- a/ipynb/ddb.ipynb +++ b/ipynb/ddb.ipynb @@ -35,15 +35,31 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 60, "id": "ed17fd0b-9b36-47e4-9ab6-11459a3621fb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "115548" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "sess = ddb.session('192.168.1.7', 8848)\n", + "sess.login('admin', '123456')\n", + "\n", "# backup(backup_path, sql_obj, force, parallel)\n", "code = \"\"\"\n", - " backup('/data/dolphindb/backup/', , true, true)\n", + "\"\"\"\n", + "\n", + "sess.run(code)" ] }, { @@ -1389,7 +1405,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 100, "id": "4ba45027-bbb5-4b27-99da-3452cc8d2f1c", "metadata": {}, "outputs": [ @@ -1420,7 +1436,7 @@ " \n", " \n", " 0\n", - " 2298\n", + " 2227\n", " \n", " \n", "\n", @@ -1428,10 +1444,10 @@ ], "text/plain": [ " count\n", - "0 2298" + "0 2227" ] }, - "execution_count": 43, + "execution_count": 100, "metadata": {}, "output_type": "execute_result" } @@ -1439,14 +1455,14 @@ "source": [ "sess.run(\"\"\"\n", " select count(*) from (\n", - " select code, m_nDate, count(*) as cnt from tbl where code='002182.SZ' group by code, m_nDate map\n", + " select code, m_nDate, count(*) as cnt from tbl where code='002459.SZ' group by code, m_nDate map\n", " );\n", "\"\"\")" ] }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 103, "id": "29ab8af5-e571-4064-b691-a186d9fb4d08", "metadata": {}, "outputs": [ @@ -1471,66 +1487,32 @@ " \n", " \n", " \n", + " count\n", " \n", " \n", " m_nDate\n", + " \n", " \n", " \n", " \n", - " \n", - " 2013-01-04\n", - " \n", - " \n", - " 2013-01-07\n", - " \n", - " \n", - " 2013-01-08\n", - " \n", - " \n", - " 2013-01-09\n", - " \n", - " \n", - " 2013-01-10\n", - " \n", - " \n", - " ...\n", - " \n", - " \n", - " 2022-07-04\n", - " \n", - " \n", - " 2022-07-05\n", - " \n", - " \n", - " 2022-07-06\n", - " \n", - " \n", - " 2022-07-07\n", - " \n", - " \n", - " 2022-07-08\n", - " \n", " \n", "\n", - "

2298 rows × 0 columns

\n", "" ], "text/plain": [ "Empty DataFrame\n", - "Columns: []\n", - "Index: [2013-01-04 00:00:00, 2013-01-07 00:00:00, 2013-01-08 00:00:00, 2013-01-09 00:00:00, 2013-01-10 00:00:00, 2013-01-11 00:00:00, 2013-01-14 00:00:00, 2013-01-15 00:00:00, 2013-01-16 00:00:00, 2013-01-17 00:00:00, 2013-01-18 00:00:00, 2013-01-21 00:00:00, 2013-01-22 00:00:00, 2013-01-23 00:00:00, 2013-01-24 00:00:00, 2013-01-25 00:00:00, 2013-01-28 00:00:00, 2013-01-29 00:00:00, 2013-01-30 00:00:00, 2013-01-31 00:00:00, 2013-02-01 00:00:00, 2013-02-04 00:00:00, 2013-02-05 00:00:00, 2013-02-06 00:00:00, 2013-02-07 00:00:00, 2013-02-08 00:00:00, 2013-02-18 00:00:00, 2013-02-19 00:00:00, 2013-02-20 00:00:00, 2013-02-21 00:00:00, 2013-02-22 00:00:00, 2013-02-25 00:00:00, 2013-02-26 00:00:00, 2013-02-27 00:00:00, 2013-02-28 00:00:00, 2013-03-01 00:00:00, 2013-03-04 00:00:00, 2013-03-05 00:00:00, 2013-03-06 00:00:00, 2013-03-07 00:00:00, 2013-03-08 00:00:00, 2013-03-11 00:00:00, 2013-03-12 00:00:00, 2013-03-13 00:00:00, 2013-03-14 00:00:00, 2013-03-15 00:00:00, 2013-03-18 00:00:00, 2013-03-19 00:00:00, 2013-03-20 00:00:00, 2013-03-21 00:00:00, 2013-03-22 00:00:00, 2013-03-25 00:00:00, 2013-03-26 00:00:00, 2013-03-27 00:00:00, 2013-03-28 00:00:00, 2013-03-29 00:00:00, 2013-04-01 00:00:00, 2013-04-02 00:00:00, 2013-04-03 00:00:00, 2013-04-08 00:00:00, 2013-04-09 00:00:00, 2013-04-10 00:00:00, 2013-04-11 00:00:00, 2013-04-12 00:00:00, 2013-04-15 00:00:00, 2013-04-16 00:00:00, 2013-04-17 00:00:00, 2013-04-18 00:00:00, 2013-04-19 00:00:00, 2013-04-22 00:00:00, 2013-04-23 00:00:00, 2013-04-24 00:00:00, 2013-04-25 00:00:00, 2013-04-26 00:00:00, 2013-05-02 00:00:00, 2013-05-03 00:00:00, 2013-05-06 00:00:00, 2013-05-07 00:00:00, 2013-05-08 00:00:00, 2013-05-09 00:00:00, 2013-05-10 00:00:00, 2013-05-13 00:00:00, 2013-05-14 00:00:00, 2013-05-15 00:00:00, 2013-05-16 00:00:00, 2013-05-17 00:00:00, 2013-05-20 00:00:00, 2013-05-21 00:00:00, 2013-05-22 00:00:00, 2013-05-23 00:00:00, 2013-05-24 00:00:00, 2013-05-27 00:00:00, 2013-05-28 00:00:00, 2013-05-29 00:00:00, 2013-05-30 00:00:00, 2013-05-31 00:00:00, 2013-06-03 00:00:00, 2013-06-04 00:00:00, 2013-06-05 00:00:00, 2013-06-06 00:00:00, ...]\n", - "\n", - "[2298 rows x 0 columns]" + "Columns: [count]\n", + "Index: []" ] }, - "execution_count": 57, + "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = sess.run(\"\"\"\n", - " select m_nDate from tbl where code='002182.SZ' group by m_nDate map;\n", + " select m_nDate, count(*) from tbl where code='002458.SZ' group by m_nDate having count(*) > 5000 map;\n", "\"\"\").set_index('m_nDate')\n", "df" ]