1. Add `DDBDailyFactor.py`, which creates daily factors from HFT data.

2. Correct some typos in `DDBLoader.py`; these changes should not affect its normal behaviour.
main
Guofu Li 2 years ago
parent 65d667ecab
commit 621b38dce3

@ -0,0 +1,216 @@
from pprint import pprint
from tqdm import tqdm
import functools
import dolphindb as ddb
from DDBLoader import DDBLoader
def load_ddb_table(hft_tbl_name):
    """Build a method decorator that loads a DolphinDB table before the call.

    Before the decorated method runs, the wrapper executes a DolphinDB script
    on ``self.ddb_sess`` that binds the session variable ``tbl`` to
    ``loadTable(DDBLoader.ddb_path, hft_tbl_name)``, so the method's own
    scripts can refer to ``tbl``.

    Args:
        hft_tbl_name: Name of the table to load from the database located at
            ``DDBLoader.ddb_path`` (presumably the HFT database -- confirm
            against ``DDBLoader``).

    Returns:
        A decorator for instance methods whose ``self`` exposes a
        ``ddb_sess`` object with a ``run(script)`` method.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            self.ddb_sess.run("""
                // 载入计算使用的原始数据分钟线数据
                tbl = loadTable("{hft_ddb_path}", "{hft_tbl_name}");
            """.format(
                hft_ddb_path=DDBLoader.ddb_path,
                hft_tbl_name=hft_tbl_name,
            ))
            print('Did load', hft_tbl_name)
            # Forward all arguments: the wrapper accepts *args/**kwargs, so
            # dropping them would silently break any method with parameters.
            return func(self, *args, **kwargs)
        return wrapper
    return decorator
class DailyFactor(object):
    """Compute daily factors from HFT minute-bar data and store them in DolphinDB.

    The class opens its own DolphinDB session on construction. Factor values
    are first accumulated in an in-memory table on the server and then written
    to a partitioned table inside the daily database (``ddb_daily_path``).
    """

    #ddb_hft_path = "dfs://hft_stock_ts"
    #ddb_hft_dbname = "db_hft_stock"

    # Location and session-side handle name of the daily-frequency database.
    ddb_daily_path = "dfs://daily_stock_ts"
    ddb_daily_dbname = "db_daily_stock"

    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or environment variables.
    ddb_config = {
        'host' : '192.168.1.167',
        'username' : 'admin',
        'password' : '123456',
    }

    # The number of partitions here does not have to match the HFT table.
    # When reading the HFT table, use `DDBLoader.num_code_partition`
    # instead of this field.
    num_code_partition = 50

    def __init__(self):
        """Open a DolphinDB session on port 8848 and log in with ``ddb_config``."""
        self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
        self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])

    def create_ddb_database(self):
        """Create the daily database, HASH-partitioned on a SYMBOL column.

        Daily data is small and ``m_nDate`` can be used as a sort key inside
        each partition, so hashing the stock code into
        ``num_code_partition`` buckets is sufficient and no calendar data is
        needed as input.
        """
        # Fixes: the placeholder used to be misspelled (`num_code_parition`),
        # which made `.format` raise KeyError, and the directory pointed at
        # `DDBLoader.ddb_path` instead of the daily database path.
        self.ddb_sess.run("""
            daily_stock_ts = database(
                "{ddb_daily_path}",
                HASH, [SYMBOL, {num_code_partition}],
                engine = 'TSDB'
            )
        """.format(
            ddb_daily_path=self.ddb_daily_path,
            num_code_partition=self.num_code_partition,
        ))
        print('Did create database')

    def load_ddb_database(self):
        """Bind the daily database to the session variable ``ddb_daily_dbname``."""
        self.ddb_sess.run("""
            {dbName} = database(
                directory = '{dbPath}',
                partitionType = HASH,
                partitionScheme = [SYMBOL, {num_code_partition}],
                engine = 'TSDB'
            )
        """.format(
            dbName=self.ddb_daily_dbname,
            dbPath=self.ddb_daily_path,
            num_code_partition=self.num_code_partition,
        ))
        print('Did load database.')

    def append_factor_columns(self, factor_name_list, memory_tbl_name, partition_tbl_name):
        """Add DOUBLE factor columns to a partitioned table, then insert data.

        Args:
            factor_name_list: Names of the columns to add; each is typed DOUBLE.
            memory_tbl_name: Session variable name of the in-memory source table.
            partition_tbl_name: Session variable name of the partitioned table.
        """
        code = """
            addColumn({partition_tbl_name}, {col_name_list}, {col_type_list});
        """.format(
            partition_tbl_name=partition_tbl_name,
            # DolphinDB symbol-vector literal, e.g. `a`b`c
            col_name_list='`' + '`'.join(factor_name_list),
            col_type_list='[' + ','.join(['DOUBLE'] * len(factor_name_list)) + ']',
        )
        print('Will add columns via script:')
        print(code)
        self.ddb_sess.run(code)

        code = """
            {partition_tbl_name}.tableInsert({memory_tbl_name})
        """.format(
            partition_tbl_name=partition_tbl_name,
            memory_tbl_name=memory_tbl_name,
        )
        print('Will append date via script:')
        print(code)
        self.ddb_sess.run(code)

    def append_to_partition_table(self, partition_tbl_name, memory_tbl_name):
        """Insert all rows of ``memory_tbl_name`` into ``partition_tbl_name``.

        Both arguments are names of variables already bound in the session.
        """
        self.ddb_sess.run("""
            {partition_tbl_name}.tableInsert({memory_tbl_name})
        """.format(
            partition_tbl_name=partition_tbl_name,
            memory_tbl_name=memory_tbl_name,
        ))

    def create_factor_partition_table(self, partition_tbl_name, memory_tbl_name):
        """(Re)create a partitioned factor table in the daily database.

        Any existing table with the same name is dropped first. The in-memory
        table ``memory_tbl_name`` is passed as the ``table`` argument of
        ``createPartitionedTable``; the new table is partitioned on ``code``
        and sorted by ``code`` and ``m_nDate``. Requires that
        ``load_ddb_database`` has bound the database handle beforehand.

        Args:
            partition_tbl_name: Name of the table to create; also used as the
                session variable bound to the new table.
            memory_tbl_name: Session variable name of the in-memory table.
        """
        # createPartitionedTable(
        #     dbHandle, table, tableName,
        #     [partitionColumns], [compressMethods],
        #     [sortColumns], [keepDuplicates=ALL], [sortKeyMappingFunction])
        code = """
            // 保证创建新的分区表不会和已经存在表冲突
            if (existsTable("{ddb_daily_path}", "{partition_tbl_name}")) {{
                dropTable({ddb_daily_dbname}, "{partition_tbl_name}");
            }}
            {partition_tbl_name} = createPartitionedTable(
                dbHandle = {ddb_daily_dbname},
                table = {memory_tbl_name},
                tableName = "{partition_tbl_name}",
                partitionColumns = 'code',
                compressMethods = {{'m_nDate' : 'delta'}},
                sortColumns = `code`m_nDate
            );
        """.format(
            ddb_daily_path=self.ddb_daily_path,
            ddb_daily_dbname=self.ddb_daily_dbname,
            partition_tbl_name=partition_tbl_name,
            memory_tbl_name=memory_tbl_name,
        )
        print('Will create partitioned factor table via script:')
        print(code)
        self.ddb_sess.run(code)

    @load_ddb_table("KLinePartitioned")
    def make_kurto_memory_table(self):
        """Compute the daily ``kurto`` factor into a server-side in-memory table.

        The source table (bound to ``tbl`` by the decorator) is processed one
        code partition at a time. For each partition, per-bar returns are
        derived from ``m_nClose`` and aggregated per ``(code, m_nDate)``.

        Returns:
            The name of the session variable holding the in-memory table.
        """
        memory_table_name = "kurto"
        # `context by code, m_nDate` keeps the return series within a single
        # stock and day. Grouping by date alone let `eachPre` divide the first
        # bar of one stock by the last bar of the previous stock in the same
        # partition, which swamped the statistic with one huge "return".
        # `\\,` yields the same script text as the former `\,` without relying
        # on an invalid Python escape sequence.
        code_tpl = """
            // 需要首先创建分钟线收益表
            // 使用`context by`使得计算结果仍然为一个序列
            // 使用`where partition()`来逐个加载分区
            ret_sql = select
                code, m_nDate, eachPre(\\, m_nClose)-1.0 as ret
            from tbl
            where partition(code, {partition_id})
            context by code, m_nDate;
            // 计算kurto指标`ret`表中每日第一条记录为空似乎并不造成影响
            kurto_sql = select
                code, m_nDate, sqrt(239) * sum(pow(ret, 3)) / pow(sum(pow(ret, 2)), 1.5) as kurto
            from ret_sql
            group by code, m_nDate;
        """
        with tqdm(range(DDBLoader.num_code_partition)) as pbar:
            #with tqdm(range(1)) as pbar:
            for partition_id in pbar:
                self.ddb_sess.run(code_tpl.format(
                    partition_id=partition_id,
                ))
                # The source table has many partitions that are computed one
                # by one, so results are gathered in a temporary in-memory
                # table first; otherwise, once the first partition had been
                # inserted into the partitioned table, later data containing
                # only part of the fields could no longer be inserted.
                if partition_id == 0:
                    # `table(kurto_sql)` already carries the rows of the first
                    # partition, so it must not be inserted a second time.
                    self.ddb_sess.run("""
                        {memory_table_name} = table(kurto_sql)
                    """.format(
                        memory_table_name=memory_table_name,
                    ))
                else:
                    self.ddb_sess.run("""
                        {memory_table_name}.tableInsert(kurto_sql)
                    """.format(
                        memory_table_name=memory_table_name,
                    ))
        print('Did finish all parititons for kurto.')
        pprint(self.ddb_sess.run(f"{memory_table_name}"))
        return memory_table_name
def main():
    """Build the kurto factor and store it in the `hft_daily_factor` table.

    Steps: bind the daily database handle, compute the factor into an
    in-memory table, recreate the partitioned target table, then append the
    in-memory rows to it.
    """
    target_table = 'hft_daily_factor'

    maker = DailyFactor()
    maker.load_ddb_database()
    staged_table = maker.make_kurto_memory_table()

    # Order matters: the target table is created before rows are appended.
    for step in (maker.create_factor_partition_table,
                 maker.append_to_partition_table):
        step(target_table, staged_table)


if __name__ == '__main__':
    main()

@ -21,6 +21,9 @@ import ProtoBuffEntitys
def make_stock_daily_df(blob, type_name, stock_id):
"""
用于做多进程录入ddb的函数
"""
blob = gzip.decompress(blob)
dataArray = eval(f"ProtoBuffEntitys.{type_name}Message_pb2.{type_name}Array()")
dataArray.ParseFromString(blob)
@ -51,6 +54,9 @@ def make_stock_daily_df(blob, type_name, stock_id):
def dump_stock_daily_to_ddb(row, type_name, stock_id):
"""
用于做多进程录入ddb的函数
"""
df_table_name = type_name
df = make_stock_daily_df(row[2], type_name, stock_id)
@ -168,6 +174,9 @@ class DDBLoader(object):
'password' : '123456'
}
# this value may be used by factor makers, which may loop through code partitions
num_code_partition = 50
num_workers = 8
default_table_capacity = 10000
ddb_dump_journal_fname = 'ddb_dump_journal.csv'
@ -258,8 +267,10 @@ class DDBLoader(object):
# 这里看起来直接使用dolphindb的脚本语句更方便一些
self.ddb_sess.run("""
db_stock = database("", 5, [SYMBOL, 50])
""")
db_stock = database("", 5, [SYMBOL, {num_code_partition}])
""".format(
num_code_partition = self.num_code_parition
))
#self.ddb_sess.run("""
# db_stock = database("", 1, symbol({partitions}))
#""".format(
@ -346,7 +357,7 @@ class DDBLoader(object):
dbHandle={ddb_dbname},
table={mem_table},
tableName=`{per_table},
sortCOlumns=`code`m_nDate,
sortColumns=`code`m_nDate,
compressMethods={{"m_nDate":"delta"}}
), {mem_table})
""".format(

@ -0,0 +1,857 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "7246e0c8-61cd-4cbf-a978-aa0dc0172d6d",
"metadata": {},
"outputs": [],
"source": [
"import dolphindb as ddb"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5d0f471e-682e-43cc-abdb-7e52f3bbd707",
"metadata": {},
"outputs": [],
"source": [
"sess = ddb.session('192.168.1.7', 8848)\n",
"sess.login('admin', '123456')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ed17fd0b-9b36-47e4-9ab6-11459a3621fb",
"metadata": {},
"outputs": [],
"source": [
"# backup(backup_path, sql_obj, force, parallel)\n",
"code = \"\"\"\n",
" backup('/data/dolphindb/backup/', <select * from loadTable(\"dfs://hft_stock_ts\", \"KLinePartitioned\")>, false, true)\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8b7dae3d-aef1-4c50-92b2-460d4fea0a96",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(code)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "11275975-af43-4123-973a-f75096eb4e43",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2311.0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"115550 / 50"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "155735c7-bdad-4c64-a2a1-514fe54e088c",
"metadata": {},
"outputs": [],
"source": [
"import dolphindb as ddb\n",
"\n",
"sess = ddb.session('localhost', 8848)\n",
"sess.login('admin', '123456')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a2e95bc5-c1d5-47f5-9488-9b8702947afc",
"metadata": {},
"outputs": [],
"source": [
"# migrate(backupDir, [backupDBPath], [backupTableName], [newDBPath=backupDBPath], [newTableName=backupTableName])\n",
"code = \"\"\"\n",
" migrate('/data/dolphindb/backup/',\"dfs://hft_stock_ts\", \"KLinePartitioned\")\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a6198ef-e7f1-4f58-ba24-32dc429c9803",
"metadata": {},
"outputs": [],
"source": [
"sess.run(code)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "60f71516-33fc-4147-9740-c493ccb6b6a8",
"metadata": {},
"outputs": [],
"source": [
"def make_kurto_code_partition(partition_num):\n",
" code = \"\"\"\n",
" tbl = loadTable(\"dfs://hft_stock_ts\", \"KLinePartitioned\");\n",
" ret = select \n",
" code, m_nDate, eachPre(\\, m_nClose)-1.0 as ret \n",
" from tbl \n",
" where partition(code, {partition_num}) \n",
" context by m_nDate;\n",
" kurto = select \n",
" code, m_nDate, sqrt(239) * sum(pow(ret, 3)) / pow(sum(pow(ret, 2)), 1.5) as kurto \n",
" from ret \n",
" group by code, m_nDate;\n",
" \"\"\".format(\n",
" partition_num = partition_num\n",
" )\n",
" sess.run(code)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "1397c1b7-f81a-465e-868a-894b3ef818fc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>code</th>\n",
" <th>count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>000400.SZ</td>\n",
" <td>2305</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>000558.SZ</td>\n",
" <td>2063</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>000602.SZ</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>000677.SZ</td>\n",
" <td>2009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>000731.SZ</td>\n",
" <td>2311</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94</th>\n",
" <td>688039.SH</td>\n",
" <td>624</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>688052.SH</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>688091.SH</td>\n",
" <td>198</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>688368.SH</td>\n",
" <td>656</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>688396.SH</td>\n",
" <td>575</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>99 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" code count\n",
"0 000400.SZ 2305\n",
"1 000558.SZ 2063\n",
"2 000602.SZ 47\n",
"3 000677.SZ 2009\n",
"4 000731.SZ 2311\n",
".. ... ...\n",
"94 688039.SH 624\n",
"95 688052.SH 52\n",
"96 688091.SH 198\n",
"97 688368.SH 656\n",
"98 688396.SH 575\n",
"\n",
"[99 rows x 2 columns]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"\"\"\n",
" select code, count(*) from kurto group by code\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "59758901-3728-4355-bbb8-15b3eb537cdf",
"metadata": {},
"outputs": [],
"source": [
"def create_daily_ddb_database():\n",
" code = \"\"\"\n",
" daily_stock_ts = database(\n",
" \"dfs://daily_stock_ts\", \n",
" HASH, [SYMBOL, 50], \n",
" engine = 'TSDB'\n",
" )\n",
" \"\"\"\n",
" sess.run(code)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "d8fa94ae-6ca4-41f3-8b13-47ab8876ba95",
"metadata": {},
"outputs": [],
"source": [
"def load_daily_ddb_database():\n",
" code = \"\"\"\n",
" daily_stock_ts = database(\"dfs://daily_stock_ts\")\n",
" \"\"\"\n",
" sess.run(code)\n",
"\n",
"load_daily_ddb_database()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "84eb237e-959c-419f-bce1-6779acda86d4",
"metadata": {},
"outputs": [],
"source": [
"def create_kurto_partition_table():\n",
" # createPartitionedTable(dbHandle, table, tableName, [partitionColumns], [compressMethods], [sortColumns], [keepDuplicates=ALL], [sortKeyMappingFunction])\n",
" code = \"\"\"\n",
" dropTable(daily_stock_ts, \"kurto_partitioned\");\n",
" kurto_partitioned = createPartitionedTable(\n",
" daily_stock_ts,\n",
" kurto, \n",
" \"kurto_partitioned\",\n",
" partitionColumns = 'code',\n",
" compressMethods = {'m_nDate' : 'delta'},\n",
" sortColumns = `code`m_nDate\n",
" );\n",
" \"\"\"\n",
" sess.run(code)\n",
" \n",
"create_kurto_partition_table()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "0d93b3d1-39b7-4e23-8abf-eda86dcd2a23",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>code</th>\n",
" <th>m_nDate</th>\n",
" <th>kurto</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>000400.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>-0.317991</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>000558.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>-15.387820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>000677.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>-15.459625</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>000731.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>15.443339</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>000752.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>15.396262</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>000789.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>-14.801720</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>000903.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>-15.405085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>000912.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>14.755241</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>000935.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>15.413214</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>000970.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>15.458413</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" code m_nDate kurto\n",
"0 000400.SZ 2013-01-04 -0.317991\n",
"1 000558.SZ 2013-01-04 -15.387820\n",
"2 000677.SZ 2013-01-04 -15.459625\n",
"3 000731.SZ 2013-01-04 15.443339\n",
"4 000752.SZ 2013-01-04 15.396262\n",
"5 000789.SZ 2013-01-04 -14.801720\n",
"6 000903.SZ 2013-01-04 -15.405085\n",
"7 000912.SZ 2013-01-04 14.755241\n",
"8 000935.SZ 2013-01-04 15.413214\n",
"9 000970.SZ 2013-01-04 15.458413"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"\"\"\n",
" kurto_partitioned.tableInsert(kurto);\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "eea5cb0b-8452-41a9-a0ad-7cf52c856c25",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>154953</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count\n",
"0 154953"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"\"\"\n",
" select count(*) from kurto_partitioned;\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "9e7bc856-517b-4bf9-866d-4fe85b9705a3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'partitionType': 5,\n",
" 'partitionColumnType': 17,\n",
" 'partitionColumnIndex': 0,\n",
" 'chunkPath': None,\n",
" 'colDefs': name typeString typeInt comment\n",
" 0 code SYMBOL 17 \n",
" 1 m_nDate DATE 6 \n",
" 2 kurto DOUBLE 16 ,\n",
" 'chunkGranularity': 'TABLE',\n",
" 'partitionTypeName': 'HASH',\n",
" 'keepDuplicates': 'ALL',\n",
" 'engineType': 'TSDB',\n",
" 'partitionColumnName': 'code',\n",
" 'partitionSchema': 50,\n",
" 'sortColumns': array(['code', 'm_nDate'], dtype=object),\n",
" 'partitionSites': None}"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"\"\"\n",
" schema(loadTable(\"dfs://daily_stock_ts\", \"kurto_partitioned\"));\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "94ae00b5-ba42-48b9-b114-012cf306d871",
"metadata": {},
"outputs": [],
"source": [
"sess.run(\"m_table = table(kurto)\")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "e30134f9-5110-421d-8f71-1bef25e7ef17",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>code</th>\n",
" <th>m_nDate</th>\n",
" <th>kurto</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>000400.SZ</td>\n",
" <td>2013-01-04</td>\n",
" <td>-0.317991</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>000400.SZ</td>\n",
" <td>2013-01-07</td>\n",
" <td>0.790872</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>000400.SZ</td>\n",
" <td>2013-01-08</td>\n",
" <td>0.655005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>000400.SZ</td>\n",
" <td>2013-01-09</td>\n",
" <td>0.965054</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>000400.SZ</td>\n",
" <td>2013-01-10</td>\n",
" <td>-0.116488</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>154948</th>\n",
" <td>688396.SH</td>\n",
" <td>2022-07-04</td>\n",
" <td>-15.420192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>154949</th>\n",
" <td>688396.SH</td>\n",
" <td>2022-07-05</td>\n",
" <td>-15.426922</td>\n",
" </tr>\n",
" <tr>\n",
" <th>154950</th>\n",
" <td>688396.SH</td>\n",
" <td>2022-07-06</td>\n",
" <td>-15.389434</td>\n",
" </tr>\n",
" <tr>\n",
" <th>154951</th>\n",
" <td>688396.SH</td>\n",
" <td>2022-07-07</td>\n",
" <td>-15.411602</td>\n",
" </tr>\n",
" <tr>\n",
" <th>154952</th>\n",
" <td>688396.SH</td>\n",
" <td>2022-07-08</td>\n",
" <td>-15.401453</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>154953 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" code m_nDate kurto\n",
"0 000400.SZ 2013-01-04 -0.317991\n",
"1 000400.SZ 2013-01-07 0.790872\n",
"2 000400.SZ 2013-01-08 0.655005\n",
"3 000400.SZ 2013-01-09 0.965054\n",
"4 000400.SZ 2013-01-10 -0.116488\n",
"... ... ... ...\n",
"154948 688396.SH 2022-07-04 -15.420192\n",
"154949 688396.SH 2022-07-05 -15.426922\n",
"154950 688396.SH 2022-07-06 -15.389434\n",
"154951 688396.SH 2022-07-07 -15.411602\n",
"154952 688396.SH 2022-07-08 -15.401453\n",
"\n",
"[154953 rows x 3 columns]"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"m_table\")"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "947e3346-15f6-4fb5-b2d6-b97b013a3b36",
"metadata": {},
"outputs": [],
"source": [
"code = \"\"\"\n",
" // 保证创建新的分区表不会和已经存在表冲突\n",
" if (existsTable(\"{ddb_daily_path}\", \"{partition_tbl_name}\")) {{\n",
" dropTable({ddb_daily_dbname}, \"{partition_tbl_name}\");\n",
" }}\n",
"\n",
" {partition_tbl_name} = createPartitionedTable(\n",
" dbHandle = {ddb_daily_dbname},\n",
" table = {memory_tbl_name}, \n",
" \"{partition_tbl_name}\",\n",
" partitionColumns = 'code',\n",
" compressMethods = {{'m_nDate' : 'delta'}},\n",
" sortColumns = `code`m_nDate\n",
" );\n",
"\"\"\".format(\n",
" ddb_daily_path = \"A\",\n",
" ddb_daily_dbname = \"B\",\n",
" partition_tbl_name = \"C\",\n",
" memory_tbl_name = \"D\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "5d758c68-301a-47a7-9cd2-b7dd7edf1a55",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('\\n'\n",
" ' // 保证创建新的分区表不会和已经存在表冲突\\n'\n",
" ' if (existsTable(\"A\", \"C\")) {\\n'\n",
" ' dropTable(B, \"C\");\\n'\n",
" ' }\\n'\n",
" '\\n'\n",
" ' C = createPartitionedTable(\\n'\n",
" ' dbHandle = B,\\n'\n",
" ' table = D, \\n'\n",
" ' \"C\",\\n'\n",
" \" partitionColumns = 'code',\\n\"\n",
" \" compressMethods = {'m_nDate' : 'delta'},\\n\"\n",
" ' sortColumns = `code`m_nDate\\n'\n",
" ' );\\n')\n"
]
}
],
"source": [
"from pprint import pprint\n",
"pprint(code)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "400424d9-9c58-4417-94e4-fe1569522b03",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'chunkGranularity': 'TABLE',\n",
" 'chunkPath': None,\n",
" 'colDefs': name typeString typeInt comment\n",
"0 code SYMBOL 17 \n",
"1 m_nDate DATE 6 \n",
"2 kurto DOUBLE 16 ,\n",
" 'engineType': 'TSDB',\n",
" 'keepDuplicates': 'ALL',\n",
" 'partitionColumnIndex': 0,\n",
" 'partitionColumnName': 'code',\n",
" 'partitionColumnType': 17,\n",
" 'partitionSchema': 50,\n",
" 'partitionSites': None,\n",
" 'partitionType': 5,\n",
" 'partitionTypeName': 'HASH',\n",
" 'sortColumns': array(['code', 'm_nDate'], dtype=object)}\n"
]
}
],
"source": [
"pprint(\n",
" sess.run(\"\"\"\n",
" schema(loadTable(\"dfs://daily_stock_ts\", \"hft_daily_factor\"));\n",
" \"\"\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "68e74de8-359c-4c47-a361-2c006896211c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>309906</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count\n",
"0 309906"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(\"\"\"\n",
" select count(*) from loadTable(\"dfs://daily_stock_ts\", \"hft_daily_factor\");\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1bd7605-c347-44f2-a90f-bc949cde0ef0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading…
Cancel
Save