You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

220 lines
7.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from pprint import pprint
from tqdm import tqdm
import functools
import dolphindb as ddb
from DDBLoader import DDBLoader
def load_ddb_table(hft_tbl_name):
    """Build a method decorator that loads a partitioned table before the call.

    This is syntactic sugar to simplify loading a partitioned table. (Original
    author's note: few call sites actually need the table pre-loaded, so the
    simplification is modest.)

    Before the decorated method runs, the wrapper executes a DolphinDB script
    on ``self.ddb_sess`` that binds the server-side variable ``tbl`` to
    ``loadTable(DDBLoader.ddb_path, hft_tbl_name)``.

    Args:
        hft_tbl_name: Name of the table inside the database located at
            ``DDBLoader.ddb_path``.

    Returns:
        A decorator for instance methods whose ``self`` exposes a ``ddb_sess``
        attribute with a ``run(script)`` method.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            self.ddb_sess.run("""
                // 载入计算使用的原始数据:分钟线数据
                tbl = loadTable("{hft_ddb_path}", "{hft_tbl_name}");
            """.format(
                hft_ddb_path=DDBLoader.ddb_path,
                hft_tbl_name=hft_tbl_name,
            ))
            print('Did load', hft_tbl_name)
            # Forward all caller arguments; the wrapper previously accepted
            # them but invoked `func(self)` only, silently discarding them.
            return func(self, *args, **kwargs)
        return wrapper
    return decorator
class DailyFactor(object):
    """Compute daily-frequency factors in DolphinDB and store them in a daily database.

    Each instance opens a DolphinDB session (``self.ddb_sess``) on construction;
    every method works by formatting a DolphinDB script and running it on that
    session.
    """

    #ddb_hft_path = "dfs://hft_stock_ts"
    #ddb_hft_dbname = "db_hft_stock"

    # DFS path of the daily database and the server-side variable name used
    # for its handle.
    ddb_daily_path = "dfs://daily_stock_ts"
    ddb_daily_dbname = "db_daily_stock"

    # Connection settings for the DolphinDB server.
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration or the environment.
    ddb_config = {
        'host' : '192.168.1.167',
        'username' : 'admin',
        'password' : '123456',
    }

    # The partition count here does not have to match the HFT table's.
    # When reading the HFT table, use `num_code_partition` from DDBLoader
    # rather than this field.
    num_code_partition = 50

    def __init__(self):
        """Open a DolphinDB session on port 8848 and log in with ``ddb_config``."""
        self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
        self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])

    def create_ddb_database(self):
        """Create the daily database, HASH-partitioned on a SYMBOL column.

        Daily data is small and ``m_nDate`` can serve as the sort key
        internally, so the only partitioning needed is
        ``[HASH, num_code_partition]`` on the stock id; no calendar data is
        required as input.
        """
        # Two fixes relative to the earlier version:
        #   * the placeholder was misspelled `{num_code_parition}`, so
        #     `.format` raised KeyError on every call;
        #   * the database was created at DDBLoader.ddb_path (the HFT path)
        #     instead of the daily path that load_ddb_database() opens.
        self.ddb_sess.run("""
            daily_stock_ts = database(
                "{ddb_daily_path}",
                HASH, [SYMBOL, {num_code_partition}],
                engine = 'TSDB'
            )
        """.format(
            ddb_daily_path=self.ddb_daily_path,
            num_code_partition=self.num_code_partition,
        ))
        print('Did create database')

    def load_ddb_database(self):
        """Bind the daily database handle to the server variable ``ddb_daily_dbname``."""
        self.ddb_sess.run("""
            {dbName} = database(
                directory = '{dbPath}',
                partitionType = HASH,
                partitionScheme = [SYMBOL, {num_code_partition}],
                engine = 'TSDB'
            )
        """.format(
            dbName=self.ddb_daily_dbname,
            dbPath=self.ddb_daily_path,
            num_code_partition=self.num_code_partition,
        ))
        print('Did load database.')

    def append_factor_columns(self, factor_name_list, memory_tbl_name, partition_tbl_name):
        """Add DOUBLE factor columns to a partitioned table, then insert rows.

        Args:
            factor_name_list: Names of the columns to add; each is typed DOUBLE.
            memory_tbl_name: Server-side name of the in-memory table whose rows
                are inserted after the columns are added.
            partition_tbl_name: Server-side name of the partitioned table.
        """
        code = """
            addColumn({partition_tbl_name}, {col_name_list}, {col_type_list});
        """.format(
            partition_tbl_name=partition_tbl_name,
            # DolphinDB symbol-vector literal, e.g. `a`b`c
            col_name_list='`' + '`'.join(factor_name_list),
            col_type_list='[' + ','.join(['DOUBLE'] * len(factor_name_list)) + ']',
        )
        print('Will add columns via script:')
        print(code)
        self.ddb_sess.run(code)

        code = """
            {partition_tbl_name}.tableInsert({memory_tbl_name})
        """.format(
            partition_tbl_name=partition_tbl_name,
            memory_tbl_name=memory_tbl_name,
        )
        print('Will append date via script:')
        print(code)
        self.ddb_sess.run(code)

    def append_to_partition_table(self, partition_tbl_name, memory_tbl_name):
        """Insert all rows of ``memory_tbl_name`` into ``partition_tbl_name``.

        Both arguments are names of server-side variables in the session.
        """
        self.ddb_sess.run("""
            {partition_tbl_name}.tableInsert({memory_tbl_name})
        """.format(
            partition_tbl_name=partition_tbl_name,
            memory_tbl_name=memory_tbl_name,
        ))

    def create_factor_partition_table(self, partition_tbl_name, memory_tbl_name):
        """(Re)create a partitioned factor table modelled on an in-memory table.

        Any existing table named ``partition_tbl_name`` in the daily database
        is dropped first. The new table is partitioned on ``code``, sorted by
        ``code`` and ``m_nDate``, and compresses ``m_nDate`` with ``delta``.
        The handle is bound to a server variable named ``partition_tbl_name``.

        Args:
            partition_tbl_name: Name of the partitioned table to create.
            memory_tbl_name: Server-side name of the in-memory table passed as
                the ``table`` argument of ``createPartitionedTable``.
                Presumably only its schema is used and rows must be inserted
                separately (see ``append_to_partition_table``); confirm
                against the DolphinDB documentation.
        """
        # createPartitionedTable(
        #     dbHandle, table, tableName,
        #     [partitionColumns], [compressMethods],
        #     [sortColumns], [keepDuplicates=ALL], [sortKeyMappingFunction])
        code = """
            // 保证创建新的分区表不会和已经存在表冲突
            if (existsTable("{ddb_daily_path}", "{partition_tbl_name}")) {{
                dropTable({ddb_daily_dbname}, "{partition_tbl_name}");
            }}
            {partition_tbl_name} = createPartitionedTable(
                dbHandle = {ddb_daily_dbname},
                table = {memory_tbl_name},
                tableName = "{partition_tbl_name}",
                partitionColumns = 'code',
                compressMethods = {{'m_nDate' : 'delta'}},
                sortColumns = `code`m_nDate
            );
        """.format(
            ddb_daily_path=self.ddb_daily_path,
            ddb_daily_dbname=self.ddb_daily_dbname,
            partition_tbl_name=partition_tbl_name,
            memory_tbl_name=memory_tbl_name,
        )
        print('Will create partitioned factor table via script:')
        print(code)
        self.ddb_sess.run(code)

    @load_ddb_table("KLinePartitioned")
    def make_kurto_memory_table(self):
        """Compute the per-code, per-day ``kurto`` factor into an in-memory table.

        The decorator binds the server variable ``tbl`` to the
        ``KLinePartitioned`` table first. Each of the
        ``DDBLoader.num_code_partition`` partitions is then processed in turn
        and its result accumulated into the server-side table ``kurto``.

        Returns:
            The server-side name of the in-memory result table (``"kurto"``).
        """
        memory_table_name = "kurto"
        # NOTE(review): `ret_sql` uses `context by m_nDate` only. If one
        # partition holds several codes, `eachPre` would also pair the last bar
        # of one code with the first bar of the next; confirm whether
        # `context by code, m_nDate` was intended.
        # `\\,` keeps the script text `\,` (DolphinDB's ratio operator passed
        # to eachPre) without an invalid Python escape sequence.
        code_tpl = """
            // 需要首先创建分钟线收益表
            // 使用`context by`使得计算结果仍然为一个序列
            // 使用`where partition()`来逐个加载分区
            ret_sql = select
                code, m_nDate, eachPre(\\, m_nClose)-1.0 as ret
            from tbl
            where partition(code, {partition_id})
            context by m_nDate;
            // 计算kurto指标`ret`表中每日第一条记录为空,似乎并不造成影响
            kurto_sql = select
                code, m_nDate, sqrt(239) * sum(pow(ret, 3)) / pow(sum(pow(ret, 2)), 1.5) as kurto
            from ret_sql
            group by code, m_nDate;
        """
        with tqdm(range(DDBLoader.num_code_partition)) as pbar:
            for partition_id in pbar:
                self.ddb_sess.run(code_tpl.format(
                    partition_id=partition_id,
                ))
                # The source table is split into partitions that are computed
                # one at a time, so results are gathered in a temporary
                # in-memory table first. Otherwise, once the first partition
                # had been inserted into the partitioned table, later data
                # containing only part of the fields could not be inserted.
                if partition_id == 0:
                    self.ddb_sess.run("""
                        {memory_table_name} = table(kurto_sql)
                    """.format(
                        memory_table_name=memory_table_name,
                    ))
                # Original author's note: the `table` statement above only
                # creates the table structure; `tableInsert` then inserts the
                # actual data.
                # NOTE(review): confirm `table(kurto_sql)` does not already
                # copy partition 0's rows, or they would be inserted twice.
                self.ddb_sess.run("""
                    {memory_table_name}.tableInsert(kurto_sql)
                """.format(
                    memory_table_name=memory_table_name,
                ))
        print('Did finish all parititons for kurto.')
        # Fetch the accumulated table from the server and show it.
        pprint(self.ddb_sess.run(memory_table_name))
        return memory_table_name
def main():
    """Run the kurto pipeline end to end.

    Loads the daily database handle, computes the kurto factor into an
    in-memory table, recreates the `hft_daily_factor` partitioned table from
    it, and finally inserts the computed rows into that table.
    """
    target_table = 'hft_daily_factor'

    daily_factor = DailyFactor()
    daily_factor.load_ddb_database()

    kurto_table = daily_factor.make_kurto_memory_table()

    # Create the table first, then insert the rows as a separate step.
    daily_factor.create_factor_partition_table(target_table, kurto_table)
    daily_factor.append_to_partition_table(target_table, kurto_table)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()