You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
dolphin-dev/DDBDailyFactor.py

217 lines
7.3 KiB

from pprint import pprint
from tqdm import tqdm
import functools
import dolphindb as ddb
from DDBLoader import DDBLoader
def load_ddb_table(hft_tbl_name):
    """Build a method decorator that loads a DolphinDB table before the call.

    Before the decorated method runs, the wrapper executes a `loadTable`
    script on `self.ddb_sess`, binding the table named `hft_tbl_name` from the
    database at `DDBLoader.ddb_path` to the server-side variable `tbl`.

    Args:
        hft_tbl_name: Name of the table to load from the `DDBLoader` database.

    Returns:
        A decorator for instance methods whose `self` exposes a `ddb_sess`
        attribute with a `run(script)` method.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            self.ddb_sess.run("""
                // 载入计算使用的原始数据分钟线数据
                tbl = loadTable("{hft_ddb_path}", "{hft_tbl_name}");
            """.format(
                hft_ddb_path = DDBLoader.ddb_path,
                hft_tbl_name = hft_tbl_name,
            ))
            print('Did load', hft_tbl_name)
            # Forward every argument; the previous version called `func(self)`
            # and silently discarded whatever the caller passed in.
            return func(self, *args, **kwargs)
        return wrapper
    return decorator
class DailyFactor(object):
    """Compute daily-frequency factors from minute bars stored in DolphinDB.

    The class opens a DolphinDB session on construction and offers helpers to
    create/load the daily database, build factor tables in server memory, and
    persist them as partitioned tables.
    """

    # Location and server-side handle name of the daily-frequency database.
    ddb_daily_path = "dfs://daily_stock_ts"
    ddb_daily_dbname = "db_daily_stock"

    # Connection settings used by `__init__`.
    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to configuration or environment variables.
    ddb_config = {
        'host' : '192.168.1.167',
        'username' : 'admin',
        'password' : '123456',
    }

    # The partition count here does not have to match the one of the hft
    # table. When reading the hft table, use `DDBLoader.num_code_partition`
    # instead of this field.
    num_code_partition = 50

    def __init__(self):
        """Open a DolphinDB session (port 8848) and log in with `ddb_config`."""
        self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
        self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])

    def create_ddb_database(self):
        """Create the daily database, HASH-partitioned on the stock symbol.

        Daily data is small and `m_nDate` can serve as a sort key inside each
        partition, so a single `[HASH, num_code_partition]` scheme on the stock
        id is sufficient and no calendar data is needed.
        """
        # Two fixes compared with the previous version:
        #   * the template placeholder was misspelled (`num_code_parition`),
        #     so `.format` always raised KeyError;
        #   * the database was pointed at `DDBLoader.ddb_path` (the hft
        #     database) although this method is documented as creating the
        #     daily database that `load_ddb_database` opens.
        self.ddb_sess.run("""
            daily_stock_ts = database(
                "{ddb_daily_path}",
                HASH, [SYMBOL, {num_code_partition}],
                engine = 'TSDB'
            )
        """.format(
            ddb_daily_path = self.ddb_daily_path,
            num_code_partition = self.num_code_partition,
        ))
        print('Did create database')

    def load_ddb_database(self):
        """Bind the daily database to the server variable `ddb_daily_dbname`."""
        self.ddb_sess.run("""
            {dbName} = database(
                directory = '{dbPath}',
                partitionType = HASH,
                partitionScheme = [SYMBOL, {num_code_partition}],
                engine = 'TSDB'
            )
        """.format(
            dbName = self.ddb_daily_dbname,
            dbPath = self.ddb_daily_path,
            num_code_partition = self.num_code_partition,
        ))
        print('Did load database.')

    def append_factor_columns(self, factor_name_list, memory_tbl_name, partition_tbl_name):
        """Add DOUBLE columns to a partitioned table, then insert a memory table.

        Args:
            factor_name_list: Names of the factor columns to add; each one is
                added with type DOUBLE.
            memory_tbl_name: Server-side variable name of the in-memory table
                whose rows are inserted afterwards.
            partition_tbl_name: Server-side variable name of the partitioned
                table that is altered and written to.
        """
        code = """
            addColumn({partition_tbl_name}, {col_name_list}, {col_type_list});
        """.format(
            partition_tbl_name = partition_tbl_name,
            # Rendered as a DolphinDB symbol-vector literal: `a`b`c
            col_name_list = '`' + '`'.join(factor_name_list),
            col_type_list = '[' + ','.join(['DOUBLE'] * len(factor_name_list)) + ']',
        )
        print('Will add columns via script:')
        print(code)
        self.ddb_sess.run(code)

        code = """
            {partition_tbl_name}.tableInsert({memory_tbl_name})
        """.format(
            partition_tbl_name = partition_tbl_name,
            memory_tbl_name = memory_tbl_name,
        )
        print('Will append date via script:')
        print(code)
        self.ddb_sess.run(code)

    def append_to_partition_table(self, partition_tbl_name, memory_tbl_name):
        """Insert all rows of `memory_tbl_name` into `partition_tbl_name`.

        Both arguments are server-side variable names, not Python objects.
        """
        self.ddb_sess.run("""
            {partition_tbl_name}.tableInsert({memory_tbl_name})
        """.format(
            partition_tbl_name = partition_tbl_name,
            memory_tbl_name = memory_tbl_name,
        ))

    def create_factor_partition_table(self, partition_tbl_name, memory_tbl_name):
        """Create (or recreate) a partitioned factor table in the daily database.

        An existing table with the same name is dropped first. The new table is
        created from the in-memory table `memory_tbl_name`, partitioned on
        `code` and sorted by `code`, `m_nDate`. The database handle
        `ddb_daily_dbname` must already exist on the server (see
        `load_ddb_database`).

        Args:
            partition_tbl_name: Name of the partitioned table; also used as the
                server-side variable bound to the created table.
            memory_tbl_name: Server-side variable name of the template table.
        """
        # createPartitionedTable(
        #     dbHandle, table, tableName,
        #     [partitionColumns], [compressMethods],
        #     [sortColumns], [keepDuplicates=ALL], [sortKeyMappingFunction])
        code = """
            // 保证创建新的分区表不会和已经存在表冲突
            if (existsTable("{ddb_daily_path}", "{partition_tbl_name}")) {{
                dropTable({ddb_daily_dbname}, "{partition_tbl_name}");
            }}
            {partition_tbl_name} = createPartitionedTable(
                dbHandle = {ddb_daily_dbname},
                table = {memory_tbl_name},
                tableName = "{partition_tbl_name}",
                partitionColumns = 'code',
                compressMethods = {{'m_nDate' : 'delta'}},
                sortColumns = `code`m_nDate
            );
        """.format(
            ddb_daily_path = self.ddb_daily_path,
            ddb_daily_dbname = self.ddb_daily_dbname,
            partition_tbl_name = partition_tbl_name,
            memory_tbl_name = memory_tbl_name,
        )
        print('Will create partitioned factor table via script:')
        print(code)
        self.ddb_sess.run(code)

    @load_ddb_table("KLinePartitioned")
    def make_kurto_memory_table(self):
        """Build the in-memory `kurto` factor table, one code partition at a time.

        For each of the `DDBLoader.num_code_partition` partitions of the source
        table `tbl` (loaded by the decorator), per-bar returns are computed and
        aggregated per (`code`, `m_nDate`) into a `kurto` value, which is
        appended to a server-side in-memory table.

        Returns:
            The server-side variable name of the in-memory table ("kurto").
        """
        memory_table_name = "kurto"

        # Raw string: the DolphinDB ratio operator `\` must reach the server
        # verbatim, and `\,` is an invalid escape in a normal Python string.
        #
        # The return series uses `context by code, m_nDate`. The previous
        # script grouped by `m_nDate` only, so `eachPre` divided the first
        # close of one stock by the last close of another stock stored in the
        # same partition.
        #
        # NOTE(review): the aggregation is a third-moment ratio (skewness-like)
        # although the column is named `kurto` -- confirm the intended formula.
        code_tpl = r"""
            // 需要首先创建分钟线收益表
            // 使用`context by`使得计算结果仍然为一个序列
            // 使用`where partition()`来逐个加载分区
            ret_sql = select
                code, m_nDate, eachPre(\, m_nClose)-1.0 as ret
            from tbl
            where partition(code, {partition_id})
            context by code, m_nDate;
            // 计算kurto指标`ret`表中每日第一条记录为空似乎并不造成影响
            kurto_sql = select
                code, m_nDate, sqrt(239) * sum(pow(ret, 3)) / pow(sum(pow(ret, 2)), 1.5) as kurto
            from ret_sql
            group by code, m_nDate;
        """

        with tqdm(range(DDBLoader.num_code_partition)) as pbar:
            for partition_id in pbar:
                self.ddb_sess.run(code_tpl.format(
                    partition_id = partition_id,
                ))

                # The source table is processed partition by partition, so the
                # results are first collected in a temporary in-memory table.
                # Otherwise, once the first partition had been written to the
                # partitioned table, later data containing only part of the
                # fields could no longer be inserted.
                if partition_id == 0:
                    self.ddb_sess.run("""
                        {memory_table_name} = table(kurto_sql)
                    """.format(
                        memory_table_name = memory_table_name,
                    ))

                # Per the original author, the `table` statement above only
                # creates the table structure and `tableInsert` adds the rows.
                # NOTE(review): confirm `table(kurto_sql)` does not already
                # copy the rows, which would duplicate partition 0.
                self.ddb_sess.run("""
                    {memory_table_name}.tableInsert(kurto_sql)
                """.format(
                    memory_table_name = memory_table_name,
                ))

        print('Did finish all parititons for kurto.')
        pprint(self.ddb_sess.run(memory_table_name))
        return memory_table_name
def main():
    """Compute the kurto factor and store it in the `hft_daily_factor` table.

    Steps: open the daily database, build the in-memory factor table, recreate
    the partitioned target table from it, then append the factor rows.
    """
    target_table = 'hft_daily_factor'

    daily_factor = DailyFactor()
    daily_factor.load_ddb_database()

    kurto_table = daily_factor.make_kurto_memory_table()
    daily_factor.create_factor_partition_table(target_table, kurto_table)
    daily_factor.append_to_partition_table(target_table, kurto_table)


if __name__ == '__main__':
    main()