|
|
|
|
|
|
|
|
|
from pprint import pprint
|
|
|
|
|
from tqdm import tqdm
|
|
|
|
|
import functools
|
|
|
|
|
|
|
|
|
|
import dolphindb as ddb
|
|
|
|
|
|
|
|
|
|
from DDBLoader import DDBLoader
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_ddb_table(hft_tbl_name):
    """
    Build a method decorator that loads a DolphinDB table before the call.

    Each time the decorated method is invoked, the wrapper first runs a script
    on ``self.ddb_sess`` that assigns ``loadTable(DDBLoader.ddb_path,
    hft_tbl_name)`` to the session variable ``tbl``, prints a confirmation,
    and then calls the original method.

    Original author's note: this is syntactic sugar meant to simplify loading
    a partitioned table, but few call sites actually need the table loaded
    up front, so the simplification is modest.

    Args:
        hft_tbl_name: Name of the table to load from the database located at
            ``DDBLoader.ddb_path``.

    Returns:
        A decorator for methods whose instance exposes a ``ddb_sess``
        attribute providing ``run(script)``.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            self.ddb_sess.run("""
                // 载入计算使用的原始数据:分钟线数据
                tbl = loadTable("{hft_ddb_path}", "{hft_tbl_name}");
            """.format(
                hft_ddb_path = DDBLoader.ddb_path,
                hft_tbl_name = hft_tbl_name,
            ))
            print('Did load', hft_tbl_name)

            # Forward every argument: the wrapper accepts *args/**kwargs, so
            # dropping them here would silently break decorated methods that
            # take parameters.
            return func(self, *args, **kwargs)
        return wrapper
    return decorator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DailyFactor(object):
    """
    Helper for building daily-frequency factor tables in DolphinDB.

    An instance opens and logs in to a DolphinDB session on construction. The
    methods then send scripts to that session to create/load the daily
    database, compute a factor into a server-side in-memory table, and store
    the result in a partitioned table.
    """

    #ddb_hft_path = "dfs://hft_stock_ts"
    #ddb_hft_dbname = "db_hft_stock"

    # Location of the daily database and the name of the session variable
    # that holds its handle inside the generated scripts.
    ddb_daily_path = "dfs://daily_stock_ts"
    ddb_daily_dbname = "db_daily_stock"

    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to configuration or environment variables.
    ddb_config = {
        'host' : '192.168.1.167',
        'username' : 'admin',
        'password' : '123456',
    }

    # The number of partitions here does not have to match the hft table's.
    # When reading the hft table, use `num_code_partition` from DDBLoader
    # rather than this field.
    num_code_partition = 50

    def __init__(self):
        """Open a DolphinDB session to the configured host (port 8848) and log in."""
        self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
        self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])

    def create_ddb_database(self):
        """
        Create the daily TSDB database, hash-partitioned on SYMBOL.

        Original author's note: daily data is small and `m_nDate` can serve as
        the sort key, so partitioning only needs [HASH, 50] on the stock id;
        therefore no calendar data is required as input.

        The script targets `ddb_daily_path`, the same path that
        `load_ddb_database` opens, and binds the handle to the session
        variable `daily_stock_ts`.
        """
        # The template placeholder must match the keyword passed to format();
        # a misspelled placeholder raises KeyError before anything is sent.
        self.ddb_sess.run("""
            daily_stock_ts = database(
                "{ddb_daily_path}",
                HASH, [SYMBOL, {num_code_partition}],
                engine = 'TSDB'
            )
        """.format(
            ddb_daily_path = self.ddb_daily_path,
            num_code_partition = self.num_code_partition
        ))
        print('Did create database')

    def load_ddb_database(self):
        """
        Bind the daily database handle to the session variable
        named by `ddb_daily_dbname`.
        """
        self.ddb_sess.run("""
            {dbName} = database(
                directory = '{dbPath}',
                partitionType = HASH,
                partitionScheme = [SYMBOL, {num_code_partition}],
                engine = 'TSDB'
            )
        """.format(
            dbName = self.ddb_daily_dbname,
            dbPath = self.ddb_daily_path,
            num_code_partition = self.num_code_partition
        ))
        print('Did load database.')

    def append_factor_columns(self, factor_name_list, memory_tbl_name, partition_tbl_name):
        """
        Add DOUBLE columns to a partitioned table, then insert a memory table.

        Args:
            factor_name_list: Non-empty sequence of column names to add; every
                new column is declared as DOUBLE.
            memory_tbl_name: Session variable name of the source table.
            partition_tbl_name: Session variable name of the target table.

        Raises:
            ValueError: If `factor_name_list` is empty, since the generated
                `addColumn` call would be malformed.
        """
        if not factor_name_list:
            raise ValueError('factor_name_list must contain at least one column name')

        code = """
            addColumn({partition_tbl_name}, {col_name_list}, {col_type_list});
        """.format(
            partition_tbl_name = partition_tbl_name,
            # Backtick-prefixed names, e.g. `a`b`c
            col_name_list = '`' + '`'.join(factor_name_list),
            col_type_list = '[' + ','.join(['DOUBLE']*len(factor_name_list)) + ']'
        )
        print('Will add columns via script:')
        print(code)
        self.ddb_sess.run(code)

        code = """
            {partition_tbl_name}.tableInsert({memory_tbl_name})
        """.format(
            partition_tbl_name = partition_tbl_name,
            memory_tbl_name = memory_tbl_name
        )
        print('Will append date via script:')
        print(code)
        self.ddb_sess.run(code)

    def append_to_partition_table(self, partition_tbl_name, memory_tbl_name):
        """
        Insert the rows of `memory_tbl_name` into `partition_tbl_name`.

        Both arguments are names of variables that already exist in the
        DolphinDB session.
        """
        self.ddb_sess.run("""
            {partition_tbl_name}.tableInsert({memory_tbl_name})
        """.format(
            partition_tbl_name = partition_tbl_name,
            memory_tbl_name = memory_tbl_name
        ))

    def create_factor_partition_table(self, partition_tbl_name, memory_tbl_name):
        """
        (Re)create a partitioned table using `memory_tbl_name` as the `table` argument.

        If a table named `partition_tbl_name` already exists in the daily
        database it is dropped first. The new table is partitioned on `code`,
        sorted by `code` and `m_nDate`, and compresses `m_nDate` with 'delta'.
        The script itself contains no insert statement; use
        `append_to_partition_table` to add rows.

        Requires the database handle named by `ddb_daily_dbname` to be present
        in the session (see `load_ddb_database`).
        """
        # createPartitionedTable(
        #     dbHandle, table, tableName,
        #     [partitionColumns], [compressMethods],
        #     [sortColumns], [keepDuplicates=ALL], [sortKeyMappingFunction])
        code = """
            // 保证创建新的分区表不会和已经存在表冲突
            if (existsTable("{ddb_daily_path}", "{partition_tbl_name}")) {{
                dropTable({ddb_daily_dbname}, "{partition_tbl_name}");
            }}

            {partition_tbl_name} = createPartitionedTable(
                dbHandle = {ddb_daily_dbname},
                table = {memory_tbl_name},
                tableName = "{partition_tbl_name}",
                partitionColumns = 'code',
                compressMethods = {{'m_nDate' : 'delta'}},
                sortColumns = `code`m_nDate
            );
        """.format(
            ddb_daily_path = self.ddb_daily_path,
            ddb_daily_dbname = self.ddb_daily_dbname,
            partition_tbl_name = partition_tbl_name,
            memory_tbl_name = memory_tbl_name,
        )
        print('Will create partitioned factor table via script:')
        print(code)
        self.ddb_sess.run(code)

    @load_ddb_table("KLinePartitioned")
    def make_kurto_memory_table(self):
        """
        Compute the `kurto` factor partition by partition into a memory table.

        For every partition id in `range(DDBLoader.num_code_partition)` a
        script derives per-row returns (`ret`) from `m_nClose` and aggregates
        them per `code` and `m_nDate` into `kurto`. Results are accumulated in
        the server-side in-memory table named "kurto", which is printed at the
        end.

        Relies on the session variable `tbl` set up by the `load_ddb_table`
        decorator.

        Returns:
            The name of the in-memory table holding the results ("kurto").
        """
        memory_table_name = "kurto"

        # NOTE(review): `ret` is computed `context by m_nDate` only, while the
        # aggregation groups by `code, m_nDate` -- confirm that ratios between
        # rows of different codes on the same date cannot occur.
        # NOTE(review): 239 is presumably the number of intraday returns per
        # day -- confirm.
        # The backslash is escaped so Python emits the DolphinDB ratio
        # operator `\` without relying on an invalid escape sequence.
        code_tpl = """
            // 需要首先创建分钟线收益表
            // 使用`context by`使得计算结果仍然为一个序列
            // 使用`where partition()`来逐个加载分区
            ret_sql = select
                code, m_nDate, eachPre(\\, m_nClose)-1.0 as ret
            from tbl
            where partition(code, {partition_id})
            context by m_nDate;

            // 计算kurto指标,`ret`表中每日第一条记录为空,似乎并不造成影响
            kurto_sql = select
                code, m_nDate, sqrt(239) * sum(pow(ret, 3)) / pow(sum(pow(ret, 2)), 1.5) as kurto
            from ret_sql
            group by code, m_nDate;
        """

        with tqdm(range(DDBLoader.num_code_partition)) as pbar:
            for partition_id in pbar:
                self.ddb_sess.run(code_tpl.format(
                    partition_id = partition_id,
                ))

                # The source table has many partitions that must be processed
                # one at a time, so results are first collected in a temporary
                # in-memory table. Otherwise, once the first partition has
                # been inserted into the partitioned table, later data that
                # only contains part of the columns could no longer be added.
                if partition_id == 0:
                    self.ddb_sess.run("""
                        {memory_table_name} = table(kurto_sql)
                    """.format(
                        memory_table_name = memory_table_name
                    ))

                # Per the original author, the `table` statement above only
                # creates the table structure and `tableInsert` adds the real
                # data. NOTE(review): confirm `table(kurto_sql)` does not also
                # copy rows, otherwise partition 0 is stored twice.
                self.ddb_sess.run("""
                    {memory_table_name}.tableInsert(kurto_sql)
                """.format(
                    memory_table_name = memory_table_name
                ))

        print('Did finish all parititons for kurto.')
        pprint(self.ddb_sess.run(memory_table_name))

        return memory_table_name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """
    Compute the kurto factor and store it in the 'hft_daily_factor' table.

    Loads the daily database handle, builds the in-memory factor table,
    (re)creates the partitioned table from it and finally appends the rows.
    """
    target_table = 'hft_daily_factor'

    daily_factor = DailyFactor()
    daily_factor.load_ddb_database()

    source_table = daily_factor.make_kurto_memory_table()
    daily_factor.create_factor_partition_table(target_table, source_table)
    daily_factor.append_to_partition_table(target_table, source_table)


if __name__ == '__main__':
    main()
|
|
|
|
|
|