@@ -119,12 +119,15 @@ class DDBHFTLoader(DDBLoader):
        # The `Pool` object must not be created repeatedly, so it is created once at the
        # outermost level of the loop and then passed in as an argument.
        with Pool(self.num_workers if num_workers is None else num_workers) as pool:
            # Always reuse the connection object, to reduce the memory consumption.
            with self.mssql_engine.connect() as conn:
                # Loop through the stock list.
                for hft_type_name in self.hft_type_list:
                    print('Will work on hft type:', hft_type_name)
                    with tqdm(stock_list) as pbar:
                        for stock_id in pbar:
                            pbar.set_description(f"Working on stock {stock_id}")
-                           self.dump_hft_to_ddb(hft_type_name, stock_id, pbar=pbar, pool=pool)
+                           self.dump_hft_to_ddb(hft_type_name, stock_id, conn, pbar=pbar, pool=pool)

    def _get_stock_date_list(self, cache=False):
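
# --- Illustrative sketch, not part of the diff ---------------------------------------------
# A minimal version of the pattern this hunk moves to: the multiprocessing Pool and the MSSQL
# connection are each created exactly once, outside the loops, and passed down to the
# per-stock call. `dump_one`, `dump_all`, the connection URL and the item values below are
# hypothetical stand-ins, not names from the original code.
from multiprocessing import Pool

from sqlalchemy import create_engine, text
from tqdm import tqdm


def dump_one(conn, pool, item):
    # Hypothetical per-item worker: query on the shared connection, then hand any
    # CPU-bound post-processing to the shared pool.
    rows = conn.execute(text("select 1 as v")).fetchall()
    return pool.map(str, [len(rows), item])


def dump_all(items, num_workers=4, url="mssql+pymssql://user:pass@host/db"):
    engine = create_engine(url)                      # hypothetical connection URL
    with Pool(num_workers) as pool:                  # one Pool for the whole run
        with engine.connect() as conn:               # one connection, reused for every item
            with tqdm(items) as pbar:
                for item in pbar:
                    pbar.set_description(f"Working on {item}")
                    dump_one(conn, pool, item)
# --------------------------------------------------------------------------------------------
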
@@ -354,7 +357,7 @@ class DDBHFTLoader(DDBLoader):
        print('-' * 80)

-   def dump_hft_to_ddb(self, type_name, stock_id, trade_date=None, pbar=None, pool=None):
+   def dump_hft_to_ddb(self, type_name, stock_id, conn, trade_date=None, pbar=None, pool=None):
        if (type_name, stock_id, 'OK') in self.dump_journal_df.index:
            message = f"Will skip ({type_name}, {stock_id}) as it appears in the dump journal."
            if pbar is None:
@@ -376,7 +379,6 @@ class DDBHFTLoader(DDBLoader):
        # After some experimentation, batching the query per individual stock turns out to
        # have acceptable performance.
        # In MSSQL, the index fields are (S_INFO_WINDCODE, TRADE_DT).
-       with self.mssql_engine.connect() as conn:
        stat = """
            select * from [Level2Bytes{mssql_type_name}].dbo.[{mssql_type_name}]
            where S_INFO_WINDCODE='{stock_id}'
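
# --- Illustrative sketch, not part of the diff ---------------------------------------------
# The same per-stock batch query written with bound parameters instead of str.format, assuming
# a hypothetical [Level2BytesTick].dbo.[Tick] table and an already opened SQLAlchemy
# connection `conn`. Filtering on S_INFO_WINDCODE (and optionally TRADE_DT) keeps the query on
# the (S_INFO_WINDCODE, TRADE_DT) index mentioned in the comment above.
from sqlalchemy import text


def fetch_stock_rows(conn, stock_id, trade_date=None):
    sql = ("select * from [Level2BytesTick].dbo.[Tick] "
           "where S_INFO_WINDCODE = :stock_id")
    params = {"stock_id": stock_id}
    if trade_date is not None:
        sql += " and TRADE_DT = :trade_date"
        params["trade_date"] = trade_date
    return conn.execute(text(sql), params).fetchall()
# --------------------------------------------------------------------------------------------
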
@@ -392,6 +394,7 @@ class DDBHFTLoader(DDBLoader):
            row_list = [row for row in row_list
                        if pd.to_datetime(row[1]) not in _journal_dt.index]
            print(f"Resume job for {stock_id}, with {len(row_list)} rows left.")
            del(_journal_dt)

        num_rows = len(row_list)
        # If the row count is 0, the data is empty and we can return directly.
@@ -424,6 +427,7 @@ class DDBHFTLoader(DDBLoader):
        ):
            sub_pbar.update()

        del(row_list)
        self.dump_journal_writer.write(f"{type_name},{stock_id},OK\n")
        self.dump_journal_writer.flush()
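
# --- Illustrative sketch, not part of the diff ---------------------------------------------
# The append-only dump journal pattern used above, with a hypothetical journal path and example
# (type_name, stock_id) values: every finished pair is written as a "type_name,stock_id,OK"
# line and flushed immediately, and on restart the pairs already marked OK are skipped so the
# load resumes where it stopped.
import csv
from pathlib import Path

JOURNAL_PATH = Path("dump.journal")        # hypothetical location


def load_finished(path=JOURNAL_PATH):
    if not path.exists():
        return set()
    with path.open(newline="") as f:
        return {(t, s) for t, s, status in csv.reader(f) if status == "OK"}


def mark_finished(journal_file, type_name, stock_id):
    journal_file.write(f"{type_name},{stock_id},OK\n")
    journal_file.flush()                   # flush so a crash right after still keeps the record


# usage: skip pairs already journaled, journal the rest as they complete
finished = load_finished()
with JOURNAL_PATH.open("a") as journal:
    for type_name, stock_id in [("Tick", "000001.SZ"), ("Order", "000001.SZ")]:
        if (type_name, stock_id) in finished:
            continue
        # ... dump the actual data here ...
        mark_finished(journal, type_name, stock_id)
# --------------------------------------------------------------------------------------------
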
@@ -475,8 +479,8 @@ class DDBHFTLoader(DDBLoader):
        ddb_sess.upload({df_table_name : df})
        # Because 'CHUNK[xxx] does not exist.' occasionally occurred while loading the Tick
        # data, the `append!` function was tried here as a substitute.
-       ddb_sess.run("append!(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
-       #ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
+       #ddb_sess.run("append!(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
+       ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
            dbPath = DDBHFTLoader.ddb_path,
            partitioned_table_name = type_name + DDBHFTLoader.ddb_partition_table_suffix,
            df_table_name = df_table_name
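
# --- Illustrative sketch, not part of the diff ---------------------------------------------
# The upload-then-insert step above written as a standalone helper, assuming an already
# connected dolphindb session `ddb_sess`, a hypothetical dfs://hft database and a
# TickPartitioned partitioned table whose schema matches `df`.
import pandas as pd


def insert_partitioned(ddb_sess, df: pd.DataFrame,
                       db_path="dfs://hft", table_name="TickPartitioned"):
    ddb_sess.upload({"tmp_hft_df": df})    # server-side name of the uploaded DataFrame
    # tableInsert returns the number of rows written; append! is the variant the code had
    # been using before this change.
    return ddb_sess.run(
        f"tableInsert(loadTable('{db_path}', `{table_name}), tmp_hft_df)"
    )
# --------------------------------------------------------------------------------------------
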
@@ -484,5 +488,6 @@ class DDBHFTLoader(DDBLoader):
        # Since this does not reuse the Session held inside the `DDBHFTLoader` object,
        # memory will leak gradually if the session is not closed manually.
        ddb_sess.close()
        del(df)
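
# --- Illustrative sketch, not part of the diff ---------------------------------------------
# Because each worker builds its own session instead of reusing the loader's, the close has to
# be guaranteed; contextlib.closing calls ddb_sess.close() even when the script raises, which
# is what prevents the gradual memory leak noted above. Host and credentials are placeholders.
from contextlib import closing

import dolphindb as ddb


def run_with_session(script, host="127.0.0.1", port=8848, user="admin", passwd="123456"):
    with closing(ddb.session()) as ddb_sess:   # close() is invoked on exit, error or not
        ddb_sess.connect(host, port, user, passwd)
        return ddb_sess.run(script)
# --------------------------------------------------------------------------------------------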