From 5af38a6421c66761c48a4869d400d17124811b53 Mon Sep 17 00:00:00 2001 From: Guofu Li Date: Fri, 26 Aug 2022 15:14:36 +0800 Subject: [PATCH] 1. `DDBHFTLoader.py`: Mannually remove some in-memory objects to reduce memory consumption, but mem-consumption still keeps growing... 2. `DDBPITLoader.py`: Add support for specifying `SYMBOL` typed columns. --- src/loader/DDBHFTLoader.py | 7 +++++-- src/loader/DDBPITLoader.py | 9 +++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/loader/DDBHFTLoader.py b/src/loader/DDBHFTLoader.py index c14f786..cbd77ce 100644 --- a/src/loader/DDBHFTLoader.py +++ b/src/loader/DDBHFTLoader.py @@ -394,6 +394,7 @@ class DDBHFTLoader(DDBLoader): row_list = [row for row in row_list if pd.to_datetime(row[1]) not in _journal_dt.index] print(f"Resume job for {stock_id}, with {len(row_list)} rows left.") + del(_journal_dt) num_rows = len(row_list) # 如果行数为0,则说明是空数据,可以直接返回 @@ -426,6 +427,7 @@ class DDBHFTLoader(DDBLoader): ): sub_pbar.update() + del(row_list) self.dump_journal_writer.write(f"{type_name},{stock_id},OK\n") self.dump_journal_writer.flush() @@ -477,8 +479,8 @@ class DDBHFTLoader(DDBLoader): ddb_sess.upload({df_table_name : df}) # 因为在做Tick数据的时候,偶然发生'CHUNK[xxx] does not exist.',所以在这里使用`append!`函数代换一下试试 - ddb_sess.run("append!(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format( - #ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format( + #ddb_sess.run("append!(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format( + ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format( dbPath = DDBHFTLoader.ddb_path, partitioned_table_name = type_name + DDBHFTLoader.ddb_partition_table_suffix, df_table_name = df_table_name @@ -486,5 +488,6 @@ class DDBHFTLoader(DDBLoader): # 由于不是复用`DDBHFTLoader`对象内部的Session,因此如果不手动关闭就会造成内存逐渐泄漏 ddb_sess.close() + del(df) diff --git a/src/loader/DDBPITLoader.py b/src/loader/DDBPITLoader.py index 95575a6..56719b2 100644 --- a/src/loader/DDBPITLoader.py +++ b/src/loader/DDBPITLoader.py @@ -46,6 +46,10 @@ class DDBPITLoader(DDBLoader): 'AppearAtDate' : ('appear_at_date', 'DATE') } + symbol_col_set = { + 'S_PROFITNOTICE_STYLE' + } + date_col_set = { 'report_period', 'appear_in_period', @@ -136,6 +140,11 @@ class DDBPITLoader(DDBLoader): mssql_col_name_list.append(_col_name) ddb_col_name_list.append(_col_name) ddb_col_type_list.append('DATE') + # 需要被转换成SYMBOL的字段 + elif _col_name in self.symbol_col_set: + mssql_col_name_list.append(_col_name) + ddb_col_name_list.append(_col_name) + ddb_col_type_list.append('SYMBOL') # 按照对照表进行类型转换 elif _col_type in self.ddb_type_mapping: mssql_col_name_list.append(_col_name)