From 5af38a6421c66761c48a4869d400d17124811b53 Mon Sep 17 00:00:00 2001
From: Guofu Li
Date: Fri, 26 Aug 2022 15:14:36 +0800
Subject: [PATCH] 1. `DDBHFTLoader.py`: Mannually remove some in-memory objects
to reduce memory consumption, but mem-consumption still keeps growing... 2.
`DDBPITLoader.py`: Add support for specifying `SYMBOL` typed columns.
---
src/loader/DDBHFTLoader.py | 7 +++++--
src/loader/DDBPITLoader.py | 9 +++++++++
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/src/loader/DDBHFTLoader.py b/src/loader/DDBHFTLoader.py
index c14f786..cbd77ce 100644
--- a/src/loader/DDBHFTLoader.py
+++ b/src/loader/DDBHFTLoader.py
@@ -394,6 +394,7 @@ class DDBHFTLoader(DDBLoader):
row_list = [row for row in row_list
if pd.to_datetime(row[1]) not in _journal_dt.index]
print(f"Resume job for {stock_id}, with {len(row_list)} rows left.")
+ del(_journal_dt)
num_rows = len(row_list)
# 如果行数为0,则说明是空数据,可以直接返回
@@ -426,6 +427,7 @@ class DDBHFTLoader(DDBLoader):
):
sub_pbar.update()
+ del(row_list)
self.dump_journal_writer.write(f"{type_name},{stock_id},OK\n")
self.dump_journal_writer.flush()
@@ -477,8 +479,8 @@ class DDBHFTLoader(DDBLoader):
ddb_sess.upload({df_table_name : df})
# 因为在做Tick数据的时候,偶然发生'CHUNK[xxx] does not exist.',所以在这里使用`append!`函数代换一下试试
- ddb_sess.run("append!(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
- #ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
+ #ddb_sess.run("append!(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
+ ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
dbPath = DDBHFTLoader.ddb_path,
partitioned_table_name = type_name + DDBHFTLoader.ddb_partition_table_suffix,
df_table_name = df_table_name
@@ -486,5 +488,6 @@ class DDBHFTLoader(DDBLoader):
# 由于不是复用`DDBHFTLoader`对象内部的Session,因此如果不手动关闭就会造成内存逐渐泄漏
ddb_sess.close()
+ del(df)
diff --git a/src/loader/DDBPITLoader.py b/src/loader/DDBPITLoader.py
index 95575a6..56719b2 100644
--- a/src/loader/DDBPITLoader.py
+++ b/src/loader/DDBPITLoader.py
@@ -46,6 +46,10 @@ class DDBPITLoader(DDBLoader):
'AppearAtDate' : ('appear_at_date', 'DATE')
}
+ symbol_col_set = {
+ 'S_PROFITNOTICE_STYLE'
+ }
+
date_col_set = {
'report_period',
'appear_in_period',
@@ -136,6 +140,11 @@ class DDBPITLoader(DDBLoader):
mssql_col_name_list.append(_col_name)
ddb_col_name_list.append(_col_name)
ddb_col_type_list.append('DATE')
+ # 需要被转换成SYMBOL的字段
+ elif _col_name in self.symbol_col_set:
+ mssql_col_name_list.append(_col_name)
+ ddb_col_name_list.append(_col_name)
+ ddb_col_type_list.append('SYMBOL')
# 按照对照表进行类型转换
elif _col_type in self.ddb_type_mapping:
mssql_col_name_list.append(_col_name)