From 4d2ad2b411d40569e91c3615d55ab8b7a3284066 Mon Sep 17 00:00:00 2001 From: yzlocal Date: Thu, 10 Nov 2022 11:04:44 +0800 Subject: [PATCH] single adding working --- src/DDBfm.py | 15 ++++++++------- src/TSLfm.py | 4 ++-- src/data_loader.py | 28 +++++++++++++++++----------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/DDBfm.py b/src/DDBfm.py index 25347f6..2b7e3b1 100644 --- a/src/DDBfm.py +++ b/src/DDBfm.py @@ -83,23 +83,24 @@ class DDBfm(): def append_hft_table(self, tbName, df): - # load table to check? time&date&code - - # print(df.shape) - appender = ddb.tableAppender(tableName=tbName, ddbSession=self.sess,dbPath=self.ddb_hft_dbPath) appender.append(df) - + logger.info(f"sucessfully append some df of {df.shape}") def search_code_date_in_tb(self,tbName,curr_date,curr_code): curr_date_formatted = curr_date[:4]+'.'+curr_date[4:6]+'.'+curr_date[6:] # print('?did i split this right') # print(curr_date_formatted) tb = self.sess.loadTable(dbPath=self.ddb_hft_dbPath, tableName=tbName) + logger.info(f"Quickly checking if data on {curr_code} {curr_date} exists...") # could do a slow checking of num of data try: # doing this cuz there's no method to check if a table is empty lol - df = tb.select('*').where(f"code=`{curr_code}").where(f"m_nDatetime>={curr_date_formatted}d").top(1).toDF() - print(df) + cond=f"code=`{curr_code}, m_nDatetime.date()={curr_date_formatted}d" + # print(cond) + df = tb.select('*').where(cond).top(1).toDF() + if df.empty or df.shape[0]==0: + # print(df) + return 0 except: return 0 return 1 diff --git a/src/TSLfm.py b/src/TSLfm.py index 82fdb26..f2d156a 100644 --- a/src/TSLfm.py +++ b/src/TSLfm.py @@ -214,7 +214,7 @@ class TSLfm: df = pd.DataFrame(r.value()) if df.empty: logger.info('No data on this day.') - return 0 + return pd.DataFrame() logger.info(f"Processing new df of shape {df.shape}, which looks like\n{df.head(5)}") # new = df["m_nDatetime"].str.split(" ", n = 1, expand = True) @@ -238,6 +238,6 @@ if __name__ == '__main__': logger.add("../logs/{time:YYYYMMDD-HHmmss}_TSLfm.log", rotation="10 MB", compression="zip", level="INFO") with TSLfm() as tsl: - t_list=['CF2211'] + t_list=['T2212'] df = tsl.process_result_data_type(tsl.get_mkt_min_k('20221031','20221101',t_list)) print(df) diff --git a/src/data_loader.py b/src/data_loader.py index 012ce2e..42f0ea4 100644 --- a/src/data_loader.py +++ b/src/data_loader.py @@ -3,14 +3,12 @@ import sys running_which_env='prd' - from os.path import dirname, abspath, join ROOT_DIR = abspath(join(dirname(abspath(__file__)), "..")) from loguru import logger logger.remove() logger.add(sys.stderr, level="INFO") -logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}_{running_which_env}.log", rotation="10 MB", compression="zip", level="INFO") - +logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}.log", rotation="10 MB", compression="zip", level="INFO") @@ -22,20 +20,27 @@ from TSLfm import TSLfm from code_list import code_list_pickel def run_add_1day_code_init_minKline(date,code_list): - with TSLfm() as tsl: - df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list)) ddb = DDBfm(running_which_env) + code_list_filtered = [] for code in code_list: if ddb.search_code_date_in_tb(ddb.ddf_hft_mink_tbname,date,code): logger.warning(f"Possible duplicates on {date} and {code}") - return 0 - ddb.append_hft_table(ddb.ddf_hft_mink_tbname,df) + else: + code_list_filtered.append(code) + if len(code_list_filtered)==0: + return 0 + + with TSLfm() as tsl: + df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list_filtered)) + if not df.empty: + logger.info(f'Getting a df of {df.shape}: {code_list[0][:-4]} on {date}') + ddb.append_hft_table(ddb.ddf_hft_mink_tbname,df) def run_create_db_minKline(): date = '20221101' with TSLfm() as tsl: - code_list=['CF2211'] + code_list=['T2212'] df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list)) # print(df) ddb = DDBfm(running_which_env) @@ -54,8 +59,8 @@ def run(): # print(all_code_dict_by_init) - start_date='2022-10-31' - end_date='2022-11-08' + start_date='2022-09-30' + end_date='2022-11-09' allDates = pd.date_range(start_date, end_date, freq ='D') allDates = [i.replace('-','') for i in list(allDates.astype('str'))] @@ -88,4 +93,5 @@ def run(): # ddb.append_hft_table(ddb.ddf_hft_mink_tbname,df) if __name__ == '__main__': - run() \ No newline at end of file + run() + # run_create_db_minKline() \ No newline at end of file