diff --git a/src/data_loader.py b/src/data_loader.py index 09eccd0..89a92ef 100644 --- a/src/data_loader.py +++ b/src/data_loader.py @@ -69,19 +69,27 @@ def run_add_1day_code_init_minKline(date,code_list): # del ddb2 -def run_pool_add_byday_code_init_tick(date_list,code_list): - df_list=[] +def check_if_date_codelist_exists(date,code_list): code_list_filtered=code_list - for date in date_list: - - ddb1 = DDBfm(running_which_env) - code_list_filtered = ddb1.get_missing_code_date_in_tb(ddb1.ddf_hft_mink_tbname,date,code_list) - if len(code_list_filtered)==0: - continue + ddb1 = DDBfm(running_which_env) + code_list_filtered = ddb1.get_missing_code_date_in_tb(ddb1.ddf_hft_mink_tbname,date,code_list) + if code_list_filtered: logger.info(f"getting {'+'.join(code_list_filtered)} on {date}") - ddb1.close_sess() - del ddb1 + else: + logger.info(f"all checked in database") + ddb1.close_sess() + del ddb1 + return code_list_filtered + +def run_pool_add_byday_code_init_tick(date_list,code_list,if_check=1): + df_list=[] + + for date in date_list: + if if_check: + code_list_filtered = check_if_date_codelist_exists(date,code_list) + else: + code_list_filtered = code_list with TSLfm() as tsl: df = tsl.process_result_data_type(tsl.get_trade_tick(date,date,code_list_filtered)) if not df.empty: @@ -142,7 +150,7 @@ def run(): -def run_pool_dates_by_code_init_n_group(typ='mink',group_amount=10,start_date='20220101',end_date='20221031'): +def run_pool_dates_by_code_init_n_group(typ='mink',gp_amt=10,start_date='20220101',end_date='20221031',if_check=1): logger.info("Running run_pool_dates_by_group") all_code_dict_by_init={} for c in code_list_pickel: @@ -159,7 +167,7 @@ def run_pool_dates_by_code_init_n_group(typ='mink',group_amount=10,start_date='2 dates_dict_by_day={} for d in list(allDates.astype('str')): - group_no = int(d[-2:])%group_amount + group_no = int(d[-2:])%gp_amt if group_no not in dates_dict_by_day: dates_dict_by_day[group_no] = [d.replace('-','')] else: @@ -173,8 +181,8 @@ def run_pool_dates_by_code_init_n_group(typ='mink',group_amount=10,start_date='2 num_of_init = len(all_code_dict_by_init) for ind,code_init in enumerate(all_code_dict_by_init): # done: 'T','TS','TS','TF' - if code_init in ['T']: # todo filtered this ,,'TF', 'IC','IF','IH','IM' - logger.info(f"Getting {code_init} (no.{ind}/{num_of_init} of group {group_no}/{group_amount})") + # if code_init in ['T']: # todo filtered this ,,'TF', 'IC','IF','IH','IM' + logger.info(f"Getting {code_init} (no.{ind}/{num_of_init} of group {group_no}/{gp_amt})") code_list = all_code_dict_by_init[code_init] if typ=='mink': # logger.info('Running mink') @@ -183,18 +191,17 @@ def run_pool_dates_by_code_init_n_group(typ='mink',group_amount=10,start_date='2 # run_pool_add_byday_code_init_minKline(date_list,code_list) elif typ=='tick': logger.info('Running tick') - run_pool_add_byday_code_init_tick(date_list,code_list) + run_pool_add_byday_code_init_tick(date_list,code_list,if_check) if __name__ == '__main__': # run() - - # run_create_hft_db() # including two tables + run_create_hft_db() # including two tables import time tic = time.perf_counter() - run_pool_dates_by_code_init_n_group(typ='tick') + run_pool_dates_by_code_init_n_group(typ='tick',gp_amt=3,start_date='20220601',end_date='20221031',if_check=0) # run_pool_dates_by_code_init_n_group(typ='mink',group_amount=5) toc = time.perf_counter()