"""Load high-frequency market data from TSL into DDB tables.

Data is fetched through ``TSLfm`` (``get_mkt_min_k`` for ``'mink'``,
``get_trade_tick`` for ``'tick'``) and written through ``DDBfm``.
When run as a script, codes from ``code_list_pickel`` are split into groups
and dates into day-of-month groups, and each (date group, code group) pair
is downloaded and appended in one batch.
"""
import sys
import time
from collections import defaultdict
from os.path import dirname, abspath, join

import pandas as pd
from loguru import logger

from code_list import code_list_pickel
from TSLfm import TSLfm
from DDBfm import DDBfm

running_which_env = 'prd'

ROOT_DIR = abspath(join(dirname(abspath(__file__)), ".."))
logger.remove()
logger.add(sys.stderr, level="WARNING")
logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}.log",
           rotation="10 MB", compression="zip", level="INFO")

# The only data kinds this module knows how to fetch and store.
_VALID_TYPS = ('tick', 'mink')


def _validate_typ(typ):
    """Raise ``ValueError`` unless *typ* is ``'tick'`` or ``'mink'``."""
    if typ not in _VALID_TYPS:
        raise ValueError(
            f"typ must be one of {_VALID_TYPS}, got {typ!r}")


def _hft_table_name(ddb, typ):
    """Return the table-name attribute of *ddb* that matches *typ*."""
    _validate_typ(typ)
    if typ == 'tick':
        return ddb.ddf_hft_tick_tbname
    return ddb.ddf_hft_mink_tbname


def _group_codes_by_init(codes):
    """Group *codes* by prefix, i.e. the code minus its last four characters.

    Returns a plain dict mapping prefix -> list of codes, in first-seen order.
    """
    grouped = defaultdict(list)
    for code in codes:
        grouped[code[:-4]].append(code)
    return dict(grouped)


def _regroup_codes_round_robin(codes_by_init, n_groups):
    """Merge the per-prefix code lists into at most *n_groups* buckets.

    Prefixes are taken in sorted order and assigned to bucket
    ``index % n_groups``. Every bucket is a fresh list, so the lists inside
    *codes_by_init* are never mutated.
    """
    buckets = defaultdict(list)
    for ind, code_init in enumerate(sorted(codes_by_init)):
        buckets[ind % n_groups].extend(codes_by_init[code_init])
    return dict(buckets)


def _group_dates_by_day_of_month(start_date, end_date, n_groups):
    """Bucket every calendar day in the range by ``day_of_month % n_groups``.

    Returns a dict mapping bucket number -> list of ``'YYYYMMDD'`` strings.
    """
    buckets = defaultdict(list)
    for day in pd.date_range(start_date, end_date, freq='D'):
        buckets[day.day % n_groups].append(day.strftime('%Y%m%d'))
    return dict(buckets)


def run_add_1day_code_init_minKline(date, code_list):
    """Deprecated (too slow): append one day of 'mink' data, code by code.

    Each code is looked up individually with ``search_code_date_in_tb``;
    codes already present are logged as possible duplicates and skipped.
    The remaining codes are fetched with ``get_mkt_min_k`` and appended to
    the mink table.

    Returns:
        0 when every code was already present, otherwise None.
    """
    ddb = DDBfm(running_which_env)
    try:
        code_list_filtered = []
        for code in code_list:
            if ddb.search_code_date_in_tb(ddb.ddf_hft_mink_tbname, date, code):
                logger.warning(f"Possible duplicates on {date} and {code}")
            else:
                code_list_filtered.append(code)
        if not code_list_filtered:
            return 0

        with TSLfm() as tsl:
            df = tsl.process_result_data_type(
                tsl.get_mkt_min_k(date, date, code_list_filtered))
        if not df.empty:
            logger.info(
                f'Getting a df of {df.shape}: {code_list[0][:-4]} on {date}')
            ddb.append_hft_table(ddb.ddf_hft_mink_tbname, df)
    finally:
        # Release the session even when the lookup, fetch or append fails.
        ddb.close_sess()


def check_if_date_codelist_exists(typ, date, code_list):
    """Return the codes of *code_list* that still need downloading for *date*.

    The answer comes from ``DDBfm.get_missing_code_date_in_tb`` on the table
    selected by *typ*.

    Args:
        typ: ``'tick'`` or ``'mink'``.
        date: date passed through to the database lookup.
        code_list: candidate codes.

    Raises:
        ValueError: if *typ* is not ``'tick'`` or ``'mink'``.
    """
    ddb1 = DDBfm(running_which_env)
    try:
        tbName = _hft_table_name(ddb1, typ)
        code_list_filtered = ddb1.get_missing_code_date_in_tb(
            tbName, date, code_list)
    finally:
        # Always release the session, including on an invalid typ.
        ddb1.close_sess()

    if code_list_filtered:
        logger.info(
            f"Need to download {'+'.join(code_list_filtered)} on {date} in {tbName}")
    else:
        logger.info(f"all codes checked in database {tbName} on {date}")
    return code_list_filtered


def run_pool_add_by_datelist_codeinit(typ, date_list, code_list, if_check=1):
    """Fetch *code_list* for every date in *date_list* and append in one batch.

    Args:
        typ: ``'tick'`` (uses ``get_trade_tick``) or ``'mink'``
            (uses ``get_mkt_min_k``).
        date_list: dates to fetch, one TSL request per date.
        code_list: codes to fetch.
        if_check: when truthy, ask the database first and only fetch the
            codes it reports as missing for each date.

    Returns:
        0 when there was nothing to append, otherwise None.

    Raises:
        ValueError: if *typ* is not ``'tick'`` or ``'mink'``.
    """
    _validate_typ(typ)

    df_list = []
    for date in date_list:
        if if_check:
            code_list_filtered = check_if_date_codelist_exists(
                typ, date, code_list)
        else:
            code_list_filtered = code_list
        if not code_list_filtered:
            # Nothing to download for this date; do not open a TSL
            # connection just to query an empty code list.
            continue

        with TSLfm() as tsl:
            if typ == 'tick':
                raw = tsl.get_trade_tick(date, date, code_list_filtered)
            else:
                raw = tsl.get_mkt_min_k(date, date, code_list_filtered)
            df = tsl.process_result_data_type(raw)
        if not df.empty:
            df_list.append(df)

    if not df_list:
        return 0
    df_all = pd.concat(df_list)

    ddb2 = DDBfm(running_which_env, pool=True)
    try:
        logger.info(
            f'Getting a df of {df_all.shape}: {code_list[0][:-4]} on {"+".join(date_list)}')
        ddb2.append_pool_hft_table(_hft_table_name(ddb2, typ), df_all)
    finally:
        # Release the pooled session even if the append raises.
        ddb2.close_sess()


def run_create_hft_db(date='20221101', if_mink=1, if_tick=1):
    """Create the HFT database and its tables from one sample download.

    A single code (``'T2212'``) is fetched for *date* and the resulting
    frame is handed to ``create_hft_table`` for each requested table.

    Args:
        date: date of the sample download.
        if_mink: when truthy, create the mink table.
        if_tick: when truthy, create the tick table.
    """
    code_list = ['T2212']
    ddb = DDBfm(running_which_env)
    try:
        ddb.create_hft_database()

        if if_mink:
            logger.info(f"creating mink on {date} for {code_list[0]}")
            with TSLfm() as tsl:
                df_mink = tsl.process_result_data_type(
                    tsl.get_mkt_min_k(date, date, code_list))
            ddb.create_hft_table(ddb.ddf_hft_mink_tbname, df_mink)

        if if_tick:
            logger.info(f"creating tick on {date} for {code_list[0]}")
            with TSLfm() as tsl:
                df_tick = tsl.process_result_data_type(
                    tsl.get_trade_tick(date, date, code_list))
            ddb.create_hft_table(ddb.ddf_hft_tick_tbname, df_tick)
    finally:
        ddb.close_sess()


def run():
    """Deprecated (too slow): load mink data day by day, prefix by prefix.

    Covers the hard-coded range 2022-09-30 .. 2022-10-31 for every code in
    ``code_list_pickel``.
    """
    all_code_dict_by_init = _group_codes_by_init(code_list_pickel)

    start_date = '2022-09-30'
    end_date = '2022-10-31'
    all_dates = pd.date_range(start_date, end_date, freq='D').strftime('%Y%m%d')

    for date in all_dates:
        for ind, (code_init, code_list) in enumerate(
                all_code_dict_by_init.items()):
            logger.info(f"Getting {code_init} (no.{ind})")
            run_add_1day_code_init_minKline(date, code_list)


def run_pool_dates_by_code_init_n_group(typ='mink', code_gp_amt=10, date_gp_amt=10, start_date='20220101', end_date='20221031', if_check=1, code_dict_by='init'):
    """Download and append data for all codes over a date range, in batches.

    Dates between *start_date* and *end_date* are bucketed by
    ``day_of_month % date_gp_amt``. Codes from ``code_list_pickel`` are
    grouped either by prefix or into *code_gp_amt* round-robin buckets.
    ``run_pool_add_by_datelist_codeinit`` is called once per
    (date bucket, code group) pair.

    Args:
        typ: ``'mink'`` or ``'tick'``.
        code_gp_amt: number of code buckets when ``code_dict_by='group'``.
        date_gp_amt: number of date buckets.
        start_date: first date of the range (anything ``pd.date_range`` accepts).
        end_date: last date of the range, inclusive.
        if_check: forwarded to ``run_pool_add_by_datelist_codeinit``.
        code_dict_by: ``'init'`` for one group per code prefix, ``'group'``
            for *code_gp_amt* merged buckets.

    Raises:
        ValueError: on an unknown *typ* or *code_dict_by*, or a group count
            below 1.
    """
    _validate_typ(typ)
    if code_dict_by not in ('init', 'group'):
        raise ValueError(
            f"code_dict_by must be 'init' or 'group', got {code_dict_by!r}")
    if date_gp_amt < 1:
        raise ValueError(f"date_gp_amt must be >= 1, got {date_gp_amt}")
    if code_dict_by == 'group' and code_gp_amt < 1:
        raise ValueError(f"code_gp_amt must be >= 1, got {code_gp_amt}")

    logger.info("Running run_pool_dates_by_group")

    all_code_dict = _group_codes_by_init(code_list_pickel)
    if code_dict_by == 'group':
        all_code_dict = _regroup_codes_round_robin(all_code_dict, code_gp_amt)

    dates_dict_by_day = _group_dates_by_day_of_month(
        start_date, end_date, date_gp_amt)
    logger.debug(dates_dict_by_day)

    num_of_code_group = len(all_code_dict)
    for group_no, date_list in dates_dict_by_day.items():
        for ind, (code_init, code_list) in enumerate(all_code_dict.items()):
            logger.info(
                f"Getting {code_init} (no.{ind}/{num_of_code_group} of date_group {group_no}/{date_gp_amt})")
            if typ == 'mink':
                logger.info(date_list)
                logger.info(code_list)
            else:
                logger.info('Running tick')
            run_pool_add_by_datelist_codeinit(
                typ, date_list, code_list, if_check)


def main(argv=None):
    """Script entry point.

    With no arguments, runs a built-in default job. Otherwise expects
    exactly six arguments:
    ``typ start_date end_date if_check code_groups date_groups``.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    # run_create_hft_db() creates the database and both tables; run it once
    # by hand before the first load.

    tic = time.perf_counter()
    if args:
        if len(args) != 6:
            sys.exit(
                "usage: typ start_date end_date if_check "
                "code_groups date_groups")
        typ, st_d, en_d = args[:3]
        if_check = int(args[3])
        split_code_into_howmany_groups_no = int(args[4])
        split_date_into_howmany_groups = int(args[5])
    else:
        typ = 'mink'
        st_d = '20210104'
        en_d = '20211231'
        if_check = 0
        split_code_into_howmany_groups_no = 5
        split_date_into_howmany_groups = 20

    logger.warning(
        f"Going to run *{typ}* from {st_d} to {en_d} with if_check dupliactes={if_check} in *{running_which_env}*, plz check if this info is correct.\n\n\n\n")
    run_pool_dates_by_code_init_n_group(
        typ=typ,
        code_gp_amt=split_code_into_howmany_groups_no,
        date_gp_amt=split_date_into_howmany_groups,
        start_date=st_d,
        end_date=en_d,
        if_check=if_check,
        code_dict_by='group')

    toc = time.perf_counter()
    logger.info(f"Running used {toc - tic:0.4f} seconds")

    # Timing notes kept from the original author:
    #   all 'T' tasks: "Running used 588.5782 seconds" for 10 months
    #   (600 / 60 = 10 min), so roughly 12 min per code_init per year;
    #   12 * 71 = ~850 min / 60 = ~15 hr for all codes for each year.


if __name__ == '__main__':
    main()