You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

258 lines
9.2 KiB

from code_list_all_pkl import code_list as code_list_pickel_from_file, all_fm_init_curr
from TSLfm import TSLfm
from DDBfm import DDBfm
import pandas as pd
from loguru import logger
from os.path import dirname, abspath, join
2 years ago
import sys
running_which_env = 'prd'
2 years ago
def run_add_1day_code_init_minKline(date, code_list):
    """Download one day of minute K-line data and append it to the mink table.

    Deprecated: too slow. Each code is looked up in the database with its own
    ``search_code_date_in_tb`` call before anything is downloaded.

    Args:
        date: Date used as both the start and the end of the TSL request.
        code_list: Codes to download for that date.

    Returns:
        None. Codes already found in the mink table are skipped with a
        warning; nothing is downloaded when every code is skipped.
    """
    ddb = DDBfm(running_which_env)
    try:
        code_list_filtered = []
        for code in code_list:
            if ddb.search_code_date_in_tb(ddb.ddf_hft_mink_tbname, date, code):
                logger.warning(f"Possible duplicates on {date} and {code}")
            else:
                code_list_filtered.append(code)

        if not code_list_filtered:
            return

        with TSLfm() as tsl:
            df = tsl.process_result_data_type(
                tsl.get_mkt_min_k(date, date, code_list_filtered))

        if not df.empty:
            # Report the codes that were actually requested, not the
            # unfiltered input list.
            logger.info(
                f'Getting a df of {df.shape}: {"+".join(code_list_filtered)} on {date}')
            ddb.append_hft_table(ddb.ddf_hft_mink_tbname, df)
    finally:
        # Release the database session on every exit path, as the other
        # functions in this file do.
        ddb.close_sess()
def check_if_date_codelist_exists(typ, date, code_list):
    """Return the codes from ``code_list`` that are missing from the table on ``date``.

    Args:
        typ: ``'tick'`` or ``'mink'``; selects which HFT table is checked.
        date: Date string; ``date[2:6]`` is compared against the last four
            characters of each code (presumably both are YYMM, so contracts
            that ended before ``date`` are dropped -- confirm).
        code_list: Candidate codes. The prefix of each code (everything but
            the last four characters) must be in ``all_fm_init_curr``.

    Returns:
        A list of codes. It is empty when no code survives the local filters;
        otherwise it is whatever ``DDBfm.get_missing_code_date_in_tb`` returns
        for the surviving codes.

    Raises:
        ValueError: If ``typ`` is neither ``'tick'`` nor ``'mink'``.
    """
    if typ not in ('tick', 'mink'):
        # Previously this surfaced later as an UnboundLocalError on tbName.
        raise ValueError(f"typ must be 'tick' or 'mink', got {typ!r}")

    code_list_filtered = []
    for c in code_list:
        if c[:-4] not in all_fm_init_curr:
            # TODO: codes whose init is not in all_fm_init_curr are not
            # counted (original note: "len: s6").
            logger.warning("There's unrecognized code init!!!!")
            continue
        if c[-4:] >= date[2:6]:
            code_list_filtered.append(c)

    if not code_list_filtered:
        # Nothing left to look up, so no database session is opened.
        return code_list_filtered

    ddb1 = DDBfm(running_which_env)
    try:
        if typ == 'tick':
            tbName = ddb1.ddf_hft_tick_tbname
        else:
            tbName = ddb1.ddf_hft_mink_tbname

        code_list_filtered = ddb1.get_missing_code_date_in_tb(
            tbName, date, code_list_filtered)

        if code_list_filtered:
            logger.info(
                f"Need to download {'+'.join(code_list_filtered)} on {date} in {tbName}")
        else:
            logger.info(f"all codes checked in database {tbName} on {date}")
    finally:
        # Close the session even when the lookup raises.
        ddb1.close_sess()

    return code_list_filtered
def run_pool_add_by_datelist_codeinit(typ, date_list, code_list, if_check=1):
    """Download data for every date in ``date_list`` and append it in one pooled write.

    Args:
        typ: ``'tick'`` (uses ``get_trade_tick``) or ``'mink'`` (uses
            ``get_mkt_min_k``); also selects the destination table.
        date_list: Dates to download; each is requested as a single day.
        code_list: Codes to download.
        if_check: When truthy, ``check_if_date_codelist_exists`` narrows
            ``code_list`` per date before downloading.

    Returns:
        None. Nothing is written when no date produced a non-empty frame.

    Raises:
        ValueError: If ``typ`` is neither ``'tick'`` nor ``'mink'``.
    """
    if typ not in ('tick', 'mink'):
        # Previously this surfaced as an UnboundLocalError on df.
        raise ValueError(f"typ must be 'tick' or 'mink', got {typ!r}")

    df_list = []
    for date in date_list:
        if if_check:
            code_list_filtered = check_if_date_codelist_exists(
                typ, date, code_list)
        else:
            code_list_filtered = code_list

        if not code_list_filtered:
            # Nothing to request for this date; skip the TSL round trip.
            continue

        with TSLfm() as tsl:
            if typ == 'tick':
                raw = tsl.get_trade_tick(date, date, code_list_filtered)
            else:
                raw = tsl.get_mkt_min_k(date, date, code_list_filtered)
            df = tsl.process_result_data_type(raw)

        if not df.empty:
            df_list.append(df)

    if not df_list:
        return

    df_all = pd.concat(df_list)
    ddb2 = DDBfm(running_which_env, pool=True)
    try:
        # code_list is non-empty here: a frame can only have been collected
        # for a non-empty filtered list. The log shows the prefix of the
        # first code only.
        logger.info(
            f'Getting a df of {df_all.shape}: {code_list[0][:-4]} on {"+".join(date_list)}')
        if typ == 'tick':
            ddb2.append_pool_hft_table(ddb2.ddf_hft_tick_tbname, df_all)
        else:
            ddb2.append_pool_hft_table(ddb2.ddf_hft_mink_tbname, df_all)
    finally:
        # Close the pooled session even when the append raises.
        ddb2.close_sess()
def run_create_hft_db(date='20221101', if_mink=1, if_tick=1):
    """Create the HFT database and, optionally, its mink and tick tables.

    Each table is created from a frame downloaded for the single code
    ``'T2212'`` on ``date``. Presumably the frame serves as the table
    template -- confirm against ``DDBfm.create_hft_table``.

    Args:
        date: Date used as both the start and the end of the TSL requests.
        if_mink: When truthy, create the minute K-line table.
        if_tick: When truthy, create the tick table.

    Returns:
        None.
    """
    code_list = ['T2212']
    ddb = DDBfm(running_which_env)
    try:
        ddb.create_hft_database()

        if if_mink:
            logger.info(f"creating mink on {date} for {code_list[0]}")
            with TSLfm() as tsl:
                df_mink = tsl.process_result_data_type(
                    tsl.get_mkt_min_k(date, date, code_list))
            ddb.create_hft_table(ddb.ddf_hft_mink_tbname, df_mink)

        if if_tick:
            logger.info(f"creating tick on {date} for {code_list[0]}")
            with TSLfm() as tsl:
                df_tick = tsl.process_result_data_type(
                    tsl.get_trade_tick(date, date, code_list))
            ddb.create_hft_table(ddb.ddf_hft_tick_tbname, df_tick)
    finally:
        # Release the database session, as the other functions in this
        # file do once they are finished with it.
        ddb.close_sess()
2 years ago
def run():
    """Backfill minute K-lines one day and one code prefix at a time.

    Deprecated: too slow.

    Codes from ``code_list_pickel_from_file`` are bucketed by prefix
    (everything but the last four characters). For every calendar day from
    2022-09-30 to 2022-10-31 inclusive, each bucket is passed to
    ``run_add_1day_code_init_minKline``.
    """
    codes_by_init = {}
    for contract in code_list_pickel_from_file:
        codes_by_init.setdefault(contract[:-4], []).append(contract)

    first_day, last_day = '2022-09-30', '2022-10-31'
    # Calendar days rendered as YYYYMMDD strings.
    day_strings = [
        day.strftime('%Y%m%d')
        for day in pd.date_range(first_day, last_day, freq='D')
    ]

    for day in day_strings:
        for ind, (code_init, contracts) in enumerate(codes_by_init.items()):
            logger.info(f"Getting {code_init} (no.{ind})")
            run_add_1day_code_init_minKline(day, contracts)
2 years ago
def run_pool_dates_by_code_init_n_group(typ='mink', code_gp_amt=10, date_gp_amt=10, start_date='20220101', end_date='20221031', if_check=1, code_dict_by='init'):
    """Download and store data for all known codes, batched by code group and date group.

    Codes come from ``code_list_pickel_from_file``. Dates are every calendar
    day from ``start_date`` to ``end_date``, bucketed by
    ``day_of_month % date_gp_amt``. Each (date bucket, code bucket) pair is
    passed to ``run_pool_add_by_datelist_codeinit``.

    Args:
        typ: ``'mink'`` or ``'tick'``.
        code_gp_amt: Number of code buckets when ``code_dict_by='group'``.
        date_gp_amt: Number of date buckets.
        start_date: First date; ``start_date[2:6]`` is also compared against
            the last four characters of each code to drop earlier codes.
        end_date: Last date (inclusive).
        if_check: Forwarded to ``run_pool_add_by_datelist_codeinit``.
        code_dict_by: ``'init'`` for one bucket per code prefix, ``'group'``
            to merge prefixes round-robin into ``code_gp_amt`` buckets.

    Returns:
        None.

    Raises:
        ValueError: If ``typ`` or ``code_dict_by`` is not a supported value,
            or a group count that is used is smaller than 1.
    """
    if typ not in ('mink', 'tick'):
        # Previously an unknown typ looped over everything and did nothing.
        raise ValueError(f"typ must be 'mink' or 'tick', got {typ!r}")
    if code_dict_by not in ('init', 'group'):
        # Previously this surfaced as an UnboundLocalError on all_code_dict.
        raise ValueError(
            f"code_dict_by must be 'init' or 'group', got {code_dict_by!r}")
    if date_gp_amt < 1:
        raise ValueError(f"date_gp_amt must be >= 1, got {date_gp_amt!r}")
    if code_dict_by == 'group' and code_gp_amt < 1:
        raise ValueError(f"code_gp_amt must be >= 1, got {code_gp_amt!r}")

    logger.info("Running run_pool_dates_by_group")

    all_code_dict = _group_codes_by_init(
        code_list_pickel_from_file, all_fm_init_curr, start_date[2:6])
    if code_dict_by == 'group':
        all_code_dict = _merge_inits_round_robin(all_code_dict, code_gp_amt)

    dates_dict_by_day = _group_dates_by_day_of_month(
        start_date, end_date, date_gp_amt)
    logger.debug(dates_dict_by_day)

    num_of_code_group = len(all_code_dict)
    for group_no, date_list in dates_dict_by_day.items():
        # In 'group' mode the key logged as code_init is a group number.
        for ind, (code_init, code_list) in enumerate(all_code_dict.items()):
            logger.info(
                f"Getting {code_init} (no.{ind}/{num_of_code_group} of date_group {group_no}/{date_gp_amt})")
            logger.info(date_list)
            logger.info(code_list)
            if typ == 'tick':
                logger.info('Running tick')
            run_pool_add_by_datelist_codeinit(
                typ, date_list, code_list, if_check)


def _group_codes_by_init(codes, known_inits, start_yymm):
    """Bucket de-duplicated, sorted codes by prefix, dropping early or unknown ones."""
    grouped = {}
    for c in sorted(set(codes)):
        if c[-4:] < start_yymm:
            continue
        init = c[:-4]
        if init not in known_inits:
            # TODO: codes whose init is not in the known set are not counted.
            logger.warning("There's unrecognized code init!!!!")
            continue
        grouped.setdefault(init, []).append(c)
    return grouped


def _merge_inits_round_robin(codes_by_init, group_amt):
    """Merge per-prefix code lists into ``group_amt`` buckets, assigned round-robin."""
    merged = {}
    for ind, init in enumerate(sorted(codes_by_init)):
        # extend() copies into a fresh list, so the per-prefix lists are
        # never mutated (the original aliased the first list and used +=).
        merged.setdefault(ind % group_amt, []).extend(codes_by_init[init])
    return merged


def _group_dates_by_day_of_month(start_date, end_date, group_amt):
    """Bucket each calendar day (as YYYYMMDD) by ``day_of_month % group_amt``."""
    grouped = {}
    for day in pd.date_range(start_date, end_date, freq='D'):
        grouped.setdefault(day.day % group_amt, []).append(
            day.strftime('%Y%m%d'))
    return grouped
2 years ago
if __name__ == '__main__':
    # Command-line entry point.
    #
    # With no arguments a built-in default job is run. Otherwise exactly six
    # positional arguments are required:
    #   typ  start_date  end_date  if_check  code_groups  date_groups
    import time

    ROOT_DIR = abspath(join(dirname(abspath(__file__)), ".."))

    # Replace loguru's default sink: only warnings and above go to stderr.
    logger.remove()
    logger.add(sys.stderr, level="WARNING")

    # One-off bootstrap, creates the database and both tables:
    # run_create_hft_db()
    tic = time.perf_counter()

    cli_args = sys.argv[1:]
    if cli_args:
        if len(cli_args) != 6:
            # Fail with a readable message instead of a tuple-unpacking error.
            sys.exit(
                f"usage: {sys.argv[0]} typ start_date end_date if_check "
                f"code_groups date_groups (got {len(cli_args)} argument(s))")
        typ, st_d, en_d, if_check, split_code_into_howmany_groups_no, split_date_into_howmany_groups = cli_args
        if_check = int(if_check)
        split_code_into_howmany_groups_no = int(split_code_into_howmany_groups_no)
        split_date_into_howmany_groups = int(split_date_into_howmany_groups)
    else:
        typ = 'tick'
        st_d = '20211201'
        en_d = '20211231'
        if_check = True
        split_code_into_howmany_groups_no = 20
        split_date_into_howmany_groups = 5

    # Per-run log file; the name records the environment and job parameters.
    logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}_{typ}_{st_d}_{en_d}_{if_check}_{split_code_into_howmany_groups_no}_{split_date_into_howmany_groups}.log",
               rotation="10 MB", compression="zip", level="INFO")

    logger.warning(
        f"Going to run *{typ}* from {st_d} to {en_d} with if_check dupliactes={if_check} in *{running_which_env}*, plz check if this info is correct.\n\n\n\n")

    run_pool_dates_by_code_init_n_group(typ=typ, code_gp_amt=split_code_into_howmany_groups_no,
                                        date_gp_amt=split_date_into_howmany_groups, start_date=st_d, end_date=en_d, if_check=if_check, code_dict_by='group')

    toc = time.perf_counter()
    logger.info(f"Running used {toc - tic:0.4f} seconds")
# Timing notes: all "T" tasks -- "Running used 588.5782 seconds" for 10 months of data.
# 600 s / 60 = 10 min; allow about 12 min per code_init.
# 12 min * 71 = ~850 min; 850 / 60 = ~15 hr for all codes for each year.