|
|
|
from code_list import code_list_pickel
|
|
|
|
from TSLfm import TSLfm
|
|
|
|
from DDBfm import DDBfm
|
|
|
|
import pandas as pd
|
|
|
|
from loguru import logger
|
|
|
|
from os.path import dirname, abspath, join
|
|
|
|
import sys
|
|
|
|
|
|
|
|
# Which environment the DolphinDB / TSL connections target ('prd' = production).
# Also embedded in the log-file name below so runs are distinguishable.
running_which_env = 'prd'

# Project root: one directory above the directory containing this file.
ROOT_DIR = abspath(join(dirname(abspath(__file__)), ".."))

# Reconfigure loguru: drop the default handler, then log INFO+ to stderr and
# DEBUG+ to a timestamped file under <ROOT_DIR>/logs, rotating at 10 MB with
# rotated files compressed to zip.
logger.remove()
logger.add(sys.stderr, level="INFO")
logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}.log",
           rotation="10 MB", compression="zip", level="DEBUG")
|
|
|
|
|
|
|
|
|
|
|
|
def run_add_1day_code_init_minKline(date, code_list):
    """
    Fetch one day of minute-K lines for the given codes and append them to
    the DolphinDB minute-K table, skipping codes already stored for `date`.

    too slow. depracated.
    """
    ddb = DDBfm(running_which_env)

    # Keep only the codes not yet present in the table for this date.
    pending = []
    for code in code_list:
        if ddb.search_code_date_in_tb(ddb.ddf_hft_mink_tbname, date, code):
            logger.warning(f"Possible duplicates on {date} and {code}")
        else:
            pending.append(code)

    if not pending:
        return 0

    # Pull the day's minute-K data for the remaining codes.
    with TSLfm() as tsl:
        raw = tsl.get_mkt_min_k(date, date, pending)
        df = tsl.process_result_data_type(raw)

    if df.empty:
        return

    logger.info(
        f'Getting a df of {df.shape}: {code_list[0][:-4]} on {date}')
    ddb.append_hft_table(ddb.ddf_hft_mink_tbname, df)
|
|
|
|
|
|
|
|
|
|
|
|
def check_if_date_codelist_exists(typ, date, code_list):
    """
    Return the subset of `code_list` not yet stored in DolphinDB for `date`.

    Parameters
    ----------
    typ : str
        'tick' or 'mink' — selects the tick table or the minute-K table.
    date : str
        Trading date (yyyymmdd).
    code_list : list[str]
        Candidate instrument codes.

    Returns
    -------
    list[str]
        Codes missing from the selected table on `date`; empty if all present.

    Raises
    ------
    ValueError
        If `typ` is neither 'tick' nor 'mink'. (Previously an unknown `typ`
        fell through and crashed with an unrelated NameError on `tbName`.)
    """
    ddb1 = DDBfm(running_which_env)

    if typ == 'tick':
        tbName = ddb1.ddf_hft_tick_tbname
    elif typ == 'mink':
        tbName = ddb1.ddf_hft_mink_tbname
    else:
        ddb1.close_sess()
        raise ValueError(f"Unknown typ {typ!r}; expected 'tick' or 'mink'")

    code_list_filtered = ddb1.get_missing_code_date_in_tb(
        tbName, date, code_list)

    if code_list_filtered:
        logger.info(
            f"Need to download {'+'.join(code_list_filtered)} on {date} in {tbName}")
    else:
        logger.info(f"all codes checked in database {tbName} on {date}")

    ddb1.close_sess()
    del ddb1
    return code_list_filtered
|
|
|
|
|
|
|
|
|
|
|
|
def run_pool_add_by_datelist_codeinit(typ, date_list, code_list, if_check=1):
    """
    Download `typ` data for every date in `date_list`, concatenate the
    per-date frames, and append the result to the matching DolphinDB
    table through a pooled connection.

    Parameters
    ----------
    typ : str
        'tick' or 'mink' — selects both the TSL query and the target table.
    date_list : list[str]
        Trading dates (yyyymmdd).
    code_list : list[str]
        Instrument codes to fetch.
    if_check : int, optional
        When truthy (default), codes already stored for a date are filtered
        out via check_if_date_codelist_exists before downloading.

    Returns
    -------
    int or None
        0 when there was nothing to upload, otherwise None.

    Raises
    ------
    ValueError
        If `typ` is neither 'tick' nor 'mink'. (Previously an unknown `typ`
        left `df` unbound — or silently reused the previous iteration's
        frame — instead of failing clearly.)
    """
    if typ not in ('tick', 'mink'):
        raise ValueError(f"Unknown typ {typ!r}; expected 'tick' or 'mink'")

    df_list = []

    for date in date_list:
        if if_check:
            code_list_filtered = check_if_date_codelist_exists(
                typ, date, code_list)
        else:
            code_list_filtered = code_list

        # Everything already stored for this date: skip the TSL round-trip
        # instead of issuing a query with an empty code list.
        if not code_list_filtered:
            continue

        with TSLfm() as tsl:
            if typ == 'tick':
                df = tsl.process_result_data_type(
                    tsl.get_trade_tick(date, date, code_list_filtered))
            else:  # typ == 'mink', validated above
                df = tsl.process_result_data_type(
                    tsl.get_mkt_min_k(date, date, code_list_filtered))
        if not df.empty:
            df_list.append(df)

    if not df_list:
        return 0

    df_all = pd.concat(df_list)

    ddb2 = DDBfm(running_which_env, pool=True)
    logger.info(
        f'Getting a df of {df_all.shape}: {code_list[0][:-4]} on {"+".join(date_list)}')
    if typ == 'tick':
        ddb2.append_pool_hft_table(ddb2.ddf_hft_tick_tbname, df_all)
    else:
        ddb2.append_pool_hft_table(ddb2.ddf_hft_mink_tbname, df_all)
    ddb2.close_sess()
    del ddb2
|
|
|
|
|
|
|
|
|
|
|
|
def run_create_hft_db(date='20221101', if_mink=1, if_tick=1):
    """
    Create the HFT database and seed the minute-K and/or tick tables with
    one day of data for a single sample contract.

    Parameters
    ----------
    date : str, optional
        Seed trading date (yyyymmdd).
    if_mink : int, optional
        When truthy, create and seed the minute-K table.
    if_tick : int, optional
        When truthy, create and seed the tick table.
    """
    code_list = ['T2212']  # single sample contract used to seed the schema
    ddb = DDBfm(running_which_env)
    ddb.create_hft_database()

    if if_mink:
        logger.info(f"creating mink on {date} for {code_list[0]}")
        with TSLfm() as tsl:
            mink_df = tsl.process_result_data_type(
                tsl.get_mkt_min_k(date, date, code_list))
        ddb.create_hft_table(ddb.ddf_hft_mink_tbname, mink_df)

    if if_tick:
        logger.info(f"creating tick on {date} for {code_list[0]}")
        with TSLfm() as tsl:
            tick_df = tsl.process_result_data_type(
                tsl.get_trade_tick(date, date, code_list))
        ddb.create_hft_table(ddb.ddf_hft_tick_tbname, tick_df)
|
|
|
|
|
|
|
|
|
|
|
|
def run():
    """
    Day-by-day, prefix-by-prefix minute-K download over a fixed date window.

    too slow. depracated.
    """
    # Group contract codes by prefix (the code minus its 4-digit expiry).
    codes_by_prefix = {}
    for code in code_list_pickel:
        codes_by_prefix.setdefault(code[:-4], []).append(code)

    # print(all_code_dict_by_init)

    # Every calendar day in the window, rendered as yyyymmdd.
    date_strings = list(pd.date_range(
        '2022-09-30', '2022-10-31', freq='D').astype('str'))
    all_dates = [d.replace('-', '') for d in date_strings]

    for date in all_dates:
        for ind, prefix in enumerate(codes_by_prefix):
            logger.info(f"Getting {prefix} (no.{ind})")
            run_add_1day_code_init_minKline(date, codes_by_prefix[prefix])
|
|
|
|
|
|
|
|
|
|
|
|
def run_pool_dates_by_code_init_n_group(typ='mink', code_gp_amt=10, date_gp_amt=10, start_date='20220101', end_date='20221031', if_check=1, code_dict_by='init'):
    """
    Batch driver: split codes and dates into groups, then upload each
    (date-group x code-group) chunk via run_pool_add_by_datelist_codeinit.

    Parameters
    ----------
    typ : str, optional
        'tick' or 'mink'; any other value silently does nothing per chunk
        (kept for backward compatibility).
    code_gp_amt : int, optional
        Number of code buckets when code_dict_by == 'group'.
    date_gp_amt : int, optional
        Number of date buckets (keyed by day-of-month modulo this value).
    start_date, end_date : str, optional
        Date window bounds (yyyymmdd), inclusive.
    if_check : int, optional
        Forwarded to run_pool_add_by_datelist_codeinit (duplicate check).
    code_dict_by : str, optional
        'init' groups codes by contract prefix; 'group' round-robins the
        prefixes into code_gp_amt buckets.

    Raises
    ------
    ValueError
        If `code_dict_by` is neither 'init' nor 'group'. (Previously an
        unknown value crashed later with a NameError on `all_code_dict`.)
    """
    logger.info("Running run_pool_dates_by_group")

    # Group codes by contract prefix (code minus the 4-digit expiry).
    all_code_dict_by_init = {}
    for c in code_list_pickel:
        init = c[:-4]
        if init in all_code_dict_by_init:
            all_code_dict_by_init[init].append(c)
        else:
            all_code_dict_by_init[init] = [c]

    if code_dict_by == 'init':
        all_code_dict = all_code_dict_by_init
    elif code_dict_by == 'group':
        # Round-robin the sorted prefixes into code_gp_amt buckets so each
        # bucket carries a comparable number of prefixes.
        all_code_dict_by_group_no = {}
        for ind, code_init in enumerate(sorted(all_code_dict_by_init)):
            group_no = ind % code_gp_amt
            if group_no not in all_code_dict_by_group_no:
                all_code_dict_by_group_no[group_no] = all_code_dict_by_init[code_init]
            else:
                all_code_dict_by_group_no[group_no] += all_code_dict_by_init[code_init]
        all_code_dict = all_code_dict_by_group_no
    else:
        raise ValueError(
            f"Unknown code_dict_by {code_dict_by!r}; expected 'init' or 'group'")

    # Bucket calendar days by day-of-month modulo date_gp_amt.
    allDates = pd.date_range(start_date, end_date, freq='D')
    dates_dict_by_day = {}
    for d in list(allDates.astype('str')):
        group_no = int(d[-2:]) % date_gp_amt
        if group_no not in dates_dict_by_day:
            dates_dict_by_day[group_no] = [d.replace('-', '')]
        else:
            dates_dict_by_day[group_no].append(d.replace('-', ''))

    logger.debug(dates_dict_by_day)

    for group_no in dates_dict_by_day:
        date_list = dates_dict_by_day[group_no]
        num_of_code_group = len(all_code_dict)
        for ind, code_init in enumerate(all_code_dict):
            # done: 'T','TS','TS','TF'
            # if code_init in ['T']: # todo filtered this ,,'TF', 'IC','IF','IH','IM'
            logger.info(
                f"Getting {code_init} (no.{ind}/{num_of_code_group} of date_group {group_no}/{date_gp_amt})")
            code_list = all_code_dict[code_init]
            if typ == 'mink':
                # Was bare print(); routed through the logger so the chunk
                # contents land in the debug log file, not just stdout.
                logger.debug(date_list)
                logger.debug(code_list)
                run_pool_add_by_datelist_codeinit(
                    'mink', date_list, code_list, if_check)
            elif typ == 'tick':
                logger.info('Running tick')
                run_pool_add_by_datelist_codeinit(
                    'tick', date_list, code_list, if_check)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    import time

    # run_create_hft_db() # including two tables

    tic = time.perf_counter()

    # --- job configuration --------------------------------------------------
    typ = 'mink'        # 'mink' or 'tick'
    st_d = '20221102'   # start date, yyyymmdd
    en_d = '20221103'   # end date, yyyymmdd (inclusive)
    if_check = 1        # 1: skip (date, code) pairs already stored in DDB
    split_code_into_howmany_groups_no = 10
    split_date_into_howmany_groups = 5

    logger.info(
        f"Going to run {typ} from {st_d} to {en_d} with if_check duplicates={if_check} in {running_which_env}, plz check if this info is correct.\n\n\n\n")
    run_pool_dates_by_code_init_n_group(typ=typ, code_gp_amt=split_code_into_howmany_groups_no,
                                        date_gp_amt=split_date_into_howmany_groups, start_date=st_d, end_date=en_d, if_check=if_check, code_dict_by='group')
    # run_pool_dates_by_code_init_n_group(typ='mink',group_amount=5)

    toc = time.perf_counter()

    logger.info(f"Running used {toc - tic:0.4f} seconds")

    # all t taks Running used 588.5782 seconds for 10 months
    # 600/60=10 min 12min for take code_init
    # 12* 71 = 850 min / 60 = 15 hr for all code for each year
|