You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

232 lines
7.5 KiB

2 years ago
import sys
2 years ago
# Environment name: passed to every DDBfm(...) constructed in this file and
# embedded in the log-file name configured below.
running_which_env='dev'
2 years ago
2 years ago
from os.path import dirname, abspath, join
# Parent of the directory that contains this file.
ROOT_DIR = abspath(join(dirname(abspath(__file__)), ".."))
from loguru import logger
# Drop the handlers registered so far, then install two sinks:
#   * stderr at INFO level;
#   * a file under ROOT_DIR/logs named by start time and environment, at DEBUG
#     level, rotated at 10 MB with rotated files zip-compressed.
logger.remove()
logger.add(sys.stderr, level="INFO")
logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}.log", rotation="10 MB", compression="zip", level="DEBUG")
2 years ago
import pandas as pd
2 years ago
from DDBfm import DDBfm
from TSLfm import TSLfm
2 years ago
from code_list import code_list_pickel
2 years ago
2 years ago
def run_add_1day_code_init_minKline(date,code_list):
    """Fetch one day of minute K-line data for `code_list` and append it to the minute-K table.

    Each code is first looked up with ``ddb.search_code_date_in_tb``; codes for
    which that call returns a truthy value are skipped with a warning so they
    are not appended a second time.

    Args:
        date: Day to download; passed as both the start and the end argument
            of ``TSLfm.get_mkt_min_k``. Callers in this file pass 'YYYYMMDD'
            strings.
        code_list: Codes to download. The info log prints ``code_list[0][:-4]``
            as the group label.

    Returns:
        0 when no code is left to download after the duplicate check,
        otherwise None.
    """
    ddb = DDBfm(running_which_env)
    try:
        table_name = ddb.ddf_hft_mink_tbname

        codes_to_fetch = []
        for code in code_list:
            if ddb.search_code_date_in_tb(table_name, date, code):
                logger.warning(f"Possible duplicates on {date} and {code}")
            else:
                codes_to_fetch.append(code)

        if not codes_to_fetch:
            return 0

        with TSLfm() as tsl:
            df = tsl.process_result_data_type(tsl.get_mkt_min_k(date, date, codes_to_fetch))

        if not df.empty:
            logger.info(f'Getting a df of {df.shape}: {code_list[0][:-4]} on {date}')
            ddb.append_hft_table(table_name, df)
    finally:
        # This function is called once per (date, code prefix) pair, so the
        # session must be released on every exit path, including the early
        # return above and any exception.
        ddb.close_sess()
2 years ago
# def run_pool_add_byday_code_init_minKline(date_list,code_list):
# df_list=[]
# code_list_filtered=code_list
# ddb1 = DDBfm(running_which_env)
# tb=ddb1.load_tb(tableName=ddb1.ddf_hft_mink_tbname)
# # tb=ddb1.sess.loadTable(dbPath=ddb1.ddb_hft_dbPath, tableName=ddb1.ddf_hft_mink_tbname)
# for date in date_list:
# with TSLfm() as tsl:
# df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list))
# if df.empty:
# continue
# code_list_filtered = ddb1.get_missing_code_date_in_tb(tb,date,code_list)
# if len(code_list_filtered)==0:
# continue
# logger.info(f"getting {'+'.join(code_list_filtered)} on {date}")
# df=df[df['code'].isin(code_list_filtered)]
# df_list.append(df)
# ddb1.close_sess()
# del ddb1
# if df_list:
# df_all = pd.concat(df_list)
# ddb2 = DDBfm(running_which_env,pool=True)
# logger.info(f'Getting a df of {df_all.shape}: {code_list[0][:-4]} on {"+".join(date_list)}')
# ddb2.append_pool_hft_table(ddb2.ddf_hft_mink_tbname,df_all)
# ddb2.clear_pool()
# del ddb2
2 years ago
def check_if_date_codelist_exists(typ,date,code_list):
    """Ask the database which codes in `code_list` still need downloading for `date`.

    Args:
        typ: 'tick' selects ``ddf_hft_tick_tbname``; 'mink' selects
            ``ddf_hft_mink_tbname``.
        date: Day to check; forwarded unchanged to
            ``DDBfm.get_missing_code_date_in_tb``.
        code_list: Candidate codes.

    Returns:
        Whatever ``DDBfm.get_missing_code_date_in_tb`` returns. It is treated
        here as the collection of codes still missing (joined with '+' for the
        log line when non-empty).

    Raises:
        ValueError: If `typ` is neither 'tick' nor 'mink'.
    """
    # Reject an unknown type before opening a database session; previously
    # this surfaced later as an unbound-variable error on the table name.
    if typ not in ('tick', 'mink'):
        raise ValueError(f"typ must be 'tick' or 'mink', got {typ!r}")

    ddb1 = DDBfm(running_which_env)
    try:
        tbName = ddb1.ddf_hft_tick_tbname if typ == 'tick' else ddb1.ddf_hft_mink_tbname
        code_list_filtered = ddb1.get_missing_code_date_in_tb(tbName,date,code_list)
        if code_list_filtered:
            logger.info(f"Need to download {'+'.join(code_list_filtered)} on {date} in {tbName}")
        else:
            logger.info(f"all codes checked in database {tbName} on {date}")
        return code_list_filtered
    finally:
        # Release the session even if the lookup or the logging raises.
        ddb1.close_sess()
2 years ago
def run_pool_add_by_datelist_codeinit(typ,date_list,code_list,if_check=1):
    """Download `typ` data for every date in `date_list` and append it in one pooled write.

    Args:
        typ: 'tick' (uses ``TSLfm.get_trade_tick``) or 'mink' (uses
            ``TSLfm.get_mkt_min_k``). Also selects the destination table.
        date_list: Days to download; each is passed as both start and end of
            the TSL query and joined with '+' in the log line.
        code_list: Codes to download. The info log prints
            ``code_list[0][:-4]`` as the group label.
        if_check: When truthy, ``check_if_date_codelist_exists`` is consulted
            per date and only the codes it returns are requested.

    Returns:
        0 when nothing was downloaded, otherwise None.

    Raises:
        ValueError: If `typ` is neither 'tick' nor 'mink'.
    """
    if typ not in ('tick', 'mink'):
        raise ValueError(f"typ must be 'tick' or 'mink', got {typ!r}")

    df_list = []
    for date in date_list:
        if if_check:
            code_list_filtered = check_if_date_codelist_exists(typ, date, code_list)
        else:
            code_list_filtered = code_list
        # Nothing left to fetch for this date: do not send an empty code list
        # to TSL.
        if not code_list_filtered:
            continue

        with TSLfm() as tsl:
            fetch = tsl.get_trade_tick if typ == 'tick' else tsl.get_mkt_min_k
            df = tsl.process_result_data_type(fetch(date, date, code_list_filtered))
        if not df.empty:
            df_list.append(df)

    if not df_list:
        return 0

    df_all = pd.concat(df_list)
    ddb2 = DDBfm(running_which_env, pool=True)
    try:
        # Write to the table that matches `typ`; the tick table used to be the
        # destination even for minute K-line data.
        tb_name = ddb2.ddf_hft_tick_tbname if typ == 'tick' else ddb2.ddf_hft_mink_tbname
        logger.info(f'Getting a df of {df_all.shape}: {code_list[0][:-4]} on {"+".join(date_list)}')
        ddb2.append_pool_hft_table(tb_name, df_all)
    finally:
        ddb2.close_sess()
def run_create_hft_db(date = '20221101'):
    """Create the HFT database and its minute-K and tick tables.

    Each table is created from a frame downloaded for code 'T2212' on `date`.
    NOTE(review): presumably the frame serves as the schema template; confirm
    against ``DDBfm.create_hft_table``.

    Args:
        date: Day whose data is downloaded; passed as both start and end of
            each TSL query.
    """
    db = DDBfm(running_which_env)
    db.create_hft_database()

    # Minute K-line table.
    with TSLfm() as session:
        mink_codes = ['T2212']
        mink_frame = session.process_result_data_type(
            session.get_mkt_min_k(date, date, mink_codes)
        )
    db.create_hft_table(db.ddf_hft_mink_tbname, mink_frame)

    # Tick table.
    with TSLfm() as session:
        tick_codes = ['T2212']
        tick_frame = session.process_result_data_type(
            session.get_trade_tick(date, date, tick_codes)
        )
    db.create_hft_table(db.ddf_hft_tick_tbname, tick_frame)
2 years ago
def run():
    """Append minute K-line data day by day for every code group in `code_list_pickel`.

    Codes are grouped by the part of the code before its last four characters.
    Every calendar day from 2022-09-30 through 2022-10-31 is then processed,
    one ``run_add_1day_code_init_minKline`` call per (day, group).
    """
    codes_by_init = {}
    for contract in code_list_pickel:
        codes_by_init.setdefault(contract[:-4], []).append(contract)

    first_day, last_day = '2022-09-30', '2022-10-31'
    day_index = pd.date_range(first_day, last_day, freq='D')
    # 'YYYY-MM-DD' -> 'YYYYMMDD'
    compact_days = [stamp.replace('-', '') for stamp in day_index.astype('str')]

    for day in compact_days:
        for position, init in enumerate(codes_by_init):
            logger.info(f"Getting {init} (no.{position})")
            run_add_1day_code_init_minKline(day, codes_by_init[init])
def run_pool_dates_by_code_init_n_group(typ='mink',gp_amt=10,start_date='20220101',end_date='20221031',if_check=1):
    """Download `typ` data for all code groups, batching the dates into groups.

    Codes from `code_list_pickel` are grouped by the part of the code before
    its last four characters. Every calendar day in [start_date, end_date] is
    assigned to the group ``day_of_month % gp_amt``. For each date group and
    each code group, ``run_pool_add_by_datelist_codeinit`` is called once.

    Args:
        typ: 'mink' or 'tick'; any other value makes the inner loop a no-op
            apart from logging.
        gp_amt: Modulus used to split the days into groups.
        start_date: First day of the range (inclusive), as accepted by
            ``pd.date_range``.
        end_date: Last day of the range (inclusive).
        if_check: Forwarded to ``run_pool_add_by_datelist_codeinit``.
    """
    logger.info("Running run_pool_dates_by_group")

    codes_by_init = {}
    for contract in code_list_pickel:
        codes_by_init.setdefault(contract[:-4], []).append(contract)

    # Key: day-of-month modulo gp_amt; value: 'YYYYMMDD' strings in date order.
    dates_by_group = {}
    for iso_day in pd.date_range(start_date, end_date, freq='D').astype('str'):
        bucket = int(iso_day[-2:]) % gp_amt
        dates_by_group.setdefault(bucket, []).append(iso_day.replace('-', ''))
    logger.debug(dates_by_group)

    init_total = len(codes_by_init)
    for bucket, bucket_dates in dates_by_group.items():
        for position, init in enumerate(codes_by_init):
            # Author's progress notes: done 'T','TS','TS','TF'; a filter such
            # as `if init in ['T']` (TF, IC, IF, IH, IM ...) was used to
            # restrict a run to selected prefixes.
            logger.info(f"Getting {init} (no.{position}/{init_total} of group {bucket}/{gp_amt})")
            contracts = codes_by_init[init]
            if typ == 'tick':
                logger.info('Running tick')
                run_pool_add_by_datelist_codeinit('tick', bucket_dates, contracts, if_check)
            elif typ == 'mink':
                run_pool_add_by_datelist_codeinit('mink', bucket_dates, contracts, if_check)
2 years ago
if __name__ == '__main__':
    import time

    # One-off bootstrap, creates the database including its two tables:
    # run_create_hft_db()
    tic = time.perf_counter()

    # Parameters of this run, gathered in one place and echoed to the log so
    # the operator can verify them.
    job = dict(typ='mink', start_date='20221101', end_date='20221102', if_check=1)
    logger.info(f"Going to run {job['typ']} from {job['start_date']} to {job['end_date']} with if_check dupliactes={job['if_check']} in {running_which_env}, plz check if this info is correct.\n\n\n\n")

    run_pool_dates_by_code_init_n_group(gp_amt=3, **job)
    # run_pool_dates_by_code_init_n_group(typ='mink',group_amount=5)

    elapsed = time.perf_counter() - tic
    logger.info(f"Running used {elapsed:0.4f} seconds")
    # Timing notes from earlier runs (author's estimates):
    #   all 'T' tasks: "Running used 588.5782 seconds" for 10 months
    #   600/60 = 10 min, i.e. about 12 min per code_init
    #   12 * 71 = 850 min / 60 = 15 hr for all codes for each year