update getting index line

main^2
yzlocal 2 years ago
parent 155851bc18
commit c79fb69145

@ -1,5 +1,5 @@
all_fm_init_curr=['sc', 'v', 'TS', 'MA', 'AP', 'jm', 'bc', 'bb', 'fu', 'IM', 'IF', 'a', 'lu', 'FG', 'cu', 'al', 'IH', 'RS', 'pg', 'CF', 'SF', 'ni', 'hc', 'UR', 'm', 'SR', 'j', 'PF', 'RM', 'T', 'c', 'JR', 'l', 'p', 'sp', 'CY', 'pb', 'TF', 'b', 'eg', 'rb', 'PK', 'sn', 'nr', 'pp', 'CJ', 'eb', 'SA', 'y', 'RI', 'lh', 'jd', 'OI', 'WH', 'ss', 'ru', 'zn', 'fb', 'rr', 'PM', 'au', 'TA', 'ZC', 'IC', 'bu', 'SM', 'wr', 'cs', 'LR', 'ag', 'i'] all_fm_init_curr=['sc', 'v', 'TS', 'MA', 'AP', 'jm', 'bc', 'bb', 'fu', 'IM', 'IF', 'a', 'lu', 'FG', 'cu', 'al', 'IH', 'RS', 'pg', 'CF', 'SF', 'ni', 'hc', 'UR', 'm', 'SR', 'j', 'PF', 'RM', 'T', 'c', 'JR', 'l', 'p', 'sp', 'CY', 'pb', 'TF', 'b', 'eg', 'rb', 'PK', 'sn', 'nr', 'pp', 'CJ', 'eb', 'SA', 'y', 'RI', 'lh', 'jd', 'OI', 'WH', 'ss', 'ru', 'zn', 'fb', 'rr', 'PM', 'au', 'TA', 'ZC', 'IC', 'bu', 'SM', 'wr', 'cs', 'LR', 'ag', 'i']
tsl_ind_codeinit_mapping={'ICInd': 'IC', 'IFInd': 'IF', 'IHInd': 'IH', 'IMInd': 'IM', 'TInd': 'T', 'TFInd': 'TF', 'TSInd': 'TS', 'QI000001': 'al', 'QI000002': 'au', 'QI000003': 'cu', 'QI000004': 'fu', 'QI000005': 'ru', 'QI000006': 'zn', 'QI000007': 'CF', 'QI000009': 'RO', 'QI000010': 'SR', 'QI000011': 'TA', 'QI000012': 'WS', 'QI000013': 'WT', 'QI000014': 'a', 'QI000015': 'b', 'QI000016': 'c', 'QI000017': 'l', 'QI000018': 'm', 'QI000019': 'p', 'QI000020': 'y', 'QI000021': 'ER', 'QI000022': 'wr', 'QI000023': 'rb', 'QI000024': 'v', 'QI000025': 'pb', 'QI000026': 'j', 'QI000027': 'ME', 'QI000028': 'ag', 'QI000029': 'FG', 'QI000030': 'RS', 'QI000031': 'RM', 'QI000032': 'jm', 'QI000033': 'bu', 'QI000034': 'i', 'QI000035': 'ZC', 'QI000036': 'fb', 'QI000037': 'bb', 'QI000038': 'JR', 'QI000039': 'jd', 'QI000040': 'hc', 'QI000041': 'pp', 'QI000042': 'LR', 'QI000043': 'SF', 'QI000044': 'SM', 'QI000045': 'cs', 'QI000046': 'ni', 'QI000047': 'sn', 'QI000048': 'CY', 'QI000049': 'AP', 'QI000050': 'sc', 'QI000051': 'sp', 'QI000052': 'eg', 'QI000053': 'CJ', 'QI000054': 'nr', 'QI000055': 'UR', 'QI000056': 'rr', 'QI000057': 'ss', 'QI000058': 'eb', 'QI000059': 'SA', 'QI000060': 'pg', 'QI000061': 'lu', 'QI000062': 'PF', 'QI000063': 'bc', 'QI000064': 'lh', 'QI000065': 'PK'}
code_init_case_mapping={'ap': 'AP', code_init_case_mapping={'ap': 'AP',
'fg': 'FG', 'fg': 'FG',
'rm': 'RM', 'rm': 'RM',

@ -1,12 +1,13 @@
from code_list_all_pkl import code_list as code_list_pickel_from_file, all_fm_init_curr from code_list_all_pkl import code_list as code_list_pickel_from_file, all_fm_init_curr,tsl_ind_codeinit_mapping
from TSLfm import TSLfm from TSLfm import TSLfm
from DDBfm import DDBfm from DDBfm import DDBfm
import pandas as pd import pandas as pd
import numpy as np
from loguru import logger from loguru import logger
from os.path import dirname, abspath, join from os.path import dirname, abspath, join
import sys import sys
running_which_env = 'prd' running_which_env = 'dev'
def run_add_1day_code_init_minKline(date, code_list): def run_add_1day_code_init_minKline(date, code_list):
@ -32,6 +33,12 @@ def run_add_1day_code_init_minKline(date, code_list):
f'Getting a df of {df.shape}: {"+".join(code_list)} on {date}') f'Getting a df of {df.shape}: {"+".join(code_list)} on {date}')
ddb.append_hft_table(ddb.ddf_hft_mink_tbname, df) ddb.append_hft_table(ddb.ddf_hft_mink_tbname, df)
def map_code_tsl_to_9999indcode(code_tsl):
    """Translate a TSL index code into its '<code_init>9999' code.

    ``code_tsl`` is looked up in ``tsl_ind_codeinit_mapping``; the mapped
    code init gets the suffix ``'9999'`` appended and is returned.

    When ``code_tsl`` has no entry in the mapping, an error is logged and
    ``np.nan`` is returned instead of a string.
    """
    # Guard clause: handle the unmapped code first so the success path
    # below reads straight through.
    if code_tsl not in tsl_ind_codeinit_mapping:
        logger.error(f'TSL code didnt find mapping for {code_tsl}')
        return np.nan

    code_init = tsl_ind_codeinit_mapping[code_tsl]
    return code_init + '9999'
def check_if_date_codelist_exists(typ, date, code_list): def check_if_date_codelist_exists(typ, date, code_list):
code_list_filtered=[] code_list_filtered=[]
@ -47,7 +54,7 @@ def check_if_date_codelist_exists(typ, date, code_list):
if typ == 'tick': if typ == 'tick':
tbName = ddb1.ddf_hft_tick_tbname tbName = ddb1.ddf_hft_tick_tbname
elif typ == 'mink': elif typ in ['mink','indl']:
tbName = ddb1.ddf_hft_mink_tbname tbName = ddb1.ddf_hft_mink_tbname
code_list_filtered = ddb1.get_missing_code_date_in_tb( code_list_filtered = ddb1.get_missing_code_date_in_tb(
@ -64,15 +71,26 @@ def check_if_date_codelist_exists(typ, date, code_list):
def run_pool_add_by_datelist_codeinit(typ, date_list, code_list, if_check=1): def run_pool_add_by_datelist_codeinit(typ, date_list, code_list, if_check=1):
df_list = [] df_list = []
if typ=='indl':
code_list_filtered = [] # real code in ddb
code_list_tsl_code=[]
for c in code_list:
code_list_filtered.append(map_code_tsl_to_9999indcode(c))
code_list_tsl_code.append(c)
else:
code_list_filtered= code_list
for date in date_list: for date in date_list:
if if_check: if if_check:
code_list_filtered = check_if_date_codelist_exists( code_list_filtered = check_if_date_codelist_exists(
typ, date, code_list) typ, date, code_list_filtered)
else:
code_list_filtered = code_list
with TSLfm() as tsl: with TSLfm() as tsl:
if typ == 'tick': if typ == 'tick':
df = tsl.process_result_data_type( df = tsl.process_result_data_type(
@ -80,6 +98,15 @@ def run_pool_add_by_datelist_codeinit(typ, date_list, code_list, if_check=1):
elif typ == 'mink': elif typ == 'mink':
df = tsl.process_result_data_type( df = tsl.process_result_data_type(
tsl.get_mkt_min_k(date, date, code_list_filtered)) tsl.get_mkt_min_k(date, date, code_list_filtered))
elif typ == 'indl':
df = tsl.process_result_data_type(
tsl.get_mkt_min_k(date, date, code_list_tsl_code))
if typ == 'indl' and not df.empty:
# df.rename(columns={'code':'code_tsl'},inplace=True)
df['code']=df['code'].apply(map_code_tsl_to_9999indcode)
df['code_init']=df['code'].apply(lambda x: x[:-4])
# df.drop('code_tsl',axis=1,inplace=True)
if not df.empty: if not df.empty:
df_list.append(df) df_list.append(df)
if not df_list: if not df_list:
@ -91,7 +118,7 @@ def run_pool_add_by_datelist_codeinit(typ, date_list, code_list, if_check=1):
f'Getting a df of {df_all.shape}: {code_list[0][:-4]} on {"+".join(date_list)}') f'Getting a df of {df_all.shape}: {code_list[0][:-4]} on {"+".join(date_list)}')
if typ == 'tick': if typ == 'tick':
ddb2.append_pool_hft_table(ddb2.ddf_hft_tick_tbname, df_all) ddb2.append_pool_hft_table(ddb2.ddf_hft_tick_tbname, df_all)
elif typ == 'mink': elif typ in ['mink','indl']:
ddb2.append_pool_hft_table(ddb2.ddf_hft_mink_tbname, df_all) ddb2.append_pool_hft_table(ddb2.ddf_hft_mink_tbname, df_all)
ddb2.close_sess() ddb2.close_sess()
del ddb2 del ddb2
@ -149,32 +176,46 @@ def run():
def run_pool_dates_by_code_init_n_group(typ='mink', code_gp_amt=10, date_gp_amt=10, start_date='20220101', end_date='20221031', if_check=1, code_dict_by='init'): def run_pool_dates_by_code_init_n_group(typ='mink', code_gp_amt=10, date_gp_amt=10, start_date='20220101', end_date='20221031', if_check=1, code_dict_by='init'):
logger.info("Running run_pool_dates_by_group") logger.info("Running run_pool_dates_by_group")
code_list_pickel=code_list_pickel_from_file
code_list_pickel=sorted(list(set(code_list_pickel)))
all_code_dict_by_init = {}
for c in code_list_pickel:
if c[-4:] < start_date[2:6]:
continue
if c[:-4] in all_fm_init_curr: #todo code that's not in curr is not counted!!! len: s6
init = c[:-4]
if init in all_code_dict_by_init:
all_code_dict_by_init[init].append(c)
else:
all_code_dict_by_init[init] = [c]
else:
logger.warning("There's unrecognized code init!!!!")
if code_dict_by == 'init': if typ in ['mink','tick']:
all_code_dict = all_code_dict_by_init code_list_pickel=code_list_pickel_from_file
if code_dict_by == 'group': code_list_pickel=sorted(list(set(code_list_pickel)))
all_code_dict_by_group_no = {} all_code_dict_by_init = {}
for ind, code_init in enumerate(sorted(all_code_dict_by_init)): for c in code_list_pickel:
group_no = ind % code_gp_amt if c[-4:] < start_date[2:6]:
if group_no not in all_code_dict_by_group_no: continue
all_code_dict_by_group_no[group_no] = all_code_dict_by_init[code_init] if c[:-4] in all_fm_init_curr: #todo code that's not in curr is not counted!!! len: s6
init = c[:-4]
if init in all_code_dict_by_init:
all_code_dict_by_init[init].append(c)
else:
all_code_dict_by_init[init] = [c]
else: else:
all_code_dict_by_group_no[group_no] += all_code_dict_by_init[code_init] logger.warning("There's unrecognized code init!!!!")
all_code_dict = all_code_dict_by_group_no if code_dict_by == 'init':
all_code_dict = all_code_dict_by_init
if code_dict_by == 'group':
all_code_dict_by_group_no = {}
for ind, code_init in enumerate(sorted(all_code_dict_by_init)):
group_no = ind % code_gp_amt
if group_no not in all_code_dict_by_group_no:
all_code_dict_by_group_no[group_no] = all_code_dict_by_init[code_init]
else:
all_code_dict_by_group_no[group_no] += all_code_dict_by_init[code_init]
all_code_dict = all_code_dict_by_group_no
elif typ in ['indl']:
tsl_ind_code_list = []
real_code_init_list = []
all_code_dict={}
for c in sorted(tsl_ind_codeinit_mapping):
if tsl_ind_codeinit_mapping[c] in all_fm_init_curr:
tsl_ind_code_list.append(c)
real_code_init_list.append(tsl_ind_codeinit_mapping[c])
all_code_dict[tsl_ind_codeinit_mapping[c]]=[c]
else:
logger.warning(f"There's unrecognized code init: {tsl_ind_codeinit_mapping[c]}!!!!")
allDates = pd.date_range(start_date, end_date, freq='D') allDates = pd.date_range(start_date, end_date, freq='D')
dates_dict_by_day = {} dates_dict_by_day = {}
@ -199,16 +240,9 @@ def run_pool_dates_by_code_init_n_group(typ='mink', code_gp_amt=10, date_gp_amt=
code_list = all_code_dict[code_init] code_list = all_code_dict[code_init]
logger.info(date_list) logger.info(date_list)
logger.info(code_list) logger.info(code_list)
if typ == 'mink': run_pool_add_by_datelist_codeinit(
# logger.info('Running mink') typ, date_list, code_list, if_check)
run_pool_add_by_datelist_codeinit(
'mink', date_list, code_list, if_check)
# run_pool_add_byday_code_init_minKline(date_list,code_list)
elif typ == 'tick':
logger.info('Running tick')
run_pool_add_by_datelist_codeinit(
'tick', date_list, code_list, if_check)
if __name__ == '__main__': if __name__ == '__main__':
@ -231,12 +265,12 @@ if __name__ == '__main__':
split_code_into_howmany_groups_no = int(split_code_into_howmany_groups_no) split_code_into_howmany_groups_no = int(split_code_into_howmany_groups_no)
split_date_into_howmany_groups=int(split_date_into_howmany_groups) split_date_into_howmany_groups=int(split_date_into_howmany_groups)
else: else:
typ = 'tick' typ = 'indl'
st_d = '20211201' st_d = '20221101'
en_d = '20211231' en_d = '20221101'
if_check = True if_check = True
split_code_into_howmany_groups_no = 20 split_code_into_howmany_groups_no = 1 # how many in 1 gp
split_date_into_howmany_groups = 5 split_date_into_howmany_groups = 1
logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}_{typ}_{st_d}_{en_d}_{if_check}_{split_code_into_howmany_groups_no}_{split_date_into_howmany_groups}.log", logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}_{typ}_{st_d}_{en_d}_{if_check}_{split_code_into_howmany_groups_no}_{split_date_into_howmany_groups}.log",

@ -6,7 +6,7 @@ from loguru import logger
from os.path import dirname, abspath, join from os.path import dirname, abspath, join
import sys import sys
running_which_env = 'prd' running_which_env = 'dev'
def get_code_init_og_code(code): def get_code_init_og_code(code):
@ -21,13 +21,13 @@ def process_zl_cl_data(df_zl, df_cl):
df_zl.rename(columns={0: 'code_dom'}, inplace=True) df_zl.rename(columns={0: 'code_dom'}, inplace=True)
df_zl['code_dom'] = df_zl['code_dom'].apply(get_code_init_og_code) df_zl['code_dom'] = df_zl['code_dom'].apply(get_code_init_og_code)
df_zl['code_init'] = df_zl['code_dom'].apply(lambda x: x[:-4]) df_zl['code_init'] = df_zl['code_dom'].apply(lambda x: x[:-4])
# print(df_zl) print(df_zl)
df_cl.dropna(inplace=True) df_cl.dropna(inplace=True)
df_cl.rename(columns={0: 'code_cont'}, inplace=True) df_cl.rename(columns={0: 'code_cont'}, inplace=True)
df_cl['code_cont'] = df_cl['code_cont'].apply(get_code_init_og_code) df_cl['code_cont'] = df_cl['code_cont'].apply(get_code_init_og_code)
df_cl['code_init'] = df_cl['code_cont'].apply(lambda x: x[:-4]) df_cl['code_init'] = df_cl['code_cont'].apply(lambda x: x[:-4])
# print(df_cl) print(df_cl)
df_cl_new_list = [] df_cl_new_list = []
for ci, cigp in df_cl.groupby("code_init"): for ci, cigp in df_cl.groupby("code_init"):
@ -44,6 +44,8 @@ def process_zl_cl_data(df_zl, df_cl):
for i, cont in enumerate(all_cont): for i, cont in enumerate(all_cont):
df_cont.loc[0, cont_ind_list[i]] = cont df_cont.loc[0, cont_ind_list[i]] = cont
df_cl_new_list.append(df_cont) df_cl_new_list.append(df_cont)
if not df_cl_new_list:
return pd.DataFrame()
df_cl_new = pd.concat(df_cl_new_list) df_cl_new = pd.concat(df_cl_new_list)
# print(df_cl_new) # print(df_cl_new)
return pd.merge(df_zl, df_cl_new, on='code_init') return pd.merge(df_zl, df_cl_new, on='code_init')
@ -137,26 +139,35 @@ def run_pool_append_zl_table_in_db(start_date, end_date, if_check=True):
if __name__ == '__main__': if __name__ == '__main__':
import time
typ = 'dailydom'
st_d = '20220101'
en_d = '20221031'
if_check = False
ROOT_DIR = abspath(join(dirname(abspath(__file__)), "..")) ROOT_DIR = abspath(join(dirname(abspath(__file__)), ".."))
logger.remove() logger.remove()
logger.add(sys.stderr, level="INFO") logger.add(sys.stderr, level="INFO")
logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}_{typ}_{st_d}_{en_d}_{if_check}.log", logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}.log",
rotation="10 MB", compression="zip", level="DEBUG") rotation="10 MB", compression="zip", level="INFO")
for i in range(0,21):
logger.warning( typ = 'dailydom'
f"Going to run *{typ}* from {st_d} to {en_d} with if_check dupliactes={if_check} in *{running_which_env}*, plz check if this info is correct.\n\n\n\n") # st_d = '20220101'
# en_d = '20221031'
st_d = str(2000+i)+'0101'
en_d = str(2000+i)+'1231'
if_check = False
import time logger.warning(
tic = time.perf_counter() f"Going to run *{typ}* from {st_d} to {en_d} with if_check dupliactes={if_check} in *{running_which_env}*, plz check if this info is correct.\n\n\n\n")
tic = time.perf_counter()
# run_create_zl_table_in_db()
run_pool_append_zl_table_in_db(
start_date=st_d, end_date=en_d, if_check=if_check)
# run_create_zl_table_in_db() toc = time.perf_counter()
run_pool_append_zl_table_in_db( logger.info(f"Running used {toc - tic:0.4f} seconds")
start_date=st_d, end_date=en_d, if_check=if_check)
toc = time.perf_counter() time.sleep(10)
logger.info(f"Running used {toc - tic:0.4f} seconds")

@ -23,7 +23,7 @@ select count(*) from loadTable("dfs://hft_futuremarket_ts", "DailyFutureInfoPart
pt=loadTable("dfs://daily_stock_ts","daily_kline") pt=loadTable("dfs://daily_stock_ts","daily_kline")
select top 400 * from pt where code_init=`T select top 400 * from pt where code=`IC9999
schema(pt) schema(pt)
select count(*) from pt select count(*) from pt

Loading…
Cancel
Save