commit
4d2fb39bd7
File diff suppressed because one or more lines are too long
@ -0,0 +1,173 @@
|
|||||||
|
from code_list_all_pkl import code_list as code_list_pickel_from_file, code_init_case_mapping, all_fm_init_curr
|
||||||
|
from TSLfm import TSLfm
|
||||||
|
from DDBfm import DDBfm
|
||||||
|
import pandas as pd
|
||||||
|
from loguru import logger
|
||||||
|
from os.path import dirname, abspath, join
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Environment name handed to every DDBfm(...) constructed in this module and
# embedded in the log file name and the start-up warning of the script entry.
running_which_env = 'dev'
|
||||||
|
|
||||||
|
|
||||||
|
def get_code_init_og_code(code):
    """Rebuild a contract code with its prefix mapped through the case table.

    The last four characters of ``code`` are kept verbatim; everything in
    front of them is used as a key into ``code_init_case_mapping`` and
    replaced by the mapped value.

    Args:
        code: Contract code string. The trailing four characters are treated
            as a suffix to preserve (presumably a delivery month such as
            "2301" -- TODO confirm against the data source).

    Returns:
        The mapped prefix concatenated with the untouched four-character
        suffix.

    Raises:
        ValueError: If ``code`` is empty or ``None``.
        KeyError: If the prefix is not a key of ``code_init_case_mapping``.
    """
    if not code:
        # An empty code cannot be mapped; fail here with a message naming the
        # input instead of an opaque slicing/lookup error further down.
        raise ValueError(f"empty contract code: {code!r}")
    code_init_lower, suffix = code[:-4], code[-4:]
    return code_init_case_mapping[code_init_lower] + suffix
|
||||||
|
|
||||||
|
|
||||||
|
def process_zl_cl_data(df_zl, df_cl):
    """Merge the zl code list with the per-init contract slots built from cl.

    Both inputs carry their raw codes in a column labelled ``0``. Rows with
    missing values are dropped, the codes are normalised with
    ``get_code_init_og_code`` and a ``code_init`` column (the code without
    its last four characters) is derived. The cl codes are then grouped by
    ``code_init``, sorted, and spread over the columns ``code_cont``,
    ``code_cont1`` ... ``code_cont4`` (one row per init). Inits that are not
    in ``all_fm_init_curr`` are skipped with a warning.

    The inputs are not modified; the function works on copies.

    Args:
        df_zl: DataFrame whose column ``0`` holds one code per row; it is
            exposed as ``code_dom`` in the result.
        df_cl: DataFrame whose column ``0`` holds one code per row; it is
            exposed through the ``code_cont*`` slot columns in the result.

    Returns:
        The inner merge of the two prepared frames on ``code_init``, or an
        empty ``DataFrame`` when no cl group has a recognized init.

    Raises:
        IndexError: If one init has more contracts than there are
            ``code_cont*`` slots.
    """
    cont_ind_list = ['code_cont', 'code_cont1',
                     'code_cont2', 'code_cont3', 'code_cont4']

    # dropna()/rename() return new frames, so the caller's data is untouched.
    df_zl = df_zl.dropna().rename(columns={0: 'code_dom'})
    df_zl['code_dom'] = df_zl['code_dom'].apply(get_code_init_og_code)
    df_zl['code_init'] = df_zl['code_dom'].apply(lambda x: x[:-4])
    logger.debug(f'zl codes:\n{df_zl}')

    df_cl = df_cl.dropna().rename(columns={0: 'code_cont'})
    df_cl['code_cont'] = df_cl['code_cont'].apply(get_code_init_og_code)
    df_cl['code_init'] = df_cl['code_cont'].apply(lambda x: x[:-4])
    logger.debug(f'cl codes:\n{df_cl}')

    rows = []
    for ci, cigp in df_cl.groupby("code_init"):
        if ci not in all_fm_init_curr:
            logger.warning(f"There's unrecognized code init: {ci}!!!!")
            continue

        all_cont = sorted(cigp['code_cont'])
        if len(all_cont) > len(cont_ind_list):
            # Refuse to drop contracts silently; name the offending init.
            raise IndexError(
                f"code init {ci} has {len(all_cont)} contracts but only "
                f"{len(cont_ind_list)} code_cont slots are available")

        row = {'code_init': ci}
        row.update(zip(cont_ind_list, all_cont))
        rows.append(row)

    if not rows:
        return pd.DataFrame()

    # dtype=object keeps unfilled slots as NaN in object columns instead of
    # letting all-NaN columns be inferred as float.
    df_cl_new = pd.DataFrame(
        rows, columns=['code_init'] + cont_ind_list, dtype=object)
    return pd.merge(df_zl, df_cl_new, on='code_init')
|
||||||
|
|
||||||
|
|
||||||
|
def get_zl_cl_df_by_date(start_date, end_date, date_gp_amt=10):
    """Collect the merged zl/cl frame for every calendar day in a range.

    The days from ``start_date`` to ``end_date`` (inclusive, daily frequency)
    are split into consecutive groups of ``date_gp_amt`` days. One ``TSLfm``
    context is opened per group; inside it the zl and cl code lists are
    fetched for each day, combined with ``process_zl_cl_data`` and stamped
    with an ``m_nDate`` column.

    Args:
        start_date: First day, in any form ``pandas.date_range`` accepts
            (the script entry passes 'YYYYMMDD' strings).
        end_date: Last day, same format as ``start_date``.
        date_gp_amt: Number of days handled per ``TSLfm`` context.

    Returns:
        The concatenation of all per-day frames, or an empty ``DataFrame``
        when no day in the range produced any rows.
    """
    all_dates = pd.date_range(start_date, end_date, freq='D')
    date_list = list(all_dates.strftime('%Y%m%d'))

    # Bucket the dates so that each TSLfm context serves date_gp_amt days.
    date_list_group = {}
    for ind, date in enumerate(date_list):
        date_list_group.setdefault(int(ind / date_gp_amt), []).append(date)
    logger.debug(f'date groups: {date_list_group}')

    df_list = []
    for group_dates in date_list_group.values():
        with TSLfm() as tsl:
            for date in group_dates:
                df_zl = tsl.get_zl_code_list(date)
                df_cl = tsl.get_cont_code_list(date)

                # The two lists are inner-merged on code_init, so a day with
                # either list empty cannot contribute rows; skip it instead
                # of pushing an empty frame through the processing step.
                if df_zl.empty or df_cl.empty:
                    continue

                df = process_zl_cl_data(df_zl, df_cl)
                if df.empty:
                    continue
                df['m_nDate'] = pd.to_datetime(date, format='%Y%m%d')
                df_list.append(df)

    if not df_list:
        # pd.concat raises ValueError on an empty list; a range without any
        # data yields an empty frame instead.
        logger.warning(
            f"no zl+cl data found from {start_date} to {end_date}")
        return pd.DataFrame()

    df_all = pd.concat(df_list)
    logger.debug(f'getting zl+cl data of \n{df_all}')
    return df_all
|
||||||
|
|
||||||
|
|
||||||
|
def check_if_date_codeinitlist_exists(date, code_init_list):
    """Return the code inits that still have to be downloaded for ``date``.

    Code inits that are not in ``all_fm_init_curr`` are dropped with a
    warning. The remaining ones are passed to
    ``DDBfm.get_missing_code_init_date_in_tb`` for the daily-dom table, and
    whatever that call returns is logged and handed back to the caller.

    Args:
        date: Date forwarded unchanged to the database query.
        code_init_list: Iterable of code init strings to check.

    Returns:
        The list reported by the database as missing for ``date``; an empty
        list when nothing recognized was passed in, so no query is made.
    """
    code_init_list_filtered = []
    for ci in code_init_list:
        # TODO: code inits absent from all_fm_init_curr are not counted at
        # all (original note: "len: s6").
        if ci not in all_fm_init_curr:
            logger.warning(f"There's unrecognized code init: {ci}!!!!")
            continue
        code_init_list_filtered.append(ci)

    if not code_init_list_filtered:
        return code_init_list_filtered

    ddb1 = DDBfm(running_which_env)
    try:
        tbname = ddb1.ddf_hft_dailydom_tbname
        code_init_list_filtered = ddb1.get_missing_code_init_date_in_tb(
            tbname, date, code_init_list_filtered)

        if code_init_list_filtered:
            logger.info(
                f"Need to download {'+'.join(code_init_list_filtered)} on {date} in {tbname}")
        else:
            logger.info(
                f"all codes checked in database tb {tbname} on {date}")
    finally:
        # Release the session even when the query or the logging fails.
        ddb1.close_sess()

    return code_init_list_filtered
|
||||||
|
|
||||||
|
|
||||||
|
def run_create_zl_table_in_db(date='20221101'):
    """Create the daily info table from the zl/cl data of a single day.

    Fetches the merged frame for ``date`` via ``get_zl_cl_df_by_date`` and
    passes it to ``DDBfm.create_daily_info_table``.

    Args:
        date: Day to load, used as both start and end of the range.
    """
    logger.info(f"creating zl cl table on {date}")

    df = get_zl_cl_df_by_date(date, date)

    ddbfm = DDBfm(running_which_env)
    try:
        ddbfm.create_daily_info_table(df)
    finally:
        # Close the session like check_if_date_codeinitlist_exists does, so
        # repeated calls do not accumulate open sessions.
        ddbfm.close_sess()
|
||||||
|
|
||||||
|
|
||||||
|
def run_pool_append_zl_table_in_db(start_date, end_date, if_check=True):
    """Append the zl/cl data of a date range to the daily-dom table.

    Fetches the merged frame for the range via ``get_zl_cl_df_by_date`` and
    appends it through a pooled ``DDBfm`` using
    ``append_pool_hft_table`` sorted by ``m_nDate``.

    Args:
        start_date: First day of the range.
        end_date: Last day of the range.
        if_check: Currently has no effect. A per-date duplicate check via
            ``check_if_date_codeinitlist_exists`` was sketched but is not
            wired in, so existing rows are not detected before appending.
    """
    logger.info(f"Running append zl cl table from {start_date} to {end_date}")

    df = get_zl_cl_df_by_date(start_date, end_date)
    if df.empty:
        # Nothing to write: skip opening a connection pool for an empty frame.
        logger.warning(
            f"No zl cl data from {start_date} to {end_date}, nothing to append")
        return

    ddbfm = DDBfm(running_which_env, pool=True)

    # TODO: honour if_check by running check_if_date_codeinitlist_exists for
    # each date with that date's code_init values before appending.
    ddbfm.append_pool_hft_table(
        ddbfm.ddf_hft_dailydom_tbname, df, sort_col='m_nDate')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    import time

    # Script entry: back-fill the daily-dom table one calendar year at a
    # time for 2000 through 2020, logging to stderr and to a rotating file.
    ROOT_DIR = abspath(join(dirname(abspath(__file__)), ".."))
    log_sink = ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}.log"

    logger.remove()
    logger.add(sys.stderr, level="INFO")
    logger.add(log_sink, rotation="10 MB", compression="zip", level="INFO")

    # Settings shared by every yearly run.
    typ = 'dailydom'
    if_check = False

    for year in range(2000, 2021):
        # Whole calendar year; a fixed window such as '20220101'-'20221031'
        # can be substituted here for a one-off run.
        st_d = f"{year}0101"
        en_d = f"{year}1231"

        logger.warning(
            f"Going to run *{typ}* from {st_d} to {en_d} with if_check dupliactes={if_check} in *{running_which_env}*, plz check if this info is correct.\n\n\n\n")

        tic = time.perf_counter()
        # run_create_zl_table_in_db() is the alternative for first-time
        # table creation.
        run_pool_append_zl_table_in_db(
            start_date=st_d, end_date=en_d, if_check=if_check)
        toc = time.perf_counter()
        logger.info(f"Running used {toc - tic:0.4f} seconds")

        # Pause before starting the next year.
        time.sleep(10)
|
Loading…
Reference in new issue