single adding working

main
yzlocal 2 years ago
parent a1e27ed251
commit 4d2ad2b411

@ -83,23 +83,24 @@ class DDBfm():
def append_hft_table(self, tbName, df):
    """Append the rows of ``df`` to table ``tbName`` in the HFT database.

    Args:
        tbName: Name of the target table under ``self.ddb_hft_dbPath``.
        df: Rows to append; must expose ``.shape`` (presumably a pandas
            DataFrame -- confirm against callers).
    """
    # TODO: consider loading the table first to check time/date/code
    # before appending.
    table_appender = ddb.tableAppender(
        dbPath=self.ddb_hft_dbPath,
        tableName=tbName,
        ddbSession=self.sess,
    )
    table_appender.append(df)
    logger.info(f"sucessfully append some df of {df.shape}")
def search_code_date_in_tb(self,tbName,curr_date,curr_code):
    """Report whether ``tbName`` already holds a row for a code on a date.

    Args:
        tbName: Name of the table under ``self.ddb_hft_dbPath``.
        curr_date: Date as an 8-character ``YYYYMMDD`` string,
            e.g. ``'20221101'``.
        curr_code: Contract code to look for, e.g. ``'T2212'``.

    Returns:
        1 if at least one matching row is found; 0 if none is found or
        the lookup query fails.
    """
    # The query below needs the date written as YYYY.MM.DD.
    curr_date_formatted = f"{curr_date[:4]}.{curr_date[4:6]}.{curr_date[6:]}"
    tb = self.sess.loadTable(dbPath=self.ddb_hft_dbPath, tableName=tbName)
    logger.info(f"Quickly checking if data on {curr_code} {curr_date} exists...") # could do a slow checking of num of data
    cond = f"code=`{curr_code}, m_nDatetime.date()={curr_date_formatted}d"
    try:
        # There is no direct "is this table empty" call, so fetch at most
        # one matching row and use it as an existence probe.
        df = tb.select('*').where(cond).top(1).toDF()
    except Exception as err:
        # Best-effort check: a failed lookup is reported as "not found",
        # but it is logged so the failure is no longer silent.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        logger.warning(f"Existence check failed for {curr_code} on {curr_date}: {err!r}")
        return 0
    return 0 if df.empty else 1

@ -214,7 +214,7 @@ class TSLfm:
df = pd.DataFrame(r.value()) df = pd.DataFrame(r.value())
if df.empty: if df.empty:
logger.info('No data on this day.') logger.info('No data on this day.')
return 0 return pd.DataFrame()
logger.info(f"Processing new df of shape {df.shape}, which looks like\n{df.head(5)}") logger.info(f"Processing new df of shape {df.shape}, which looks like\n{df.head(5)}")
# new = df["m_nDatetime"].str.split(" ", n = 1, expand = True) # new = df["m_nDatetime"].str.split(" ", n = 1, expand = True)
@ -238,6 +238,6 @@ if __name__ == '__main__':
logger.add("../logs/{time:YYYYMMDD-HHmmss}_TSLfm.log", rotation="10 MB", compression="zip", level="INFO") logger.add("../logs/{time:YYYYMMDD-HHmmss}_TSLfm.log", rotation="10 MB", compression="zip", level="INFO")
with TSLfm() as tsl: with TSLfm() as tsl:
t_list=['CF2211'] t_list=['T2212']
df = tsl.process_result_data_type(tsl.get_mkt_min_k('20221031','20221101',t_list)) df = tsl.process_result_data_type(tsl.get_mkt_min_k('20221031','20221101',t_list))
print(df) print(df)

@ -3,14 +3,12 @@ import sys
running_which_env='prd' running_which_env='prd'
from os.path import dirname, abspath, join from os.path import dirname, abspath, join
ROOT_DIR = abspath(join(dirname(abspath(__file__)), "..")) ROOT_DIR = abspath(join(dirname(abspath(__file__)), ".."))
from loguru import logger from loguru import logger
logger.remove() logger.remove()
logger.add(sys.stderr, level="INFO") logger.add(sys.stderr, level="INFO")
logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}_{running_which_env}.log", rotation="10 MB", compression="zip", level="INFO") logger.add(ROOT_DIR+"/logs/{time:YYYYMMDD-HHmmss}"+f"_{running_which_env}.log", rotation="10 MB", compression="zip", level="INFO")
@ -22,20 +20,27 @@ from TSLfm import TSLfm
from code_list import code_list_pickel from code_list import code_list_pickel
def run_add_1day_code_init_minKline(date,code_list):
    """Fetch one day of minute K-line data and append it to the HFT table.

    Codes that already appear in the table for ``date`` are skipped with a
    warning; only the remaining codes are requested from TSL.

    Args:
        date: Trading date string, used as both start and end of the request.
        code_list: Contract codes to load for that date.

    Returns:
        0 when every code is already present (nothing to fetch);
        otherwise ``None``.
    """
    ddb = DDBfm(running_which_env)

    # Keep only the codes that are not yet stored for this date.
    pending_codes = []
    for code in code_list:
        if not ddb.search_code_date_in_tb(ddb.ddf_hft_mink_tbname,date,code):
            pending_codes.append(code)
            continue
        logger.warning(f"Possible duplicates on {date} and {code}")

    if not pending_codes:
        return 0

    with TSLfm() as tsl:
        raw_result = tsl.get_mkt_min_k(date,date,pending_codes)
        df = tsl.process_result_data_type(raw_result)

    if df.empty:
        # Nothing came back for this day, so there is nothing to append.
        return None

    logger.info(f'Getting a df of {df.shape}: {code_list[0][:-4]} on {date}')
    ddb.append_hft_table(ddb.ddf_hft_mink_tbname,df)
def run_create_db_minKline(): def run_create_db_minKline():
date = '20221101' date = '20221101'
with TSLfm() as tsl: with TSLfm() as tsl:
code_list=['CF2211'] code_list=['T2212']
df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list)) df = tsl.process_result_data_type(tsl.get_mkt_min_k(date,date,code_list))
# print(df) # print(df)
ddb = DDBfm(running_which_env) ddb = DDBfm(running_which_env)
@ -54,8 +59,8 @@ def run():
# print(all_code_dict_by_init) # print(all_code_dict_by_init)
start_date='2022-10-31' start_date='2022-09-30'
end_date='2022-11-08' end_date='2022-11-09'
allDates = pd.date_range(start_date, end_date, freq ='D') allDates = pd.date_range(start_date, end_date, freq ='D')
allDates = [i.replace('-','') for i in list(allDates.astype('str'))] allDates = [i.replace('-','') for i in list(allDates.astype('str'))]
@ -88,4 +93,5 @@ def run():
# ddb.append_hft_table(ddb.ddf_hft_mink_tbname,df) # ddb.append_hft_table(ddb.ddf_hft_mink_tbname,df)
if __name__ == '__main__': if __name__ == '__main__':
run() run()
# run_create_db_minKline()
Loading…
Cancel
Save