1. Renamed `DDBDailyFactor.py` => `DDBFactor.py`
2. Added a `DDBPITLoader` class to `DDBLoader.py`, dedicated to loading PIT data into DolphinDB
3. A few other small improvements
main
Guofu Li 2 years ago
parent 621b38dce3
commit 95de99046a

@@ -10,6 +10,9 @@ from DDBLoader import DDBLoader
def load_ddb_table(hft_tbl_name):
"""
Syntactic sugar meant to simplify loading a partitioned table; in practice there do not seem to be many scenarios where the table needs to be loaded up front, so the simplification it brings is limited.
"""
def decorator(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
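For orientation, a decorator factory of this shape typically looks like the following minimal sketch. The body is an assumption for illustration only (it is not the repository's actual load_ddb_table implementation); only the signature and the functools.wraps structure come from the hunk above.

import functools

def load_ddb_table(hft_tbl_name):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # Hypothetical behaviour: make sure the named partitioned table is loaded
            # into the DolphinDB session before the wrapped method runs.
            self.ddb_sess.run(
                f"{hft_tbl_name} = loadTable('dfs://hft_stock_ts', '{hft_tbl_name}')"
            )
            return func(self, *args, **kwargs)
        return wrapper
    return decorator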

@@ -2,6 +2,8 @@ import importlib
import gzip
import pickle
import functools
import abc
import warnings
from pprint import pprint
from pathlib import Path
@@ -11,6 +13,8 @@ from multiprocessing import Pool
import numpy as np
import pandas as pd
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
import dolphindb as ddb
import dolphindb.settings as keys
@@ -20,84 +24,465 @@ import sqlalchemy as sa
import ProtoBuffEntitys
class DDBLoader(object):
"""
- Holds a few shared configuration fields, including:
1. the SQL-Server connection parameters
2. the DolphinDB connection parameters
- Declares a few @abstractmethod members; if the polymorphism is not needed, they are of limited use:
1. create_ddb_database
2. create_ddb_partition_table
"""
mssql_config = {
'host' : '192.168.1.7',
'username' : 'sa',
'password' : 'passw0rd!'
}
ddb_config = {
'host' : '192.168.1.167',
'username' : 'admin',
'password' : '123456'
}
def __init__(self):
self.mssql_engine = sa.create_engine(
"mssql+pyodbc://{username}:{password}@{host}/master?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config),
connect_args = {
"TrustServerCertificate": "yes"
}, echo=False
)
self.ddb_sess = ddb.session(self.ddb_config['host'], 8848)
self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password'])
@abc.abstractmethod
def create_ddb_database(self, *args, **kwargs):
"""
Create the database; must be implemented by the concrete subclass.
"""
return
@abc.abstractmethod
def create_ddb_partition_table(self, *args, **kwargs):
"""
Create the partitioned table; must be implemented by the concrete subclass.
"""
return
@staticmethod
def tscode_to_windcode(series):
return series.apply(lambda x : x[2:] + '.' + x[:2])
@staticmethod
def make_symbol(series):
return series.astype('int32').astype('str')\
.apply(str.zfill, args=(6,))\
.apply(lambda code : \
code + '.SH' if code[0] == '6' \
else code + '.SZ')
@staticmethod
def make_date(series):
# In particular, in the dividend table some key dates that have not been announced yet are filled with 0, which would make the date parsing fail.
series.loc[series == 0] = np.nan
return pd.to_datetime(
series.astype(str), format='%Y%m%d')
@staticmethod
def make_nparray(series):
return series.apply(lambda x : np.array(x))
@staticmethod
def make_time(series):
s_hr = series // 10000000 * 3600000
s_min = series % 10000000 // 100000 * 60000
s_sec = series % 100000 // 1000 * 1000  # seconds, scaled to milliseconds to match unit='ms' below
s_ms = series % 1000
return pd.to_timedelta(s_hr + s_min + s_sec + s_ms, unit='ms')
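To make the conversion helpers above concrete, here is a small self-contained usage sketch; the sample values are made up, and the date handling is lightly adapted so the snippet runs on its own.

import pandas as pd

# tscode_to_windcode: 'SH600519' -> '600519.SH'
codes = pd.Series(['SH600519', 'SZ000001'])
print(codes.apply(lambda x: x[2:] + '.' + x[:2]))

# make_date's intent: a 0 date (e.g. a dividend date not announced yet) must become a
# missing value instead of breaking the parse; coerced here for the standalone example.
dates = pd.Series([20220830, 0]).astype(str)
print(pd.to_datetime(dates, format='%Y%m%d', errors='coerce'))   # 2022-08-30, NaT

# make_time: integers laid out as H*1e7 + M*1e5 + S*1e3 + ms
times = pd.Series([93000000, 145959500])
ms = (times // 10000000 * 3600000            # hours   -> ms
      + times % 10000000 // 100000 * 60000   # minutes -> ms
      + times % 100000 // 1000 * 1000        # seconds -> ms
      + times % 1000)                        # milliseconds
print(pd.to_timedelta(ms, unit='ms'))        # 09:30:00 and 14:59:59.500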
class DDBPITLoader(DDBLoader):
ddb_path = "dfs://pit_stock_ts"
ddb_dbname = "ddb_pit_stock_ts"
num_code_partition = 50
table_name_mapping = {
#'CBS_AFTER_ADJ' : 'bs_common_adj',
#'CBS_BEFORE_ADJ' : 'bs_common_ori',
#'CCFS_AFTER_ADJ' : 'cfs_common_adj',
#'CCFS_BEFORE_ADJ' : 'cfs_common_ori',
#'CIS_AFTER_ADJ' : 'is_common_adj',
#'CIS_BEFORE_ADJ' : 'is_common_ori',
'DIV_WIND' : 'divident',
#'EP_WIND' : 'earnings_preannouncement',
#'PEE_WIND' : 'preliminary_earnings_estimate'
}
meta_col_config = {
'WIND_CODE' : ('code', 'SYMBOL'),
# Meta fields of the mssql table that do not need to be stored are set to None here
'IntCode' : None,
'ACTUAL_ANN_DT' : None,
'ReportPeriod' : ('report_period', 'DATE'),
'AppearInPeriod' : ('appear_in_period', 'DATE'),
'AppearAtDate' : ('appear_at_date', 'DATE')
}
date_col_set = {
'report_period',
'appear_in_period',
'appear_at_date',
'ReportPeriod',
'AppearInPeriod',
'AppearAtDate',
'EQY_RECORD_DT',
'EX_DT',
'DVD_PAYOUT_DT',
'S_DIV_PRELANDATE',
'S_DIV_SMTGDATE',
'DVD_ANN_DT',
'S_DIV_PREANNDT'
}
ddb_type_mapping = {
'float' : 'DOUBLE',
'int' : 'INT',
'text' : 'STRING',
'varchar' : 'STRING',
'str' : 'STRING'
}
# The fundamentals database is currently hosted on the .91 server
mssql_config = {
'host' : '192.168.1.91',
'username' : 'sa',
'password' : 'xn.123',
'dbname' : 'tr_statement'
}
def __init__(self):
super().__init__()
# Re-create the mssql_engine object, since we now need to connect to the fundamentals database
self.mssql_engine = sa.create_engine(
"mssql+pyodbc://{username}:{password}@{host}/{dbname}?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config),
connect_args = {
"TrustServerCertificate": "yes"
}, echo=False
)
def create_ddb_database(self):
self.ddb_sess.run("""
{dbName} = database(
directory = '{dbPath}',
partitionType = HASH,
partitionScheme = [SYMBOL, {num_code_partition}],
engine = 'TSDB'
)
""".format(
dbName = self.ddb_dbname,
dbPath = self.ddb_path,
num_code_partition = self.num_code_partition
))
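Not part of the commit, but as a quick sanity check the same session API can confirm the database was created (host and credentials taken from the ddb_config above).

import dolphindb as ddb

sess = ddb.session('192.168.1.167', 8848)
sess.login('admin', '123456')
# existsDatabase is a DolphinDB built-in; expect True once create_ddb_database has run.
print(sess.run("existsDatabase('dfs://pit_stock_ts')"))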
def _make_col_config(self, mssql_table_name):
"""
Return:
mssql_col_name_list, ddb_col_name_list, ddb_col_type_list
"""
with self.mssql_engine.connect() as conn:
col_sp_list = list(conn.execute(f"exec sp_columns {mssql_table_name}").fetchall())
mssql_col_name_list, ddb_col_name_list, ddb_col_type_list = \
[], [], []
for col_sp in col_sp_list:
_col_name = col_sp[3]
_col_type = col_sp[5]
# Meta fields are handled according to the meta config table
if _col_name in self.meta_col_config:
# Skip meta fields of the mssql table that do not need to be stored
if self.meta_col_config[_col_name] is None:
continue
# Both the column name and the column type are mapped
mssql_col_name_list.append(_col_name)
ddb_col_name_list.append(self.meta_col_config[_col_name][0])
ddb_col_type_list.append(self.meta_col_config[_col_name][1])
# For non-meta fields we mainly need to check whether the type is float; float columns are stored as DOUBLE
else:
# Fields that will later be converted to DATE are usually stored as INT in the source table
if _col_name in self.date_col_set:
mssql_col_name_list.append(_col_name)
ddb_col_name_list.append(_col_name)
ddb_col_type_list.append('DATE')
# Convert the type according to the mapping table
elif _col_type in self.ddb_type_mapping:
mssql_col_name_list.append(_col_name)
ddb_col_name_list.append(_col_name)
ddb_col_type_list.append(self.ddb_type_mapping[_col_type])
# Types not present in the mapping table are not added to the column lists at all
else:
print(f"!**Unrecognized type '{_col_type}' for column {_col_name}, will skip.")
return mssql_col_name_list, ddb_col_name_list, ddb_col_type_list
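To illustrate the mapping rules above, here is a hypothetical sp_columns result and the lists it would produce; the sample column names are invented, and indices 3 and 5 of each row correspond to COLUMN_NAME and TYPE_NAME in sp_columns output.

# Invented sample rows, shaped like sp_columns output (only indices 3 and 5 matter here).
fake_sp_rows = [
    (None, None, None, 'WIND_CODE',    None, 'varchar'),  # meta field -> ('code', 'SYMBOL')
    (None, None, None, 'IntCode',      None, 'int'),      # meta field mapped to None -> skipped
    (None, None, None, 'AppearAtDate', None, 'int'),      # meta field -> ('appear_at_date', 'DATE')
    (None, None, None, 'EX_DT',        None, 'int'),      # in date_col_set -> DATE
    (None, None, None, 'SOME_RATIO',   None, 'float'),    # plain float -> DOUBLE
]
# Following the rules in _make_col_config, the three lists would come out as:
#   mssql_col_name_list = ['WIND_CODE', 'AppearAtDate', 'EX_DT', 'SOME_RATIO']
#   ddb_col_name_list   = ['code', 'appear_at_date', 'EX_DT', 'SOME_RATIO']
#   ddb_col_type_list   = ['SYMBOL', 'DATE', 'DATE', 'DOUBLE']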
def create_ddb_partition_table(self, mssql_table_name):
"""创建分区表"""
memory_table_name = mssql_table_name
partition_table_name = self.table_name_mapping[mssql_table_name]
mssql_col_name_list, ddb_col_name_list, ddb_col_type_list = \
self._make_col_config(mssql_table_name)
# Choose the compression settings depending on whether the table has an `appear_in_period` column
if 'appear_in_period' in ddb_col_name_list:
compress_methods = """{
'report_period' : 'delta',
'appear_in_period' : 'delta',
'appear_at_date' : 'delta'
}"""
else:
compress_methods = """{
'report_period' : 'delta',
'appear_at_date' : 'delta'
}"""
# Pre- and post-adjustment rows are already split into separate tables by `appear_in_period`, so there is no need to sort on it here
sort_columns = "`code`report_period`appear_at_date"
# 1. First create an in-memory table, which defines the column names and column types
# 2. Then create the partitioned table from that in-memory table, specifying the partition column and other settings
self.ddb_sess.run("""
{memory_table_name} = table(
{capacity}:0,
{column_name_list},
[{column_type_list}]
);
if (existsTable("{ddb_path}", "{partition_table_name}")) {{
dropTable({ddb_dbname}, "{partition_table_name}");
}}
{partition_table_name} = createPartitionedTable(
dbHandle = {ddb_dbname},
table = {memory_table_name},
tableName = "{partition_table_name}",
partitionColumns = 'code',
compressMethods = {compress_methods},
sortColumns = {sort_columns}
);
""".format(
ddb_dbname = self.ddb_dbname,
ddb_path = self.ddb_path,
memory_table_name = memory_table_name,
partition_table_name = partition_table_name,
capacity = 10,
column_name_list = '`' + '`'.join(ddb_col_name_list),
column_type_list = ','.join(ddb_col_type_list),
compress_methods = compress_methods.replace('\n', '').replace(' ', ''),
sort_columns = sort_columns
))
print('-' * 80)
print(f"Did create parition table <{partition_table_name}>:")
pprint(self.ddb_sess.run(f"schema({partition_table_name});"))
return partition_table_name, mssql_col_name_list
def create_ddb_partition_tables(self):
for mssql_table_name in self.table_name_mapping:
self.create_ddb_partition_table(mssql_table_name)
def _dump_pit_to_ddb(self, mssql_table_name):
print('Will work on table', mssql_table_name)
# The returned `mssql_col_name_list` is used to filter the columns of the dataframe fetched from SQL-Server
partition_table_name, mssql_col_name_list = \
self.create_ddb_partition_table(mssql_table_name)
with self.mssql_engine.connect() as conn:
stat = f"select distinct [WIND_CODE] from {mssql_table_name}"
stock_id_list = list(conn.execute(stat).fetchall())
with tqdm(stock_id_list) as pbar:
for (stock_id,) in pbar:
pbar.set_description(f"Will work on {stock_id}")
#pbar.set_description(f"Will fetch all data of {stock_id} from SQL Server")
stat = """
select * from {mssql_table_name}
where WIND_CODE='{stock_id}' and AppearAtDate>0
""".format(
mssql_table_name = mssql_table_name,
stock_id = stock_id
)
row_list = list(conn.execute(stat).fetchall())
num_rows = len(row_list)
# Because of the AppearAtDate filter, the result set may be empty
if num_rows == 0:
print(f"Will skip {stock_id} due to empty result.")
continue
#pbar.set_description(f"Will work on dumping job on {stock_id} of len {num_rows}")
# Filter the columns returned by the select statement so that they match the columns of the partition table
df = pd.DataFrame(row_list)[mssql_col_name_list]
# Some columns stored as int need to be converted to DATE
for df_col in df.columns:
if df_col in self.date_col_set:
df[df_col] = DDBLoader.make_date(df[df_col])
# The ts-code to wind-code conversion was already done when the database view was built, so it is not repeated here
#df['WIND_CODE'] = DDBLoader.tscode_to_windcode(df['WIND_CODE'])
self.ddb_sess.upload({mssql_table_name : df})
self.ddb_sess.run(f"{partition_table_name}.tableInsert({mssql_table_name})")
def dump_pit_to_ddb(self):
for mssql_table_name in self.table_name_mapping:
self._dump_pit_to_ddb(mssql_table_name)
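Putting the pieces together, the PIT branch of main() further down reduces to the following usage pattern.

from DDBLoader import DDBPITLoader   # assumes this module is importable as DDBLoader

loader = DDBPITLoader()
loader.create_ddb_database()   # dfs://pit_stock_ts, HASH-partitioned on code, TSDB engine
loader.dump_pit_to_ddb()       # per table: create the partition table, then insert stock by stock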
class DDBDailyLoader(DDBLoader):
ddb_path = "dfs://daily_stock_ts"
ddb_dbname = "db_daily_stock_ts"
daily_kline_cols = [
'code', 'm_nDate',
'open', 'high', 'low', 'close', 'vol',
'amount', 'cjbs', 'yclose',
'PctChg', 'IsZt', 'IsDt', 'IsST', 'IsGoDelist',
'FloatShares', 'MarketValues',
'factor'
]
daily_kline_col_types = [
'SYMBOL', 'DATE',
'DOUBLE', 'DOUBLE', 'DOUBLE', 'DOUBLE', 'DOUBLE',
'DOUBLE', 'INT', 'DOUBLE',
'DOUBLE', 'INT', 'INT', 'INT', 'INT',
'DOUBLE', 'DOUBLE',
'DOUBLE'
]
def create_ddb_database(self):
# TODO: the daily database has already been created by DDBDailyFactor
# It could be migrated over here later, but for now it is left as-is
pass
def load_ddb_database(self):
self.ddb_sess.run("""
{dbName} = database(directory='{dbPath}')
""".format(
dbName = self.ddb_dbname,
dbPath = self.ddb_path
))
print('Did load database from', self.ddb_path)
def create_ddb_partition_table(self, memory_table_name, partition_table_name):
# TODO: only the daily K-line table is handled for now; the fundamental data may be migrated here later as well
# The daily K-line schema is fairly simple, so it is written out right here in the code
# When migrating the data it should be moved one stock at a time, to avoid putting huge pressure on memory
self.ddb_sess.run("""
// make sure any existing table is dropped first
if (existsTable("{ddb_daily_path}", "{partition_table_name}")) {{
dropTable({ddb_daily_dbname}, "{partition_table_name}");
}}
// then create the persistent partitioned table based on the structure of the in-memory table
{partition_table_name} = {ddb_daily_dbname}.createPartitionedTable(
table = {memory_table_name},
tableName = "{partition_table_name}",
partitionColumns = `code,
sortColumns = `code`m_nDate,
compressMethods = {{m_nDate:"delta"}}
);
""".format(
ddb_daily_path = self.ddb_path,
ddb_daily_dbname = self.ddb_dbname,
memory_table_name = memory_table_name,
partition_table_name = partition_table_name,
))
def create_ddb_memory_table(self, memory_table_name, capacity):
self.ddb_sess.run("""
// First create an empty in-memory table that only describes the schema; if no data needs to be inserted into it, a capacity of 10 is enough
{memory_table_name} = table({capacity}:0, {col_names}, [{col_types}]);
""".format(
memory_table_name = memory_table_name,
capacity = capacity,
col_names = '`' + '`'.join(self.daily_kline_cols),
col_types = ', '.join(self.daily_kline_col_types)
))
def dump_daily_kline_to_ddb(self):
# Create the partitioned table first, then insert the data one stock at a time
# 1. Otherwise extra control flow would be needed to create the partitioned table while inserting the first stock, which is cumbersome
# 2. A dataframe on the python side can be uploaded into a dolphindb in-memory table directly; column types do not need to be declared for the in-memory table, setting them on the partitioned table is enough
memory_table_name = 'daily_kline_mem'
partition_table_name = 'daily_kline'
self.create_ddb_memory_table(memory_table_name, 10)
print('Did create ddb memory table.')
pprint(self.ddb_sess.run(f"schema({memory_table_name})"))
self.create_ddb_partition_table(memory_table_name, partition_table_name)
print('Did create ddb partition table.')
pprint(self.ddb_sess.run(f"schema({partition_table_name})"))
with self.mssql_engine.connect() as conn:
stat = "select distinct [StockID] from [StockDaily].dbo.[DailyKLine]"
stock_id_list = list(conn.execute(stat).fetchall())
with tqdm(stock_id_list) as pbar:
for (stock_id,) in pbar:
pbar.set_description(f"Will work on {stock_id}")
#pbar.set_description(f"Will fetch all data of {stock_id} from SQL Server")
stat = """
select * from [StockDaily].dbo.[DailyKLine]
where StockID='{stock_id}'
""".format(
stock_id = stock_id
)
row_list = list(conn.execute(stat).fetchall())
num_rows = len(row_list)
#pbar.set_description(f"Will work on dumping job on {stock_id} of len {num_rows}")
df = pd.DataFrame(row_list)
df['date'] = DDBLoader.make_date(df['date'])
df['StockID'] = DDBLoader.tscode_to_windcode(df['StockID'])
self.ddb_sess.upload({memory_table_name : df})
#print('Did upload dataframe to ddb.')
#pprint(self.ddb_sess.run(f"schema({memory_table_name})"))
#break
self.ddb_sess.run(f"{partition_table_name}.tableInsert({memory_table_name})")
class DDBHFTLoader(DDBLoader):
"""
0. Reads the calendar data from sql-server and builds the member variable df_calendar; df_calendar can be cached locally as a pickle
|- `def make_calendar_df(self) -> df_calendar`
@@ -162,18 +547,6 @@ class DDBLoader(object):
13 : 'INT',
}
# this value may be used by factor makers, which may loop through code partitions
num_code_partition = 50
@@ -182,18 +555,6 @@ class DDBLoader(object):
ddb_dump_journal_fname = 'ddb_dump_journal.csv'
def init_ddb_database(self, df_calendar):
"""
1. Create the ddb_database
@@ -490,6 +851,7 @@ class DDBLoader(object):
print("Will create new Pool object, but this is not encourage for large batch work.")
pool = Pool(self.num_worker)
# Within a single stock, the different dates are processed in parallel; this is friendlier to memory, since the full history of many stocks never has to be loaded at the same time
with tqdm(total=num_rows, leave=False) as sub_pbar:
for _ in pool.imap_unordered(
functools.partial(
@@ -505,18 +867,85 @@ class DDBLoader(object):
self.dump_journal_writer.flush()
@staticmethod
def make_stock_daily_df(blob, type_name, stock_id):
"""
Helper used for dumping data into ddb across multiple processes
"""
blob = gzip.decompress(blob)
dataArray = eval(f"ProtoBuffEntitys.{type_name}Message_pb2.{type_name}Array()")
dataArray.ParseFromString(blob)
data_dict_list = [
{field.name : val for field, val in entry.ListFields()}
for entry in dataArray.dataArray
]
array_type_list = [
field.name
for field, val in dataArray.dataArray[0].ListFields()
if isinstance(field.default_value, list)
]
#pprint(array_type_list)
df = pd.DataFrame(data_dict_list)
#df['code'] = make_symbol(df['code'])
df['code'] = stock_id
df['m_nDate'] = make_date(df['m_nDate'])
df['m_nTime'] = df['m_nDate'] + make_time(df['m_nTime'])
for field_name in array_type_list:
df[field_name] = make_nparray(df[field_name])
#print(f"Did create ddb table for dataframe of shape {df.shape}")
# self.make_table_skeleton(type_name, df.shape[0])
return df
@staticmethod
def dump_stock_daily_to_ddb(row, type_name, stock_id):
"""
Helper used for dumping data into ddb across multiple processes
"""
df_table_name = type_name
df = DDBHFTLoader.make_stock_daily_df(row[2], type_name, stock_id)
ddb_sess = ddb.session(DDBLoader.ddb_config['host'], 8848)
ddb_sess.login(DDBLoader.ddb_config['username'], DDBLoader.ddb_config['password'])
ddb_sess.upload({df_table_name : df})
ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format(
dbPath = DDBLoader.ddb_path,
partitioned_table_name = type_name + DDBLoader.ddb_partition_table_suffix,
df_table_name = df_table_name
))
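The two static methods above exist so the per-row dump can be handed to worker processes; below is a simplified, self-contained sketch of that Pool/imap_unordered dispatch pattern, with a stub in place of the real DolphinDB insert (the sample rows and stock id are made up).

import functools
from multiprocessing import Pool

def dump_one(row, type_name, stock_id):
    # Stand-in for DDBHFTLoader.dump_stock_daily_to_ddb: a real worker would open its own
    # DolphinDB session, build the dataframe from row[2] and insert it.
    return len(row[2])

if __name__ == '__main__':
    rows = [('2022-08-30', 'KLine', b'blob-1'), ('2022-08-31', 'KLine', b'blob-2')]  # made-up rows
    worker = functools.partial(dump_one, type_name='KLine', stock_id='600519.SH')
    with Pool(4) as pool:
        for _ in pool.imap_unordered(worker, rows):
            pass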
def main():
# PIT fundamental data
loader = DDBPITLoader()
loader.create_ddb_database()
#loader.create_ddb_partition_tables()
loader.dump_pit_to_ddb()
# Daily K-line data
#loader = DDBDailyLoader()
#loader.load_ddb_database()
#loader.dump_daily_kline_to_ddb()
# High-frequency data
#df_calendar = loader.make_calendar_df()
#loader.init_ddb_database(df_calendar)
#print('Did finish init_ddb_database')
#loader.load_ddb_database()
#print('Did load ddb database')
#loader.init_ddb_table_data(df_calendar)
#print('Did finish init_table_data')
if __name__ == '__main__':

@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 66,
"id": "5d0f471e-682e-43cc-abdb-7e52f3bbd707",
"metadata": {},
"outputs": [],
@@ -23,30 +23,30 @@
},
{
"cell_type": "code",
"execution_count": 67,
"id": "ed17fd0b-9b36-47e4-9ab6-11459a3621fb",
"metadata": {},
"outputs": [],
"source": [
"# backup(backup_path, sql_obj, force, parallel)\n",
"code = \"\"\"\n",
" backup('/data/dolphindb/backup/', <select * from loadTable(\"dfs://hft_stock_ts\", \"OrderPartitioned\")>, true, true)\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "8b7dae3d-aef1-4c50-92b2-460d4fea0a96",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"114350"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
@@ -55,6 +55,27 @@
"sess.run(code)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "bf3bc38e-74cb-4549-bdca-6a3d2a601488",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2287.0"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"114350 / 50"
]
},
{
"cell_type": "code",
"execution_count": 6,

File diff suppressed because it is too large