From 95de99046afe6dd44b004c5379d36b22bc4bc2f7 Mon Sep 17 00:00:00 2001 From: Guofu Li Date: Thu, 18 Aug 2022 14:33:47 +0800 Subject: [PATCH] =?UTF-8?q?1.=20`DDBDailyFactor.py`=20=3D>=20`DDBFactor.py?= =?UTF-8?q?`=202.=20=E5=9C=A8`DDBLoader.py`=E4=B8=AD=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E4=BA=86`DDBPITLoader`=E7=B1=BB=EF=BC=8C=E4=B8=93=E9=97=A8?= =?UTF-8?q?=E7=94=A8=E4=BA=8E=E5=90=91DolphinDB=E9=87=8C=E9=9D=A2=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0PIT=E6=95=B0=E6=8D=AE=203.=20=E5=85=B6=E4=BB=96?= =?UTF-8?q?=E4=B8=80=E4=BA=9B=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DDBDailyFactor.py => DDBFactor.py | 3 + DDBLoader.py | 599 ++++++++++++--- ddb.ipynb | 33 +- mssql.ipynb | 1151 ++++++++++++++++++++++++++++- 4 files changed, 1692 insertions(+), 94 deletions(-) rename DDBDailyFactor.py => DDBFactor.py (97%) diff --git a/DDBDailyFactor.py b/DDBFactor.py similarity index 97% rename from DDBDailyFactor.py rename to DDBFactor.py index 6c95703..c0470cb 100644 --- a/DDBDailyFactor.py +++ b/DDBFactor.py @@ -10,6 +10,9 @@ from DDBLoader import DDBLoader def load_ddb_table(hft_tbl_name): + """ + 这是一个用来简化载入分区表过程的语法糖,但似乎需要预先调用这个函数的场景并不多,简化效果不是很明显。 + """ def decorator(func): @functools.wraps(func) def wrapper(self, *args, **kwargs): diff --git a/DDBLoader.py b/DDBLoader.py index e5d8f0e..e26c465 100644 --- a/DDBLoader.py +++ b/DDBLoader.py @@ -2,6 +2,8 @@ import importlib import gzip import pickle import functools +import abc +import warnings from pprint import pprint from pathlib import Path @@ -11,6 +13,8 @@ from multiprocessing import Pool import numpy as np import pandas as pd +from pandas.core.common import SettingWithCopyWarning +warnings.simplefilter(action="ignore", category=SettingWithCopyWarning) import dolphindb as ddb import dolphindb.settings as keys @@ -20,84 +24,465 @@ import sqlalchemy as sa import ProtoBuffEntitys -def make_stock_daily_df(blob, type_name, stock_id): + +class DDBLoader(object): """ - 用于做多进程录入ddb的函数 + - 放了几个公用的配置字段,包括: + 1. SQL-Server的链接参数 + 2. DolphinDB的链接参数 + + - 放了几个@abstractmethod在里面,不过如果不需要使用多态特性,那应该用处不大: + 1. create_ddb_database + 2. create_ddb_partition_table """ - blob = gzip.decompress(blob) - dataArray = eval(f"ProtoBuffEntitys.{type_name}Message_pb2.{type_name}Array()") - dataArray.ParseFromString(blob) - data_dict_list = [ - {field.name : val for field, val in entry.ListFields()} - for entry in dataArray.dataArray + mssql_config = { + 'host' : '192.168.1.7', + 'username' : 'sa', + 'password' : 'passw0rd!' + } + + ddb_config = { + 'host' : '192.168.1.167', + 'username' : 'admin', + 'password' : '123456' + } + + + def __init__(self): + self.mssql_engine = sa.create_engine( + "mssql+pyodbc://{username}:{password}@{host}/master?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config), + connect_args = { + "TrustServerCertificate": "yes" + }, echo=False + ) + + self.ddb_sess = ddb.session(self.ddb_config['host'], 8848) + self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password']) + + + @abc.abstractmethod + def create_ddb_database(self, *args, **kwargs): + """ + 创建database函数,需要被子类具体实现。 + """ + return + + @abc.abstractmethod + def create_ddb_partition_table(self, *args, **kwargs): + """ + 创建分区表函数,需要被子类具体实现。 + """ + return + + + @staticmethod + def tscode_to_windcode(series): + return series.apply(lambda x : x[2:] + '.' 
+ x[:2]) + + + @staticmethod + def make_symbol(series): + return series.astype('int32').astype('str')\ + .apply(str.zfill, args=(6,))\ + .apply(lambda code : \ + code + '.SH' if code[0] == '6' \ + else code + '.SZ') + + + @staticmethod + def make_date(series): + # 特别是对于分红表,如果某些关键日期还未公布,则会填充0,导致日期解析失败 + series.loc[series == 0] = np.nan + return pd.to_datetime( + series.astype(str), format='%Y%m%d') + + + @staticmethod + def make_nparray(series): + return series.apply(lambda x : np.array(x)) + + + @staticmethod + def make_time(series): + s_hr = series // 10000000 * 3600000 + s_min = series % 10000000 // 100000 * 60000 + s_sec = series % 100000 // 1000 + s_ms = series % 1000 + return pd.to_timedelta(s_hr + s_min + s_sec + s_ms, unit='ms') + + + +class DDBPITLoader(DDBLoader): + + ddb_path = "dfs://pit_stock_ts" + ddb_dbname = "ddb_pit_stock_ts" + + num_code_partition = 50 + + table_name_mapping = { + #'CBS_AFTER_ADJ' : 'bs_common_adj', + #'CBS_BEFORE_ADJ' : 'bs_common_ori', + #'CCFS_AFTER_ADJ' : 'cfs_common_adj', + #'CCFS_BEFORE_ADJ' : 'cfs_common_ori', + #'CIS_AFTER_ADJ' : 'is_common_adj', + #'CIS_BEFORE_ADJ' : 'is_common_ori', + 'DIV_WIND' : 'divident', + #'EP_WIND' : 'earnings_preannouncement', + #'PEE_WIND' : 'preliminary_earnings_estimate' + } + + meta_col_config = { + 'WIND_CODE' : ('code', 'SYMBOL'), + # mssql表中不需要记录的meta字段,在这里直接设置为None + 'IntCode' : None, + 'ACTUAL_ANN_DT' : None, + 'ReportPeriod' : ('report_period', 'DATE'), + 'AppearInPeriod' : ('appear_in_period', 'DATE'), + 'AppearAtDate' : ('appear_at_date', 'DATE') + } + + date_col_set = { + 'report_period', + 'appear_in_period', + 'appear_at_date', + 'ReportPeriod', + 'AppearInPeriod', + 'AppearAtDate', + 'EQY_RECORD_DT', + 'EX_DT', + 'DVD_PAYOUT_DT', + 'S_DIV_PRELANDATE', + 'S_DIV_SMTGDATE', + 'DVD_ANN_DT', + 'S_DIV_PREANNDT' + } + + ddb_type_mapping = { + 'float' : 'DOUBLE', + 'int' : 'INT', + 'text' : 'STRING', + 'varchar' : 'STRING', + 'str' : 'STRING' + } + + # 基本面数据库现在存放在91服务器之上 + mssql_config = { + 'host' : '192.168.1.91', + 'username' : 'sa', + 'password' : 'xn.123', + 'dbname' : 'tr_statement' + } + + + def __init__(self): + super().__init__() + # 重新设定mssql_engine对象,此时我们需要使用基本面数据库 + self.mssql_engine = sa.create_engine( + "mssql+pyodbc://{username}:{password}@{host}/{dbname}?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config), + connect_args = { + "TrustServerCertificate": "yes" + }, echo=False + ) + + + def create_ddb_database(self): + self.ddb_sess.run(""" + {dbName} = database( + directory = '{dbPath}', + partitionType = HASH, + partitionScheme = [SYMBOL, {num_code_partition}], + engine = 'TSDB' + ) + """.format( + dbName = self.ddb_dbname, + dbPath = self.ddb_path, + num_code_partition = self.num_code_partition + )) + + + def _make_col_config(self, mssql_table_name): + """ + Return: + mssql_col_name_list, ddb_col_name_list, ddb_col_type_list + """ + with self.mssql_engine.connect() as conn: + col_sp_list = list(conn.execute(f"exec sp_columns {mssql_table_name}").fetchall()) + + mssql_col_name_list, ddb_col_name_list, ddb_col_type_list = \ + [], [], [] + + for col_sp in col_sp_list: + _col_name = col_sp[3] + _col_type = col_sp[5] + + # 对于meta字段,需要根据meta配置表来进行处理 + if _col_name in self.meta_col_config: + # 跳过mssql表中 不需要记录的meta字段 + if self.meta_col_config[_col_name] is None: + continue + # 字段名和字段类型都要进行映射 + mssql_col_name_list.append(_col_name) + ddb_col_name_list.append(self.meta_col_config[_col_name][0]) + ddb_col_type_list.append(self.meta_col_config[_col_name][1]) + # 
对于非meta字段,仅需要检查是否是float类型,对于float类型设置类型为DOUBLE + else: + # 需要之后被转换成DATE的字段,一般在原表中为为INT类型 + if _col_name in self.date_col_set: + mssql_col_name_list.append(_col_name) + ddb_col_name_list.append(_col_name) + ddb_col_type_list.append('DATE') + # 按照对照表进行类型转换 + elif _col_type in self.ddb_type_mapping: + mssql_col_name_list.append(_col_name) + ddb_col_name_list.append(_col_name) + ddb_col_type_list.append(self.ddb_type_mapping[_col_type]) + # 对照表中没有的字段类型,就不加入到字段列表中了 + else: + print(f"!**Unrecognized type '{_col_type}' for column {_col_name}, will skip.") + + return mssql_col_name_list, ddb_col_name_list, ddb_col_type_list + + + def create_ddb_partition_table(self, mssql_table_name): + """创建分区表""" + memory_table_name = mssql_table_name + partition_table_name = self.table_name_mapping[mssql_table_name] + + mssql_col_name_list, ddb_col_name_list, ddb_col_type_list = \ + self._make_col_config(mssql_table_name) + + # 根据是否 + if 'appear_in_period' in ddb_col_name_list: + compress_methods = """{ + 'report_period' : 'delta', + 'appear_in_period' : 'delta', + 'appear_at_date' : 'delta' + }""" + else: + compress_methods = """{ + 'report_period' : 'delta', + 'appear_at_date' : 'delta' + }""" + + # 因为已经根据`appear_in_period`分列了调整前和调整后,因此不需要对它再进行排序了 + sort_columns = "`code`report_period`appear_at_date" + + # 1. 先创建内存表,内存表中设定好列名和列类型 + # 2. 然后根据内存表创建分区表,设定分区列等信息 + self.ddb_sess.run(""" + {memory_table_name} = table( + {capacity}:0, + {column_name_list}, + [{column_type_list}] + ); + + if (existsTable("{ddb_path}", "{partition_table_name}")) {{ + dropTable({ddb_dbname}, "{partition_table_name}"); + }} + + {partition_table_name} = createPartitionedTable( + dbHandle = {ddb_dbname}, + table = {memory_table_name}, + tableName = "{partition_table_name}", + partitionColumns = 'code', + compressMethods = {compress_methods}, + sortColumns = {sort_columns} + ); + """.format( + ddb_dbname = self.ddb_dbname, + ddb_path = self.ddb_path, + memory_table_name = memory_table_name, + partition_table_name = partition_table_name, + capacity = 10, + column_name_list = '`' + '`'.join(ddb_col_name_list), + column_type_list = ','.join(ddb_col_type_list), + compress_methods = compress_methods.replace('\n', '').replace(' ', ''), + sort_columns = sort_columns + )) + print('-' * 80) + print(f"Did create parition table <{partition_table_name}>:") + pprint(self.ddb_sess.run(f"schema({partition_table_name});")) + return partition_table_name, mssql_col_name_list + + + def create_ddb_partition_tables(self): + for mssql_table_name in self.table_name_mapping: + self.create_ddb_partition_table(mssql_table_name) + + + def _dump_pit_to_ddb(self, mssql_table_name): + print('Will work on table', mssql_table_name) + # 返回的`mssql_col_name_list`可以用来对SQL-Server获取的dataframe进行列过滤 + partition_table_name, mssql_col_name_list = \ + self.create_ddb_partition_table(mssql_table_name) + + with self.mssql_engine.connect() as conn: + stat = f"select distinct [WIND_CODE] from {mssql_table_name}" + stock_id_list = list(conn.execute(stat).fetchall()) + + with tqdm(stock_id_list) as pbar: + for (stock_id,) in pbar: + pbar.set_description(f"Will work on {stock_id}") + #pbar.set_description(f"Will fetch all data of {stock_id} from SQL Server") + stat = """ + select * from {mssql_table_name} + where WIND_CODE='{stock_id}' and AppearAtDate>0 + """.format( + mssql_table_name = mssql_table_name, + stock_id = stock_id + ) + row_list = list(conn.execute(stat).fetchall()) + num_rows = len(row_list) + + # 因为对AppearAtDate做了过滤,所以有可能得到一个空的数据集 + if num_rows == 0: + print(f"Will skip 
{stock_id} due to empty result.") + continue + + #pbar.set_description(f"Will work on dumping job on {stock_id} of len {num_rows}") + # 这里需要对select语句获取的所有列进行一次过滤,以保证和partition table中的列一致 + df = pd.DataFrame(row_list)[mssql_col_name_list] + # 需要把部分字段的int字段类型转换成DATE字段类型 + for df_col in df.columns: + if df_col in self.date_col_set: + df[df_col] = DDBLoader.make_date(df[df_col]) + # 因为在做数据库View的时候已经做过一轮转换了,所以这里就不需要再次转换了 + #df['WIND_CODE'] = DDBLoader.tscode_to_windcode(df['WIND_CODE']) + + self.ddb_sess.upload({mssql_table_name : df}) + self.ddb_sess.run(f"{partition_table_name}.tableInsert({mssql_table_name})") + + + def dump_pit_to_ddb(self): + for mssql_table_name in self.table_name_mapping: + self._dump_pit_to_ddb(mssql_table_name) + + +class DDBDailyLoader(DDBLoader): + + ddb_path = "dfs://daily_stock_ts" + ddb_dbname = "db_daily_stock_ts" + + daily_kline_cols = [ + 'code', 'm_nDate', + 'open', 'high', 'low', 'close', 'vol', + 'amount', 'cjbs', 'yclose', + 'PctChg', 'IsZt', 'IsDt', 'IsST', 'IsGoDelist', + 'FloatShares', 'MarketValues', + 'factor' ] - array_type_list = [ - field.name - for field, val in dataArray.dataArray[0].ListFields() - if isinstance(field.default_value, list) + daily_kline_col_types = [ + 'SYMBOL', 'DATE', + 'DOUBLE', 'DOUBLE', 'DOUBLE', 'DOUBLE', 'DOUBLE', + 'DOUBLE', 'INT', 'DOUBLE', + 'DOUBLE', 'INT', 'INT', 'INT', 'INT', + 'DOUBLE', 'DOUBLE', + 'DOUBLE' ] - #pprint(array_type_list) - df = pd.DataFrame(data_dict_list) - #df['code'] = make_symbol(df['code']) - df['code'] = stock_id - df['m_nDate'] = make_date(df['m_nDate']) - df['m_nTime'] = df['m_nDate'] + make_time(df['m_nTime']) - for field_name in array_type_list: - df[field_name] = make_nparray(df[field_name]) - #print(f"Did create ddb table for dataframe of shape {df.shape}") - # self.make_table_skeleton(type_name, df.shape[0]) - return df + def create_ddb_database(self): + # TODO: daily数据库已经在DDBDailyFactor中被创建了 + # 后续可以迁移过来,不过现在就暂时先不管了 + pass -def dump_stock_daily_to_ddb(row, type_name, stock_id): - """ - 用于做多进程录入ddb的函数 - """ - df_table_name = type_name - df = make_stock_daily_df(row[2], type_name, stock_id) + def load_ddb_database(self): + self.ddb_sess.run(""" + {dbName} = database(directory='{dbPath}') + """.format( + dbName = self.ddb_dbname, + dbPath = self.ddb_path + )) + print('Did load database from', self.ddb_path) - ddb_sess = ddb.session(DDBLoader.ddb_config['host'], 8848) - ddb_sess.login(DDBLoader.ddb_config['username'], DDBLoader.ddb_config['password']) - ddb_sess.upload({df_table_name : df}) - ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format( - dbPath = DDBLoader.ddb_path, - partitioned_table_name = type_name + DDBLoader.ddb_partition_table_suffix, - df_table_name = df_table_name - )) + def create_ddb_partition_table(self, memory_table_name, partition_table_name): + # TODO: 现在只做一个日频行情数据表,今后可能考虑把基本面数据也迁移过来 + # 由于日频行情数据的表结构相对简单,所以直接把表结构写在这里代码里即可 + # 搬迁数据的时候需要考虑按照逐个股票来搬迁,以免造成对内存的巨大压力 + self.ddb_sess.run(""" + // 确保删除原表 + if (existsTable("{ddb_daily_path}", "{partition_table_name}")) {{ + dropTable({ddb_daily_dbname}, "{partition_table_name}"); + }} + + // 然后根据内存表的结构,创建持久化的分区表 + {partition_table_name} = {ddb_daily_dbname}.createPartitionedTable( + table = {memory_table_name}, + tableName = "{partition_table_name}", + partitionColumns = `code, + sortColumns = `code`m_nDate, + compressMethods = {{m_nDate:"delta"}} + ); + """.format( + ddb_daily_path = self.ddb_path, + ddb_daily_dbname = self.ddb_dbname, + memory_table_name = memory_table_name, + 
partition_table_name = partition_table_name, + )) + + def create_ddb_memory_table(self, memory_table_name, capacity): + self.ddb_sess.run(""" + // 先创建一个空的内存表用来表征结构,如果无需插入数据,capacity可以设为10 + {memory_table_name} = table({capacity}:0, {col_names}, [{col_types}]); + """.format( + memory_table_name = memory_table_name, + capacity = capacity, + col_names = '`' + '`'.join(self.daily_kline_cols), + col_types = ', '.join(self.daily_kline_col_types) + )) -def make_symbol(series): - return series.astype('int32').astype('str')\ - .apply(str.zfill, args=(6,))\ - .apply(lambda code : \ - code + '.SH' if code[0] == '6' \ - else code + '.SZ') + def dump_daily_kline_to_ddb(self): + # 先创建一个分区表,然后再逐个股票的数据插入 + # 1. 需要额外控制在插入第一个股票数据的时候创建分区表比较麻烦 + # 2. python程序中的dataframe直接上传到dolphindb内存表,不需要考虑内存表字段类型,分区表中设置好即可 -def make_date(series): - return pd.to_datetime( - series.astype(str), format='%Y%m%d') + memory_table_name = 'daily_kline_mem' + partition_table_name = 'daily_kline' + self.create_ddb_memory_table(memory_table_name, 10) + print('Did create ddb memory table.') + pprint(self.ddb_sess.run(f"schema({memory_table_name})")) + self.create_ddb_partition_table(memory_table_name, partition_table_name) + print('Did create ddb partition table.') + pprint(self.ddb_sess.run(f"schema({partition_table_name})")) -def make_nparray(series): - return series.apply(lambda x : np.array(x)) + with self.mssql_engine.connect() as conn: + stat = "select distinct [StockID] from [StockDaily].dbo.[DailyKLine]" + stock_id_list = list(conn.execute(stat).fetchall()) + + with tqdm(stock_id_list) as pbar: + for (stock_id,) in pbar: + pbar.set_description(f"Will work on {stock_id}") + #pbar.set_description(f"Will fetch all data of {stock_id} from SQL Server") + stat = """ + select * from [StockDaily].dbo.[DailyKLine] + where StockID='{stock_id}' + """.format( + stock_id = stock_id + ) + row_list = list(conn.execute(stat).fetchall()) + num_rows = len(row_list) + #pbar.set_description(f"Will work on dumping job on {stock_id} of len {num_rows}") + df = pd.DataFrame(row_list) + df['date'] = DDBLoader.make_date(df['date']) + df['StockID'] = DDBLoader.tscode_to_windcode(df['StockID']) + self.ddb_sess.upload({memory_table_name : df}) + #print('Did upload dataframe to ddb.') + #pprint(self.ddb_sess.run(f"schema({memory_table_name})")) + #break + self.ddb_sess.run(f"{partition_table_name}.tableInsert({memory_table_name})") -def make_time(series): - s_hr = series // 10000000 * 3600000 - s_min = series % 10000000 // 100000 * 60000 - s_sec = series % 100000 // 1000 - s_ms = series % 1000 - return pd.to_timedelta(s_hr + s_min + s_sec + s_ms, unit='ms') -class DDBLoader(object): +class DDBHFTLoader(DDBLoader): """ 0. 从sql-server中读取calendar数据,并创建成员变量df_calendar,df_calendar可以保存在本地pickle作为缓存 |- `def make_calendar_df(self) -> df_calendar` @@ -162,18 +547,6 @@ class DDBLoader(object): 13 : 'INT', } - mssql_config = { - 'host' : '192.168.1.7', - 'username' : 'sa', - 'password' : 'passw0rd!' 
- } - - ddb_config = { - 'host' : '192.168.1.7', - 'username' : 'admin', - 'password' : '123456' - } - # this value may be used by factor makers, which may loop through code partitions num_code_partition = 50 @@ -182,18 +555,6 @@ class DDBLoader(object): ddb_dump_journal_fname = 'ddb_dump_journal.csv' - def __init__(self): - self.mssql_engine = sa.create_engine( - "mssql+pyodbc://{username}:{password}@{host}/master?driver=ODBC+Driver+18+for+SQL+Server".format(**self.mssql_config), - connect_args = { - "TrustServerCertificate": "yes" - }, echo=False - ) - - self.ddb_sess = ddb.session(self.ddb_config['host'], 8848) - self.ddb_sess.login(self.ddb_config['username'], self.ddb_config['password']) - - def init_ddb_database(self, df_calendar): """ 1. 创建ddb_database @@ -490,6 +851,7 @@ class DDBLoader(object): print("Will create new Pool object, but this is not encourage for large batch work.") pool = Pool(self.num_worker) + # 在单个股票内部,对不同日期进行并行处理,对内存使用较为友好,不需要同时载入多个股票海量的全历史数据 with tqdm(total=num_rows, leave=False) as sub_pbar: for _ in pool.imap_unordered( functools.partial( @@ -505,18 +867,85 @@ class DDBLoader(object): self.dump_journal_writer.flush() + @staticmethod + def make_stock_daily_df(blob, type_name, stock_id): + """ + 用于做多进程录入ddb的函数 + """ + blob = gzip.decompress(blob) + dataArray = eval(f"ProtoBuffEntitys.{type_name}Message_pb2.{type_name}Array()") + dataArray.ParseFromString(blob) + + data_dict_list = [ + {field.name : val for field, val in entry.ListFields()} + for entry in dataArray.dataArray + ] + + array_type_list = [ + field.name + for field, val in dataArray.dataArray[0].ListFields() + if isinstance(field.default_value, list) + ] + #pprint(array_type_list) + + df = pd.DataFrame(data_dict_list) + #df['code'] = make_symbol(df['code']) + df['code'] = stock_id + df['m_nDate'] = make_date(df['m_nDate']) + df['m_nTime'] = df['m_nDate'] + make_time(df['m_nTime']) + for field_name in array_type_list: + df[field_name] = make_nparray(df[field_name]) + + #print(f"Did create ddb table for dataframe of shape {df.shape}") + # self.make_table_skeleton(type_name, df.shape[0]) + return df + + + @staticmethod + def dump_stock_daily_to_ddb(row, type_name, stock_id): + """ + 用于做多进程录入ddb的函数 + """ + df_table_name = type_name + df = make_stock_daily_df(row[2], type_name, stock_id) + + ddb_sess = ddb.session(DDBLoader.ddb_config['host'], 8848) + ddb_sess.login(DDBLoader.ddb_config['username'], DDBLoader.ddb_config['password']) + + ddb_sess.upload({df_table_name : df}) + ddb_sess.run("tableInsert(loadTable('{dbPath}', `{partitioned_table_name}), {df_table_name})".format( + dbPath = DDBLoader.ddb_path, + partitioned_table_name = type_name + DDBLoader.ddb_partition_table_suffix, + df_table_name = df_table_name + )) + + + def main(): - loader = DDBLoader() - df_calendar = loader.make_calendar_df() - loader.init_ddb_database(df_calendar) - print('Did finish init_ddb_database') + # PIT基本面数据 + loader = DDBPITLoader() + loader.create_ddb_database() + #loader.create_ddb_partition_tables() + loader.dump_pit_to_ddb() + + # 日频行情数据 + #loader = DDBDailyLoader() + #loader.load_ddb_database() + #loader.dump_daily_kline_to_ddb() + + + # 高频数据 + #df_calendar = loader.make_calendar_df() + + #loader.init_ddb_database(df_calendar) + #print('Did finish init_ddb_database') #loader.load_ddb_database() #print('Did load ddb database') - loader.init_ddb_table_data(df_calendar) - print('Did finish init_table_data') + #loader.init_ddb_table_data(df_calendar) + #print('Did finish init_table_data') if __name__ == '__main__': diff 
--git a/ddb.ipynb b/ddb.ipynb index 5cea5a3..95ce37c 100644 --- a/ddb.ipynb +++ b/ddb.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 66, "id": "5d0f471e-682e-43cc-abdb-7e52f3bbd707", "metadata": {}, "outputs": [], @@ -23,30 +23,30 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 67, "id": "ed17fd0b-9b36-47e4-9ab6-11459a3621fb", "metadata": {}, "outputs": [], "source": [ "# backup(backup_path, sql_obj, force, parallel)\n", "code = \"\"\"\n", - " backup('/data/dolphindb/backup/', , true, true)\n", "\"\"\"" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 68, "id": "8b7dae3d-aef1-4c50-92b2-460d4fea0a96", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0" + "114350" ] }, - "execution_count": 10, + "execution_count": 68, "metadata": {}, "output_type": "execute_result" } @@ -55,6 +55,27 @@ "sess.run(code)" ] }, + { + "cell_type": "code", + "execution_count": 69, + "id": "bf3bc38e-74cb-4549-bdca-6a3d2a601488", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2287.0" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "114350 / 50" + ] + }, { "cell_type": "code", "execution_count": 6, diff --git a/mssql.ipynb b/mssql.ipynb index faeda4b..8301f19 100644 --- a/mssql.ipynb +++ b/mssql.ipynb @@ -2,12 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ - " import sqlalchemy as sa\n", - " engine = sa.create_engine(\n", + "import sqlalchemy as sa\n", + "engine = sa.create_engine(\n", " 'mssql+pyodbc://sa:passw0rd!@192.168.1.7/master?driver=ODBC+Driver+18+for+SQL+Server',\n", " connect_args = {\n", " \"TrustServerCertificate\": \"yes\"\n", @@ -792,6 +792,1151 @@ "date_list[:10]" ] }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "with engine.connect() as conn:\n", + " stat = \"select * from [StockDaily].dbo.[DailyKLine] where StockID='NE430047'\"\n", + " row_list = list(conn.execute(stat).fetchall())\n", + " df = pd.DataFrame(row_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[HTML rendering of the DailyKLine DataFrame for NE430047 (1077 rows × 18 columns); identical to the text/plain output below]
" + ], + "text/plain": [ + " StockID date open high low close vol amount \\\n", + "0 NE430047 20150309 26.50 27.40 26.50 26.98 40000.0 1079070.00 \n", + "1 NE430047 20150310 27.50 27.50 27.50 27.50 9000.0 247500.00 \n", + "2 NE430047 20150311 27.40 27.60 27.40 27.60 29000.0 797310.00 \n", + "3 NE430047 20150313 27.60 28.00 27.50 27.80 31000.0 861900.00 \n", + "4 NE430047 20150316 28.00 28.80 28.00 28.00 110000.0 3099050.00 \n", + "... ... ... ... ... ... ... ... ... \n", + "1072 NE430047 20220804 10.70 11.33 10.69 11.03 702842.0 7824240.64 \n", + "1073 NE430047 20220805 11.03 11.36 10.70 11.14 458649.0 5037450.46 \n", + "1074 NE430047 20220808 11.17 11.17 10.91 10.99 208995.0 2290471.20 \n", + "1075 NE430047 20220809 10.88 11.22 10.88 11.06 294810.0 3273892.85 \n", + "1076 NE430047 20220810 10.91 11.11 10.91 10.95 236886.0 2596216.69 \n", + "\n", + " cjbs yclose PctChg IsZt IsDt IsST IsGoDelist FloatShares \\\n", + "0 0 26.48 1.888218 0 0 0 0 22504096.0 \n", + "1 0 26.98 1.927354 0 0 0 0 22504096.0 \n", + "2 0 27.50 0.363636 0 0 0 0 22504096.0 \n", + "3 0 27.60 0.724638 0 0 0 0 22504096.0 \n", + "4 0 27.80 0.719424 0 0 0 0 22504096.0 \n", + "... ... ... ... ... ... ... ... ... \n", + "1072 0 10.68 3.277154 0 0 0 0 149259448.0 \n", + "1073 0 11.03 0.997280 0 0 0 0 149259448.0 \n", + "1074 0 11.14 -1.346499 0 0 0 0 149259448.0 \n", + "1075 0 10.99 0.636943 0 0 0 0 149259448.0 \n", + "1076 0 11.06 -0.994575 0 0 0 0 149259448.0 \n", + "\n", + " MarketValues factor \n", + "0 6.071605e+08 1.000000 \n", + "1 6.188626e+08 1.000000 \n", + "2 6.211130e+08 1.000000 \n", + "3 6.256139e+08 1.000000 \n", + "4 6.301147e+08 1.000000 \n", + "... ... ... \n", + "1072 1.646332e+09 5.414924 \n", + "1073 1.662750e+09 5.414924 \n", + "1074 1.640361e+09 5.414924 \n", + "1075 1.650809e+09 5.414924 \n", + "1076 1.634391e+09 5.414924 \n", + "\n", + "[1077 rows x 18 columns]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import sqlalchemy as sa\n", + "\n", + "engine = sa.create_engine(\n", + " 'mssql+pyodbc://sa:xn.123@192.168.1.91/tr_statement?driver=ODBC+Driver+18+for+SQL+Server',\n", + " connect_args = {\n", + " \"TrustServerCertificate\": \"yes\"\n", + " }, echo=False)\n", + "\n", + "with engine.connect() as conn:\n", + " stat = \"\"\"exec sp_columns CBS_AFTER_ADJ \"\"\"\n", + " row_list = list(conn.execute(stat).fetchall())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "14\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "with engine.connect() as conn:\n", + " stat = \"\"\"select * from DIV_WIND where WIND_CODE='000001.SZ' \"\"\"\n", + " row_list = list(conn.execute(stat).fetchall())\n", + " print(len(row_list))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "df2 = pd.DataFrame(row_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[HTML rendering of the DIV_WIND DataFrame for 000001.SZ (14 rows); identical to the text/plain output below]
" + ], + "text/plain": [ + " WIND_CODE IntCode ReportPeriod AppearAtDate EQY_RECORD_DT EX_DT \\\n", + "0 000001.SZ 1 20011231 20020418 20020722 20020723 \n", + "1 000001.SZ 1 20021231 20030424 20030926 20030929 \n", + "2 000001.SZ 1 20080630 20080821 20081030 20081031 \n", + "3 000001.SZ 1 20120630 20120816 20121018 20121019 \n", + "4 000001.SZ 1 20121231 20130308 20130619 20130620 \n", + "5 000001.SZ 1 20131231 20140307 20140611 20140612 \n", + "6 000001.SZ 1 20141231 20150313 20150410 20150413 \n", + "7 000001.SZ 1 20151231 20160310 20160615 20160616 \n", + "8 000001.SZ 1 20161231 20170317 20170720 20170721 \n", + "9 000001.SZ 1 20171231 20180315 20180711 20180712 \n", + "10 000001.SZ 1 20181231 20190307 20190625 20190626 \n", + "11 000001.SZ 1 20191231 20200214 20200527 20200528 \n", + "12 000001.SZ 1 20201231 20210202 20210513 20210514 \n", + "13 000001.SZ 1 20211231 20220310 0 0 \n", + "\n", + " DVD_PAYOUT_DT S_DIV_PRELANDATE S_DIV_SMTGDATE DVD_ANN_DT \\\n", + "0 20020723 20020418 20020523 20020717 \n", + "1 20030929 20030424 20030827 20030923 \n", + "2 20081031 20080821 20081015 20081024 \n", + "3 20121019 20120816 20120831 20121012 \n", + "4 20130620 20130308 20130523 20130614 \n", + "5 20140612 20140307 20140522 20140606 \n", + "6 20150413 20150313 20150402 20150407 \n", + "7 20160616 20160310 20160519 20160608 \n", + "8 20170721 20170317 20170629 20170717 \n", + "9 20180712 20180315 20180620 20180706 \n", + "10 20190626 20190307 20190530 20190620 \n", + "11 20200528 20200214 20200514 20200522 \n", + "12 20210514 20210202 20210408 20210507 \n", + "13 0 20220310 0 0 \n", + "\n", + " S_DIV_BASESHARE S_DIV_BONUSRATE S_DIV_CONVERSEDRATE MEMO S_DIV_PREANNDT \n", + "0 1.945822e+09 0.0 0.0 nan 0 \n", + "1 1.945822e+09 0.0 0.0 nan 0 \n", + "2 2.388795e+09 0.3 0.0 nan 0 \n", + "3 5.123350e+09 0.0 0.0 nan 0 \n", + "4 5.123350e+09 0.6 0.0 nan 0 \n", + "5 9.520746e+09 0.2 0.2 nan 0 \n", + "6 1.142489e+10 0.2 0.2 nan 0 \n", + "7 1.430868e+10 0.2 0.2 nan 0 \n", + "8 1.717041e+10 0.0 0.0 nan 0 \n", + "9 1.717041e+10 0.0 0.0 nan 0 \n", + "10 1.717041e+10 0.0 0.0 nan 0 \n", + "11 1.940592e+10 0.0 0.0 nan 0 \n", + "12 1.940592e+10 0.0 0.0 nan 0 \n", + "13 1.940592e+10 0.0 0.0 nan 0 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "df2.loc[df2['EQY_RECORD_DT'] == 0, 'EQY_RECORD_DT'] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "NaT" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(np.nan)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[HTML rendering of df2 after setting EQY_RECORD_DT == 0 to NaN (14 rows); identical to the text/plain output below]
" + ], + "text/plain": [ + " WIND_CODE IntCode ReportPeriod AppearAtDate EQY_RECORD_DT EX_DT \\\n", + "0 000001.SZ 1 20011231 20020418 20020722.0 20020723 \n", + "1 000001.SZ 1 20021231 20030424 20030926.0 20030929 \n", + "2 000001.SZ 1 20080630 20080821 20081030.0 20081031 \n", + "3 000001.SZ 1 20120630 20120816 20121018.0 20121019 \n", + "4 000001.SZ 1 20121231 20130308 20130619.0 20130620 \n", + "5 000001.SZ 1 20131231 20140307 20140611.0 20140612 \n", + "6 000001.SZ 1 20141231 20150313 20150410.0 20150413 \n", + "7 000001.SZ 1 20151231 20160310 20160615.0 20160616 \n", + "8 000001.SZ 1 20161231 20170317 20170720.0 20170721 \n", + "9 000001.SZ 1 20171231 20180315 20180711.0 20180712 \n", + "10 000001.SZ 1 20181231 20190307 20190625.0 20190626 \n", + "11 000001.SZ 1 20191231 20200214 20200527.0 20200528 \n", + "12 000001.SZ 1 20201231 20210202 20210513.0 20210514 \n", + "13 000001.SZ 1 20211231 20220310 NaN 0 \n", + "\n", + " DVD_PAYOUT_DT S_DIV_PRELANDATE S_DIV_SMTGDATE DVD_ANN_DT \\\n", + "0 20020723 20020418 20020523 20020717 \n", + "1 20030929 20030424 20030827 20030923 \n", + "2 20081031 20080821 20081015 20081024 \n", + "3 20121019 20120816 20120831 20121012 \n", + "4 20130620 20130308 20130523 20130614 \n", + "5 20140612 20140307 20140522 20140606 \n", + "6 20150413 20150313 20150402 20150407 \n", + "7 20160616 20160310 20160519 20160608 \n", + "8 20170721 20170317 20170629 20170717 \n", + "9 20180712 20180315 20180620 20180706 \n", + "10 20190626 20190307 20190530 20190620 \n", + "11 20200528 20200214 20200514 20200522 \n", + "12 20210514 20210202 20210408 20210507 \n", + "13 0 20220310 0 0 \n", + "\n", + " S_DIV_BASESHARE S_DIV_BONUSRATE S_DIV_CONVERSEDRATE MEMO S_DIV_PREANNDT \n", + "0 1.945822e+09 0.0 0.0 nan 0 \n", + "1 1.945822e+09 0.0 0.0 nan 0 \n", + "2 2.388795e+09 0.3 0.0 nan 0 \n", + "3 5.123350e+09 0.0 0.0 nan 0 \n", + "4 5.123350e+09 0.6 0.0 nan 0 \n", + "5 9.520746e+09 0.2 0.2 nan 0 \n", + "6 1.142489e+10 0.2 0.2 nan 0 \n", + "7 1.430868e+10 0.2 0.2 nan 0 \n", + "8 1.717041e+10 0.0 0.0 nan 0 \n", + "9 1.717041e+10 0.0 0.0 nan 0 \n", + "10 1.717041e+10 0.0 0.0 nan 0 \n", + "11 1.940592e+10 0.0 0.0 nan 0 \n", + "12 1.940592e+10 0.0 0.0 nan 0 \n", + "13 1.940592e+10 0.0 0.0 nan 0 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2" + ] + }, { "cell_type": "code", "execution_count": null,