You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1975 lines
64 KiB

2 years ago
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
2 years ago
"metadata": {},
"outputs": [],
"source": [
"import sqlalchemy as sa\n",
"engine = sa.create_engine(\n",
2 years ago
" 'mssql+pyodbc://sa:passw0rd!@192.168.1.7/master?driver=ODBC+Driver+18+for+SQL+Server',\n",
" connect_args = {\n",
" \"TrustServerCertificate\": \"yes\"\n",
" }, echo=False)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"with engine.connect() as conn:\n",
" stat = \"select distinct S_INFO_WINDCODE, TRADE_DT from Level2BytesKLine.dbo.KLine\"\n",
" rs = conn.execute(stat)\n",
" stock_date_list = [(stock_name, date) for stock_name, date in rs.fetchall()]"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"stock_list, trade_list = zip(*stock_date_list)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4843"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(set(stock_list))"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'blob' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/guofu/Workspaces/dolphin-dev/mssql.ipynb Cell 3\u001b[0m in \u001b[0;36m<cell line: 4>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2247756f66752d5043227d/home/guofu/Workspaces/dolphin-dev/mssql.ipynb#W2sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mgzip\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2247756f66752d5043227d/home/guofu/Workspaces/dolphin-dev/mssql.ipynb#W2sdnNjb2RlLXJlbW90ZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mProtoBuffEntitys\u001b[39;00m \u001b[39mimport\u001b[39;00m TranseMessage_pb2\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2247756f66752d5043227d/home/guofu/Workspaces/dolphin-dev/mssql.ipynb#W2sdnNjb2RlLXJlbW90ZQ%3D%3D?line=3'>4</a>\u001b[0m f_all \u001b[39m=\u001b[39m gzip\u001b[39m.\u001b[39mdecompress(blob)\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2247756f66752d5043227d/home/guofu/Workspaces/dolphin-dev/mssql.ipynb#W2sdnNjb2RlLXJlbW90ZQ%3D%3D?line=4'>5</a>\u001b[0m dataArray \u001b[39m=\u001b[39m TranseMessage_pb2\u001b[39m.\u001b[39mTranseArray()\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2B7b22686f73744e616d65223a2247756f66752d5043227d/home/guofu/Workspaces/dolphin-dev/mssql.ipynb#W2sdnNjb2RlLXJlbW90ZQ%3D%3D?line=5'>6</a>\u001b[0m dataArray\u001b[39m.\u001b[39mParseFromString(f_all)\n",
"\u001b[0;31mNameError\u001b[0m: name 'blob' is not defined"
]
}
],
"source": [
"import gzip\n",
"from ProtoBuffEntitys import TranseMessage_pb2\n",
"\n",
"f_all = gzip.decompress(blob)\n",
"dataArray = TranseMessage_pb2.TranseArray()\n",
"dataArray.ParseFromString(f_all)\n",
"\n",
"print(dataArray.dataArray)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"code 600843\n",
"m_nDate 20130104\n",
"m_nTime 92506510\n",
"m_nIndex 0\n",
"m_chFunctionCode 0\n",
"m_chOrderKind 0\n",
"m_chBSFlag 83\n",
"m_nTradePrice 6.8\n",
"m_nTradeVolume 200\n",
"m_nAskOrder 0\n",
"m_nBidOrder 0\n"
]
}
],
"source": [
"for item in dataArray.dataArray:\n",
" fields = item.ListFields()\n",
" for desc, val in fields:\n",
" print(desc.name, val)\n",
" break\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'sqlalchemy.engine.row.LegacyRow'>\n"
]
}
],
"source": [
"with engine.connect() as conn:\n",
" stat = \"select top 1 * from Level2BytesTickQue.dbo.TickQue\"\n",
" rs = conn.execute(stat)\n",
"\n",
" for row in rs.fetchall():\n",
" print(type(row))\n",
" blob = row[2]\n"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1499694"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import gzip\n",
"from ProtoBuffEntitys import TickQueueMessage_pb2\n",
"\n",
"f_all = gzip.decompress(blob)\n",
"dataArray = TickQueueMessage_pb2.TickQueueArray()\n",
"dataArray.ParseFromString(f_all)"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"<class 'google.protobuf.pyext._message.RepeatedScalarContainer'>\n",
"[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
]
}
],
"source": [
"import numpy as np\n",
"print(dataArray.dataArray[0].m_nABVolume)\n",
"print(type(dataArray.dataArray[0].m_nABVolume))\n",
"print(np.array(dataArray.dataArray[0].m_nABVolume))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import urllib\n",
"\n",
"server = 'serverName\\instanceName,port' # to specify an alternate port\n",
"database = 'mydb' \n",
"username = 'myusername' \n",
"password = 'mypassword'\n",
"\n",
"params = urllib.parse.quote_plus(\"'DRIVER={ODBC Driver 17 for SQL Server};SERVER='+server+';DATABASE='+database+';UID='+username+';PWD='+ password\")\n",
"\n",
"engine = sa.ceate_engine(\"mssql+pyodbc:///?odbc_connect=%s\" % params)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"from ProtoBuffEntitys import KLineMessage_pb2, OrderMessage_pb2, TickMessage_pb2, TickQueueMessage_pb2, TranseMessage_pb2"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"HFT_TYPE_LIST = [\n",
" 'KLine', 'Order', 'Tick', 'TickQueue', 'Transe'\n",
"]\n",
"PROTOBUFF_NAME_LIST = [f\"{name}Message_pb2\" for name in PROTOBUFF_NAME_LIST]\n",
"\n",
"import importlib\n",
"\n",
"PROTOBUFF_MODULE_LIST = [importlib.import_module(f\".{name}\", package='ProtoBuffEntitys') for name in PROTOBUFF_MODULE_LIST]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (99226286.py, line 1)",
"output_type": "error",
"traceback": [
"\u001b[0;36m Input \u001b[0;32mIn [35]\u001b[0;36m\u001b[0m\n\u001b[0;31m from ProtoBuffEntitys import KLineMessage_pb2.KLineArray.KLineData.DESCRIPTOR\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"import ProtoBuffEntitys.KLineMessage_pb2.KLineArray.KLineData.DESCRIPTOR "
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"'code'\n",
"'m_nDate'\n",
"'m_nTime'\n",
"'m_nOpen'\n",
"'m_nHigh'\n",
"'m_nLow'\n",
"'m_nClose'\n",
"'m_iVolume'\n",
"'m_iTurover'\n",
"'m_nMatchItems'\n"
]
}
],
"source": [
"from pprint import pprint\n",
"\n",
"for field in KLineMessage_pb2.KLineArray.KLineData.DESCRIPTOR.fields:\n",
" pprint(field.name)\n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\n db_ts_stock.createPartitionedTable(\\n table = t, \\n tableName = abd, \\n partitionColumns = `code`m_nDate, \\n sortColumns = `code`m_nDate`m_nTime,\\n compressMethods = {m_nDate:\"delta\", m_nTime:\"delta\"}\\n )\\n'"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
" db_ts_stock.createPartitionedTable(\n",
" table = t, \n",
" tableName = {hft_type_name}, \n",
" partitionColumns = `code`m_nDate, \n",
" sortColumns = `code`m_nDate`m_nTime,\n",
" compressMethods = {{m_nDate:\"delta\", m_nTime:\"delta\"}}\n",
" )\n",
"\"\"\".format(\n",
" hft_type_name = \"abd\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('600519.SH', '20210531', b'\\x1f\\x8b\\x08\\x00\\x00\\x00\\x00\\x00\\x04\\x00\\x8c\\xddy\\\\^Yz\\x1f\\xf8tuUw\\xed%$\\xb4K\\x15\\x12\\x97+\\xd7V}\\x9cr\\xb7\\xa7\\xd3\\xd3^\\xd2\\xe5`\\xbb\\xd3\\xe9\\xe9\\xd8\\ ... (1454851 characters truncated) ... 17\\xb2\\x87\\xe4\\xce\\x94T\\xe8\\xe2O\\xc8\\xe6\\x10\\x1c=\\r\\xd9+\\x02\\xcc\\x9c!\\xc56\\xdb\\xef\\xb3uf\\xe5,d\\x9f\\xc8\\xd1\\xbc$\\xe7~)\\xe4\\xff\\x075\\xea@$i\\x1c\\x1a\\x00')\n"
]
}
],
"source": [
"with engine.connect() as conn:\n",
" stat = \"select * from Level2BytesOrder.dbo.[Order] where S_INFO_WINDCODE='600519.SH'\"\n",
" rs = conn.execute(stat)\n",
" for row in rs.fetchall():\n",
" print(row)\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 4,
2 years ago
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(2298,)\n"
2 years ago
]
}
],
"source": [
"with engine.connect() as conn:\n",
" stat = \"select count(*) from Level2BytesTick.dbo.Tick where S_INFO_WINDCODE='002182.SZ'\"\n",
2 years ago
" rs = conn.execute(stat)\n",
" for row in rs.fetchall():\n",
" print(row)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('NonClusteredIndex-Order', 'nonclustered located on PRIMARY', 'S_INFO_WINDCODE, TRADE_DT')\n"
]
}
],
"source": [
"engine = sa.create_engine(\n",
" 'mssql+pyodbc://sa:passw0rd!@192.168.1.7/Level2BytesOrder?driver=ODBC+Driver+18+for+SQL+Server',\n",
" connect_args = {\n",
" \"TrustServerCertificate\": \"yes\"\n",
" }, echo=False)\n",
" \n",
"with engine.connect() as conn:\n",
" stat = \"EXEC sp_helpindex 'Level2BytesOrder.dbo.[Order]'\"\n",
" rs = conn.execute(stat)\n",
" for row in rs.fetchall():\n",
" print(row)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('master', 1, datetime.datetime(2003, 4, 8, 9, 13, 36, 390000))\n",
"('tempdb', 2, datetime.datetime(2022, 8, 3, 4, 47, 56, 987000))\n",
"('model', 3, datetime.datetime(2003, 4, 8, 9, 13, 36, 390000))\n",
"('msdb', 4, datetime.datetime(2022, 5, 29, 16, 33, 42, 60000))\n",
"('test', 5, datetime.datetime(2022, 8, 3, 4, 52, 46, 450000))\n",
"('Level1BytesIndexFutureKLine', 6, datetime.datetime(2022, 8, 3, 5, 2, 17, 660000))\n",
"('Level2BytesConvBondKLine', 7, datetime.datetime(2022, 8, 3, 5, 2, 30, 837000))\n",
"('Level2BytesConvBondOrder', 8, datetime.datetime(2022, 8, 3, 5, 2, 39, 987000))\n",
"('Level2BytesConvBondTick', 9, datetime.datetime(2022, 8, 3, 5, 2, 54, 587000))\n",
"('Level2BytesConvBondTickQue', 10, datetime.datetime(2022, 8, 3, 5, 3, 58, 270000))\n",
"('Level2BytesConvBondTranse', 11, datetime.datetime(2022, 8, 3, 5, 4, 14, 500000))\n",
"('Level2BytesETFKLine', 12, datetime.datetime(2022, 8, 3, 5, 4, 27, 270000))\n",
"('Level2BytesETFOrder', 13, datetime.datetime(2022, 8, 3, 5, 4, 43, 457000))\n",
"('Level2BytesTick', 14, datetime.datetime(2022, 8, 3, 8, 51, 40, 633000))\n",
"('Level2BytesTickQue', 15, datetime.datetime(2022, 8, 3, 8, 51, 58, 650000))\n",
"('Level2BytesTranse', 16, datetime.datetime(2022, 8, 3, 8, 52, 14, 103000))\n",
"('Level2BytesOrder', 17, datetime.datetime(2022, 8, 3, 8, 52, 27, 740000))\n",
"('Level2BytesKLine', 18, datetime.datetime(2022, 8, 3, 8, 52, 44, 610000))\n",
"('Level2BytesIndexTick', 19, datetime.datetime(2022, 8, 3, 9, 22, 36, 850000))\n",
"('Level2BytesIndexKLine', 20, datetime.datetime(2022, 8, 3, 9, 22, 57, 527000))\n",
"('Level2BytesETFTranse', 21, datetime.datetime(2022, 8, 3, 9, 23, 53, 713000))\n",
"('Level2BytesETFTickQue', 22, datetime.datetime(2022, 8, 3, 9, 24, 9, 87000))\n",
"('Level2BytesETFTick', 23, datetime.datetime(2022, 8, 3, 9, 24, 26, 267000))\n"
]
}
],
"source": [
"stat = \"\"\"SELECT name, database_id, create_date FROM sys.databases; \n",
"\"\"\"\n",
"\n",
"with engine.connect() as conn:\n",
" rs = conn.execute(stat)\n",
" for row in rs.fetchall():\n",
" print(row)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"a = [1,2,3]\n",
"b = {x : (x + 1 if x != 3 else x + 2) for x in a}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"\n",
"with engine.connect() as conn:\n",
" stat = \"select distinct S_INFO_WINDCODE, TRADE_DT from Level2BytesKline.dbo.KLine\"\n",
" rs = conn.execute(stat)\n",
" stock_date_list = [(stock_name, date) for stock_name, date in rs.fetchall()]\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>code</th>\n",
" <th>m_nDate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>000001.SZ</td>\n",
" <td>20130104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>000001.SZ</td>\n",
" <td>20130107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>000001.SZ</td>\n",
" <td>20130108</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>000001.SZ</td>\n",
" <td>20130109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>000001.SZ</td>\n",
" <td>20130110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7379201</th>\n",
" <td>689009.SH</td>\n",
" <td>20220704</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7379202</th>\n",
" <td>689009.SH</td>\n",
" <td>20220705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7379203</th>\n",
" <td>689009.SH</td>\n",
" <td>20220706</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7379204</th>\n",
" <td>689009.SH</td>\n",
" <td>20220707</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7379205</th>\n",
" <td>689009.SH</td>\n",
" <td>20220708</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>7379206 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" code m_nDate\n",
"0 000001.SZ 20130104\n",
"1 000001.SZ 20130107\n",
"2 000001.SZ 20130108\n",
"3 000001.SZ 20130109\n",
"4 000001.SZ 20130110\n",
"... ... ...\n",
"7379201 689009.SH 20220704\n",
"7379202 689009.SH 20220705\n",
"7379203 689009.SH 20220706\n",
"7379204 689009.SH 20220707\n",
"7379205 689009.SH 20220708\n",
"\n",
"[7379206 rows x 2 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(stock_date_list, columns=['code', 'm_nDate'])"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv('ddb_dump_journal.csv')"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>type_name</th>\n",
" <th>stock_id</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [type_name, stock_id, status]\n",
"Index: []"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_259767/4140820348.py:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
" df = df.append({'type_name':'KLine', 'stock_id':'000001.SZ', 'status':'OK'},ignore_index=True)\n"
]
}
],
"source": [
"df = df.append({'type_name':'KLine', 'stock_id':'000001.SZ', 'status':'OK'},ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th>type_name</th>\n",
" <th>stock_id</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>KLine</th>\n",
" <th>000001.SZ</th>\n",
" <th>OK</th>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: [(KLine, 000001.SZ, OK)]"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.set_index(['type_name', 'stock_id', 'status'], inplace=True)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Series([], Name: (KLine, 000001.SZ, OK), dtype: float64)"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.loc[('KLine', '000001.SZ', 'OK')]"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"('KLine', '000001.SZ', 'OK') in df.index"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"import pickle as pkl\n",
"with open('tmp.pkl', 'rb') as fin:\n",
" stock_list, date_list = pkl.load(fin)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('20130104',\n",
" '20130107',\n",
" '20130108',\n",
" '20130109',\n",
" '20130110',\n",
" '20130111',\n",
" '20130114',\n",
" '20130115',\n",
" '20130116',\n",
" '20130117')"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"date_list[:10]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"with engine.connect() as conn:\n",
" stat = \"select * from [StockDaily].dbo.[DailyKLine] where StockID='NE430047'\"\n",
" row_list = list(conn.execute(stat).fetchall())\n",
" df = pd.DataFrame(row_list)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>StockID</th>\n",
" <th>date</th>\n",
" <th>open</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>close</th>\n",
" <th>vol</th>\n",
" <th>amount</th>\n",
" <th>cjbs</th>\n",
" <th>yclose</th>\n",
" <th>PctChg</th>\n",
" <th>IsZt</th>\n",
" <th>IsDt</th>\n",
" <th>IsST</th>\n",
" <th>IsGoDelist</th>\n",
" <th>FloatShares</th>\n",
" <th>MarketValues</th>\n",
" <th>factor</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NE430047</td>\n",
" <td>20150309</td>\n",
" <td>26.50</td>\n",
" <td>27.40</td>\n",
" <td>26.50</td>\n",
" <td>26.98</td>\n",
" <td>40000.0</td>\n",
" <td>1079070.00</td>\n",
" <td>0</td>\n",
" <td>26.48</td>\n",
" <td>1.888218</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>22504096.0</td>\n",
" <td>6.071605e+08</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NE430047</td>\n",
" <td>20150310</td>\n",
" <td>27.50</td>\n",
" <td>27.50</td>\n",
" <td>27.50</td>\n",
" <td>27.50</td>\n",
" <td>9000.0</td>\n",
" <td>247500.00</td>\n",
" <td>0</td>\n",
" <td>26.98</td>\n",
" <td>1.927354</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>22504096.0</td>\n",
" <td>6.188626e+08</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NE430047</td>\n",
" <td>20150311</td>\n",
" <td>27.40</td>\n",
" <td>27.60</td>\n",
" <td>27.40</td>\n",
" <td>27.60</td>\n",
" <td>29000.0</td>\n",
" <td>797310.00</td>\n",
" <td>0</td>\n",
" <td>27.50</td>\n",
" <td>0.363636</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>22504096.0</td>\n",
" <td>6.211130e+08</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NE430047</td>\n",
" <td>20150313</td>\n",
" <td>27.60</td>\n",
" <td>28.00</td>\n",
" <td>27.50</td>\n",
" <td>27.80</td>\n",
" <td>31000.0</td>\n",
" <td>861900.00</td>\n",
" <td>0</td>\n",
" <td>27.60</td>\n",
" <td>0.724638</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>22504096.0</td>\n",
" <td>6.256139e+08</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NE430047</td>\n",
" <td>20150316</td>\n",
" <td>28.00</td>\n",
" <td>28.80</td>\n",
" <td>28.00</td>\n",
" <td>28.00</td>\n",
" <td>110000.0</td>\n",
" <td>3099050.00</td>\n",
" <td>0</td>\n",
" <td>27.80</td>\n",
" <td>0.719424</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>22504096.0</td>\n",
" <td>6.301147e+08</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1072</th>\n",
" <td>NE430047</td>\n",
" <td>20220804</td>\n",
" <td>10.70</td>\n",
" <td>11.33</td>\n",
" <td>10.69</td>\n",
" <td>11.03</td>\n",
" <td>702842.0</td>\n",
" <td>7824240.64</td>\n",
" <td>0</td>\n",
" <td>10.68</td>\n",
" <td>3.277154</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>149259448.0</td>\n",
" <td>1.646332e+09</td>\n",
" <td>5.414924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1073</th>\n",
" <td>NE430047</td>\n",
" <td>20220805</td>\n",
" <td>11.03</td>\n",
" <td>11.36</td>\n",
" <td>10.70</td>\n",
" <td>11.14</td>\n",
" <td>458649.0</td>\n",
" <td>5037450.46</td>\n",
" <td>0</td>\n",
" <td>11.03</td>\n",
" <td>0.997280</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>149259448.0</td>\n",
" <td>1.662750e+09</td>\n",
" <td>5.414924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1074</th>\n",
" <td>NE430047</td>\n",
" <td>20220808</td>\n",
" <td>11.17</td>\n",
" <td>11.17</td>\n",
" <td>10.91</td>\n",
" <td>10.99</td>\n",
" <td>208995.0</td>\n",
" <td>2290471.20</td>\n",
" <td>0</td>\n",
" <td>11.14</td>\n",
" <td>-1.346499</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>149259448.0</td>\n",
" <td>1.640361e+09</td>\n",
" <td>5.414924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1075</th>\n",
" <td>NE430047</td>\n",
" <td>20220809</td>\n",
" <td>10.88</td>\n",
" <td>11.22</td>\n",
" <td>10.88</td>\n",
" <td>11.06</td>\n",
" <td>294810.0</td>\n",
" <td>3273892.85</td>\n",
" <td>0</td>\n",
" <td>10.99</td>\n",
" <td>0.636943</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>149259448.0</td>\n",
" <td>1.650809e+09</td>\n",
" <td>5.414924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1076</th>\n",
" <td>NE430047</td>\n",
" <td>20220810</td>\n",
" <td>10.91</td>\n",
" <td>11.11</td>\n",
" <td>10.91</td>\n",
" <td>10.95</td>\n",
" <td>236886.0</td>\n",
" <td>2596216.69</td>\n",
" <td>0</td>\n",
" <td>11.06</td>\n",
" <td>-0.994575</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>149259448.0</td>\n",
" <td>1.634391e+09</td>\n",
" <td>5.414924</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1077 rows × 18 columns</p>\n",
"</div>"
],
"text/plain": [
" StockID date open high low close vol amount \\\n",
"0 NE430047 20150309 26.50 27.40 26.50 26.98 40000.0 1079070.00 \n",
"1 NE430047 20150310 27.50 27.50 27.50 27.50 9000.0 247500.00 \n",
"2 NE430047 20150311 27.40 27.60 27.40 27.60 29000.0 797310.00 \n",
"3 NE430047 20150313 27.60 28.00 27.50 27.80 31000.0 861900.00 \n",
"4 NE430047 20150316 28.00 28.80 28.00 28.00 110000.0 3099050.00 \n",
"... ... ... ... ... ... ... ... ... \n",
"1072 NE430047 20220804 10.70 11.33 10.69 11.03 702842.0 7824240.64 \n",
"1073 NE430047 20220805 11.03 11.36 10.70 11.14 458649.0 5037450.46 \n",
"1074 NE430047 20220808 11.17 11.17 10.91 10.99 208995.0 2290471.20 \n",
"1075 NE430047 20220809 10.88 11.22 10.88 11.06 294810.0 3273892.85 \n",
"1076 NE430047 20220810 10.91 11.11 10.91 10.95 236886.0 2596216.69 \n",
"\n",
" cjbs yclose PctChg IsZt IsDt IsST IsGoDelist FloatShares \\\n",
"0 0 26.48 1.888218 0 0 0 0 22504096.0 \n",
"1 0 26.98 1.927354 0 0 0 0 22504096.0 \n",
"2 0 27.50 0.363636 0 0 0 0 22504096.0 \n",
"3 0 27.60 0.724638 0 0 0 0 22504096.0 \n",
"4 0 27.80 0.719424 0 0 0 0 22504096.0 \n",
"... ... ... ... ... ... ... ... ... \n",
"1072 0 10.68 3.277154 0 0 0 0 149259448.0 \n",
"1073 0 11.03 0.997280 0 0 0 0 149259448.0 \n",
"1074 0 11.14 -1.346499 0 0 0 0 149259448.0 \n",
"1075 0 10.99 0.636943 0 0 0 0 149259448.0 \n",
"1076 0 11.06 -0.994575 0 0 0 0 149259448.0 \n",
"\n",
" MarketValues factor \n",
"0 6.071605e+08 1.000000 \n",
"1 6.188626e+08 1.000000 \n",
"2 6.211130e+08 1.000000 \n",
"3 6.256139e+08 1.000000 \n",
"4 6.301147e+08 1.000000 \n",
"... ... ... \n",
"1072 1.646332e+09 5.414924 \n",
"1073 1.662750e+09 5.414924 \n",
"1074 1.640361e+09 5.414924 \n",
"1075 1.650809e+09 5.414924 \n",
"1076 1.634391e+09 5.414924 \n",
"\n",
"[1077 rows x 18 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import sqlalchemy as sa\n",
"\n",
"engine = sa.create_engine(\n",
" 'mssql+pyodbc://sa:xn.123@192.168.1.91/tr_statement?driver=ODBC+Driver+18+for+SQL+Server',\n",
" connect_args = {\n",
" \"TrustServerCertificate\": \"yes\"\n",
" }, echo=False)\n",
"\n",
"with engine.connect() as conn:\n",
" stat = \"\"\"exec sp_columns CBS_AFTER_ADJ \"\"\"\n",
" row_list = list(conn.execute(stat).fetchall())\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"14\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"with engine.connect() as conn:\n",
" stat = \"\"\"select * from DIV_WIND where WIND_CODE='000001.SZ' \"\"\"\n",
" row_list = list(conn.execute(stat).fetchall())\n",
" print(len(row_list))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"df2 = pd.DataFrame(row_list)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>WIND_CODE</th>\n",
" <th>IntCode</th>\n",
" <th>ReportPeriod</th>\n",
" <th>AppearAtDate</th>\n",
" <th>EQY_RECORD_DT</th>\n",
" <th>EX_DT</th>\n",
" <th>DVD_PAYOUT_DT</th>\n",
" <th>S_DIV_PRELANDATE</th>\n",
" <th>S_DIV_SMTGDATE</th>\n",
" <th>DVD_ANN_DT</th>\n",
" <th>S_DIV_BASESHARE</th>\n",
" <th>S_DIV_BONUSRATE</th>\n",
" <th>S_DIV_CONVERSEDRATE</th>\n",
" <th>MEMO</th>\n",
" <th>S_DIV_PREANNDT</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20011231</td>\n",
" <td>20020418</td>\n",
" <td>20020722</td>\n",
" <td>20020723</td>\n",
" <td>20020723</td>\n",
" <td>20020418</td>\n",
" <td>20020523</td>\n",
" <td>20020717</td>\n",
" <td>1.945822e+09</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20021231</td>\n",
" <td>20030424</td>\n",
" <td>20030926</td>\n",
" <td>20030929</td>\n",
" <td>20030929</td>\n",
" <td>20030424</td>\n",
" <td>20030827</td>\n",
" <td>20030923</td>\n",
" <td>1.945822e+09</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20080630</td>\n",
" <td>20080821</td>\n",
" <td>20081030</td>\n",
" <td>20081031</td>\n",
" <td>20081031</td>\n",
" <td>20080821</td>\n",
" <td>20081015</td>\n",
" <td>20081024</td>\n",
" <td>2.388795e+09</td>\n",
" <td>0.3</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20120630</td>\n",
" <td>20120816</td>\n",
" <td>20121018</td>\n",
" <td>20121019</td>\n",
" <td>20121019</td>\n",
" <td>20120816</td>\n",
" <td>20120831</td>\n",
" <td>20121012</td>\n",
" <td>5.123350e+09</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20121231</td>\n",
" <td>20130308</td>\n",
" <td>20130619</td>\n",
" <td>20130620</td>\n",
" <td>20130620</td>\n",
" <td>20130308</td>\n",
" <td>20130523</td>\n",
" <td>20130614</td>\n",
" <td>5.123350e+09</td>\n",
" <td>0.6</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20131231</td>\n",
" <td>20140307</td>\n",
" <td>20140611</td>\n",
" <td>20140612</td>\n",
" <td>20140612</td>\n",
" <td>20140307</td>\n",
" <td>20140522</td>\n",
" <td>20140606</td>\n",
" <td>9.520746e+09</td>\n",
" <td>0.2</td>\n",
" <td>0.2</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20141231</td>\n",
" <td>20150313</td>\n",
" <td>20150410</td>\n",
" <td>20150413</td>\n",
" <td>20150413</td>\n",
" <td>20150313</td>\n",
" <td>20150402</td>\n",
" <td>20150407</td>\n",
" <td>1.142489e+10</td>\n",
" <td>0.2</td>\n",
" <td>0.2</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20151231</td>\n",
" <td>20160310</td>\n",
" <td>20160615</td>\n",
" <td>20160616</td>\n",
" <td>20160616</td>\n",
" <td>20160310</td>\n",
" <td>20160519</td>\n",
" <td>20160608</td>\n",
" <td>1.430868e+10</td>\n",
" <td>0.2</td>\n",
" <td>0.2</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20161231</td>\n",
" <td>20170317</td>\n",
" <td>20170720</td>\n",
" <td>20170721</td>\n",
" <td>20170721</td>\n",
" <td>20170317</td>\n",
" <td>20170629</td>\n",
" <td>20170717</td>\n",
" <td>1.717041e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20171231</td>\n",
" <td>20180315</td>\n",
" <td>20180711</td>\n",
" <td>20180712</td>\n",
" <td>20180712</td>\n",
" <td>20180315</td>\n",
" <td>20180620</td>\n",
" <td>20180706</td>\n",
" <td>1.717041e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20181231</td>\n",
" <td>20190307</td>\n",
" <td>20190625</td>\n",
" <td>20190626</td>\n",
" <td>20190626</td>\n",
" <td>20190307</td>\n",
" <td>20190530</td>\n",
" <td>20190620</td>\n",
" <td>1.717041e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20191231</td>\n",
" <td>20200214</td>\n",
" <td>20200527</td>\n",
" <td>20200528</td>\n",
" <td>20200528</td>\n",
" <td>20200214</td>\n",
" <td>20200514</td>\n",
" <td>20200522</td>\n",
" <td>1.940592e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20201231</td>\n",
" <td>20210202</td>\n",
" <td>20210513</td>\n",
" <td>20210514</td>\n",
" <td>20210514</td>\n",
" <td>20210202</td>\n",
" <td>20210408</td>\n",
" <td>20210507</td>\n",
" <td>1.940592e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20211231</td>\n",
" <td>20220310</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>20220310</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.940592e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" WIND_CODE IntCode ReportPeriod AppearAtDate EQY_RECORD_DT EX_DT \\\n",
"0 000001.SZ 1 20011231 20020418 20020722 20020723 \n",
"1 000001.SZ 1 20021231 20030424 20030926 20030929 \n",
"2 000001.SZ 1 20080630 20080821 20081030 20081031 \n",
"3 000001.SZ 1 20120630 20120816 20121018 20121019 \n",
"4 000001.SZ 1 20121231 20130308 20130619 20130620 \n",
"5 000001.SZ 1 20131231 20140307 20140611 20140612 \n",
"6 000001.SZ 1 20141231 20150313 20150410 20150413 \n",
"7 000001.SZ 1 20151231 20160310 20160615 20160616 \n",
"8 000001.SZ 1 20161231 20170317 20170720 20170721 \n",
"9 000001.SZ 1 20171231 20180315 20180711 20180712 \n",
"10 000001.SZ 1 20181231 20190307 20190625 20190626 \n",
"11 000001.SZ 1 20191231 20200214 20200527 20200528 \n",
"12 000001.SZ 1 20201231 20210202 20210513 20210514 \n",
"13 000001.SZ 1 20211231 20220310 0 0 \n",
"\n",
" DVD_PAYOUT_DT S_DIV_PRELANDATE S_DIV_SMTGDATE DVD_ANN_DT \\\n",
"0 20020723 20020418 20020523 20020717 \n",
"1 20030929 20030424 20030827 20030923 \n",
"2 20081031 20080821 20081015 20081024 \n",
"3 20121019 20120816 20120831 20121012 \n",
"4 20130620 20130308 20130523 20130614 \n",
"5 20140612 20140307 20140522 20140606 \n",
"6 20150413 20150313 20150402 20150407 \n",
"7 20160616 20160310 20160519 20160608 \n",
"8 20170721 20170317 20170629 20170717 \n",
"9 20180712 20180315 20180620 20180706 \n",
"10 20190626 20190307 20190530 20190620 \n",
"11 20200528 20200214 20200514 20200522 \n",
"12 20210514 20210202 20210408 20210507 \n",
"13 0 20220310 0 0 \n",
"\n",
" S_DIV_BASESHARE S_DIV_BONUSRATE S_DIV_CONVERSEDRATE MEMO S_DIV_PREANNDT \n",
"0 1.945822e+09 0.0 0.0 nan 0 \n",
"1 1.945822e+09 0.0 0.0 nan 0 \n",
"2 2.388795e+09 0.3 0.0 nan 0 \n",
"3 5.123350e+09 0.0 0.0 nan 0 \n",
"4 5.123350e+09 0.6 0.0 nan 0 \n",
"5 9.520746e+09 0.2 0.2 nan 0 \n",
"6 1.142489e+10 0.2 0.2 nan 0 \n",
"7 1.430868e+10 0.2 0.2 nan 0 \n",
"8 1.717041e+10 0.0 0.0 nan 0 \n",
"9 1.717041e+10 0.0 0.0 nan 0 \n",
"10 1.717041e+10 0.0 0.0 nan 0 \n",
"11 1.940592e+10 0.0 0.0 nan 0 \n",
"12 1.940592e+10 0.0 0.0 nan 0 \n",
"13 1.940592e+10 0.0 0.0 nan 0 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Replace the 0 placeholder in EQY_RECORD_DT with NaN so it is treated as missing.\n",
"# Series.mask builds a new float64 column; assigning NaN into the integer column\n",
"# in place via .loc is an incompatible-dtype setitem that pandas >= 2.1 deprecates.\n",
"# The cell is safe to re-run: NaN == 0 is False, so already-cleaned rows are untouched.\n",
"df2['EQY_RECORD_DT'] = df2['EQY_RECORD_DT'].mask(df2['EQY_RECORD_DT'] == 0)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"NaT"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.to_datetime(np.nan)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>WIND_CODE</th>\n",
" <th>IntCode</th>\n",
" <th>ReportPeriod</th>\n",
" <th>AppearAtDate</th>\n",
" <th>EQY_RECORD_DT</th>\n",
" <th>EX_DT</th>\n",
" <th>DVD_PAYOUT_DT</th>\n",
" <th>S_DIV_PRELANDATE</th>\n",
" <th>S_DIV_SMTGDATE</th>\n",
" <th>DVD_ANN_DT</th>\n",
" <th>S_DIV_BASESHARE</th>\n",
" <th>S_DIV_BONUSRATE</th>\n",
" <th>S_DIV_CONVERSEDRATE</th>\n",
" <th>MEMO</th>\n",
" <th>S_DIV_PREANNDT</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20011231</td>\n",
" <td>20020418</td>\n",
" <td>20020722.0</td>\n",
" <td>20020723</td>\n",
" <td>20020723</td>\n",
" <td>20020418</td>\n",
" <td>20020523</td>\n",
" <td>20020717</td>\n",
" <td>1.945822e+09</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20021231</td>\n",
" <td>20030424</td>\n",
" <td>20030926.0</td>\n",
" <td>20030929</td>\n",
" <td>20030929</td>\n",
" <td>20030424</td>\n",
" <td>20030827</td>\n",
" <td>20030923</td>\n",
" <td>1.945822e+09</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20080630</td>\n",
" <td>20080821</td>\n",
" <td>20081030.0</td>\n",
" <td>20081031</td>\n",
" <td>20081031</td>\n",
" <td>20080821</td>\n",
" <td>20081015</td>\n",
" <td>20081024</td>\n",
" <td>2.388795e+09</td>\n",
" <td>0.3</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20120630</td>\n",
" <td>20120816</td>\n",
" <td>20121018.0</td>\n",
" <td>20121019</td>\n",
" <td>20121019</td>\n",
" <td>20120816</td>\n",
" <td>20120831</td>\n",
" <td>20121012</td>\n",
" <td>5.123350e+09</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20121231</td>\n",
" <td>20130308</td>\n",
" <td>20130619.0</td>\n",
" <td>20130620</td>\n",
" <td>20130620</td>\n",
" <td>20130308</td>\n",
" <td>20130523</td>\n",
" <td>20130614</td>\n",
" <td>5.123350e+09</td>\n",
" <td>0.6</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20131231</td>\n",
" <td>20140307</td>\n",
" <td>20140611.0</td>\n",
" <td>20140612</td>\n",
" <td>20140612</td>\n",
" <td>20140307</td>\n",
" <td>20140522</td>\n",
" <td>20140606</td>\n",
" <td>9.520746e+09</td>\n",
" <td>0.2</td>\n",
" <td>0.2</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20141231</td>\n",
" <td>20150313</td>\n",
" <td>20150410.0</td>\n",
" <td>20150413</td>\n",
" <td>20150413</td>\n",
" <td>20150313</td>\n",
" <td>20150402</td>\n",
" <td>20150407</td>\n",
" <td>1.142489e+10</td>\n",
" <td>0.2</td>\n",
" <td>0.2</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20151231</td>\n",
" <td>20160310</td>\n",
" <td>20160615.0</td>\n",
" <td>20160616</td>\n",
" <td>20160616</td>\n",
" <td>20160310</td>\n",
" <td>20160519</td>\n",
" <td>20160608</td>\n",
" <td>1.430868e+10</td>\n",
" <td>0.2</td>\n",
" <td>0.2</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20161231</td>\n",
" <td>20170317</td>\n",
" <td>20170720.0</td>\n",
" <td>20170721</td>\n",
" <td>20170721</td>\n",
" <td>20170317</td>\n",
" <td>20170629</td>\n",
" <td>20170717</td>\n",
" <td>1.717041e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20171231</td>\n",
" <td>20180315</td>\n",
" <td>20180711.0</td>\n",
" <td>20180712</td>\n",
" <td>20180712</td>\n",
" <td>20180315</td>\n",
" <td>20180620</td>\n",
" <td>20180706</td>\n",
" <td>1.717041e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20181231</td>\n",
" <td>20190307</td>\n",
" <td>20190625.0</td>\n",
" <td>20190626</td>\n",
" <td>20190626</td>\n",
" <td>20190307</td>\n",
" <td>20190530</td>\n",
" <td>20190620</td>\n",
" <td>1.717041e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20191231</td>\n",
" <td>20200214</td>\n",
" <td>20200527.0</td>\n",
" <td>20200528</td>\n",
" <td>20200528</td>\n",
" <td>20200214</td>\n",
" <td>20200514</td>\n",
" <td>20200522</td>\n",
" <td>1.940592e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20201231</td>\n",
" <td>20210202</td>\n",
" <td>20210513.0</td>\n",
" <td>20210514</td>\n",
" <td>20210514</td>\n",
" <td>20210202</td>\n",
" <td>20210408</td>\n",
" <td>20210507</td>\n",
" <td>1.940592e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>000001.SZ</td>\n",
" <td>1</td>\n",
" <td>20211231</td>\n",
" <td>20220310</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>20220310</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.940592e+10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>nan</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" WIND_CODE IntCode ReportPeriod AppearAtDate EQY_RECORD_DT EX_DT \\\n",
"0 000001.SZ 1 20011231 20020418 20020722.0 20020723 \n",
"1 000001.SZ 1 20021231 20030424 20030926.0 20030929 \n",
"2 000001.SZ 1 20080630 20080821 20081030.0 20081031 \n",
"3 000001.SZ 1 20120630 20120816 20121018.0 20121019 \n",
"4 000001.SZ 1 20121231 20130308 20130619.0 20130620 \n",
"5 000001.SZ 1 20131231 20140307 20140611.0 20140612 \n",
"6 000001.SZ 1 20141231 20150313 20150410.0 20150413 \n",
"7 000001.SZ 1 20151231 20160310 20160615.0 20160616 \n",
"8 000001.SZ 1 20161231 20170317 20170720.0 20170721 \n",
"9 000001.SZ 1 20171231 20180315 20180711.0 20180712 \n",
"10 000001.SZ 1 20181231 20190307 20190625.0 20190626 \n",
"11 000001.SZ 1 20191231 20200214 20200527.0 20200528 \n",
"12 000001.SZ 1 20201231 20210202 20210513.0 20210514 \n",
"13 000001.SZ 1 20211231 20220310 NaN 0 \n",
"\n",
" DVD_PAYOUT_DT S_DIV_PRELANDATE S_DIV_SMTGDATE DVD_ANN_DT \\\n",
"0 20020723 20020418 20020523 20020717 \n",
"1 20030929 20030424 20030827 20030923 \n",
"2 20081031 20080821 20081015 20081024 \n",
"3 20121019 20120816 20120831 20121012 \n",
"4 20130620 20130308 20130523 20130614 \n",
"5 20140612 20140307 20140522 20140606 \n",
"6 20150413 20150313 20150402 20150407 \n",
"7 20160616 20160310 20160519 20160608 \n",
"8 20170721 20170317 20170629 20170717 \n",
"9 20180712 20180315 20180620 20180706 \n",
"10 20190626 20190307 20190530 20190620 \n",
"11 20200528 20200214 20200514 20200522 \n",
"12 20210514 20210202 20210408 20210507 \n",
"13 0 20220310 0 0 \n",
"\n",
" S_DIV_BASESHARE S_DIV_BONUSRATE S_DIV_CONVERSEDRATE MEMO S_DIV_PREANNDT \n",
"0 1.945822e+09 0.0 0.0 nan 0 \n",
"1 1.945822e+09 0.0 0.0 nan 0 \n",
"2 2.388795e+09 0.3 0.0 nan 0 \n",
"3 5.123350e+09 0.0 0.0 nan 0 \n",
"4 5.123350e+09 0.6 0.0 nan 0 \n",
"5 9.520746e+09 0.2 0.2 nan 0 \n",
"6 1.142489e+10 0.2 0.2 nan 0 \n",
"7 1.430868e+10 0.2 0.2 nan 0 \n",
"8 1.717041e+10 0.0 0.0 nan 0 \n",
"9 1.717041e+10 0.0 0.0 nan 0 \n",
"10 1.717041e+10 0.0 0.0 nan 0 \n",
"11 1.940592e+10 0.0 0.0 nan 0 \n",
"12 1.940592e+10 0.0 0.0 nan 0 \n",
"13 1.940592e+10 0.0 0.0 nan 0 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
2 years ago
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"vscode": {
"interpreter": {
"hash": "5a0c795ff324b912f12ad95b94c9d776ccc7a75bdf6a126a4f44b3067472979e"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}