main
Guofu Li 2 years ago
parent 083c66abb1
commit a64a112b46

.gitignore vendored

@@ -5,6 +5,7 @@
.DS_Store
*.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@@ -28,16 +28,118 @@
},
{
"cell_type": "code",
-"execution_count": 5,
+"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"with engine.connect() as conn:\n",
-" stat = \"select [StockID], [date] from [StockDaily].[dbo].[DailyKLine] group by [StockID], [date]\"\n",
+" stat = \"select [StockID], [date] from [IndexDaily].[dbo].[DailyKLine] group by [StockID], [date]\"\n",
-" rs = conn.execute(statd)\n",
+" rs = conn.execute(stat)\n",
" stock_date_list = [(stock_name, date) for stock_name, date in rs.fetchall()]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"df = pd.DataFrame(stock_date_list, columns=['code', 'm_nDate'])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th>m_nDate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">CSI000140</th>\n",
" <th>20120912</th>\n",
" </tr>\n",
" <tr>\n",
" <th>20120913</th>\n",
" </tr>\n",
" <tr>\n",
" <th>20120914</th>\n",
" </tr>\n",
" <tr>\n",
" <th>20120917</th>\n",
" </tr>\n",
" <tr>\n",
" <th>20120918</th>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">SZ399995</th>\n",
" <th>20220829</th>\n",
" </tr>\n",
" <tr>\n",
" <th>20220830</th>\n",
" </tr>\n",
" <tr>\n",
" <th>20220831</th>\n",
" </tr>\n",
" <tr>\n",
" <th>20220901</th>\n",
" </tr>\n",
" <tr>\n",
" <th>20220902</th>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>403101 rows × 0 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: [(CSI000140, 20120912), (CSI000140, 20120913), (CSI000140, 20120914), (CSI000140, 20120917), (CSI000140, 20120918), (CSI000140, 20120919), (CSI000140, 20120920), (CSI000140, 20120921), (CSI000140, 20120924), (CSI000140, 20120925), (CSI000140, 20120926), (CSI000140, 20120927), (CSI000140, 20120928), (CSI000140, 20121008), (CSI000140, 20121009), (CSI000140, 20121010), (CSI000140, 20121011), (CSI000140, 20121012), (CSI000140, 20121015), (CSI000140, 20121016), (CSI000140, 20121017), (CSI000140, 20121018), (CSI000140, 20121019), (CSI000140, 20121022), (CSI000140, 20121023), (CSI000140, 20121024), (CSI000140, 20121025), (CSI000140, 20121026), (CSI000140, 20121029), (CSI000140, 20121030), (CSI000140, 20121031), (CSI000140, 20121101), (CSI000140, 20121102), (CSI000140, 20121105), (CSI000140, 20121106), (CSI000140, 20121107), (CSI000140, 20121108), (CSI000140, 20121109), (CSI000140, 20121112), (CSI000140, 20121113), (CSI000140, 20121114), (CSI000140, 20121115), (CSI000140, 20121116), (CSI000140, 20121119), (CSI000140, 20121120), (CSI000140, 20121121), (CSI000140, 20121122), (CSI000140, 20121123), (CSI000140, 20121126), (CSI000140, 20121127), (CSI000140, 20121128), (CSI000140, 20121129), (CSI000140, 20121130), (CSI000140, 20121203), (CSI000140, 20121204), (CSI000140, 20121205), (CSI000140, 20121206), (CSI000140, 20121207), (CSI000140, 20121210), (CSI000140, 20121211), (CSI000140, 20121212), (CSI000140, 20121213), (CSI000140, 20121214), (CSI000140, 20121217), (CSI000140, 20121218), (CSI000140, 20121219), (CSI000140, 20121220), (CSI000140, 20121221), (CSI000140, 20121224), (CSI000140, 20121225), (CSI000140, 20121226), (CSI000140, 20121227), (CSI000140, 20121228), (CSI000140, 20121231), (CSI000140, 20130104), (CSI000140, 20130107), (CSI000140, 20130108), (CSI000140, 20130109), (CSI000140, 20130110), (CSI000140, 20130111), (CSI000140, 20130115), (CSI000140, 20130116), (CSI000140, 20130117), (CSI000140, 20130118), (CSI000140, 20130121), (CSI000140, 20130122), (CSI000140, 20130123), (CSI000140, 20130124), (CSI000140, 20130125), (CSI000140, 20130128), (CSI000140, 20130129), (CSI000140, 20130130), (CSI000140, 20130131), (CSI000140, 20130201), (CSI000140, 20130204), (CSI000140, 20130205), (CSI000140, 20130206), (CSI000140, 20130207), (CSI000140, 20130208), (CSI000140, 20130218), ...]\n",
"\n",
"[403101 rows x 0 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.set_index(['code', 'm_nDate']).sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 6,

@@ -0,0 +1,173 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "118641f9-96c3-4fd3-aea2-7ecdae17492e",
"metadata": {},
"outputs": [],
"source": [
"import dolphindb as ddb\n",
"\n",
"sess = ddb.session('localhost', 8848)\n",
"sess.login('admin', '123456')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "44e3d35e-2e84-44fc-a3d2-00eaa0135460",
"metadata": {},
"outputs": [],
"source": [
"df = sess.run(\"\"\"\n",
" select code from loadTable(\"dfs://daily_stock_ts\", \"daily_kline\")\n",
" group by code order by code asc\n",
"\"\"\")\n",
"\n",
"df[\"entity_id\"] = df.index\n",
"df.set_index(\"code\", inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "20a6066d-8efb-40fd-8a3b-3d848c8c0073",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>entity_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>000001.SZ</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>000002.SZ</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>000004.SZ</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>000005.SZ</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>000006.SZ</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>871970.NE</th>\n",
" <td>5010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>871981.NE</th>\n",
" <td>5011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>872925.NE</th>\n",
" <td>5012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>873169.NE</th>\n",
" <td>5013</td>\n",
" </tr>\n",
" <tr>\n",
" <th>873223.NE</th>\n",
" <td>5014</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5015 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" entity_id\n",
"code \n",
"000001.SZ 0\n",
"000002.SZ 1\n",
"000004.SZ 2\n",
"000005.SZ 3\n",
"000006.SZ 4\n",
"... ...\n",
"871970.NE 5010\n",
"871981.NE 5011\n",
"872925.NE 5012\n",
"873169.NE 5013\n",
"873223.NE 5014\n",
"\n",
"[5015 rows x 1 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc934d77-3cc1-458f-9e28-3f0715e9b87b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@@ -18,8 +18,7 @@ import dolphindb as ddb
import dolphindb.settings as keys
import sqlalchemy as sa

-import ProtoBuffEntitys
+from .DDBLoader import DDBLoader

class DDBDailyLoader(DDBLoader):
@@ -45,6 +44,8 @@ class DDBDailyLoader(DDBLoader):
'DOUBLE'
]

+memory_table_name = 'daily_kline_mem'
+partition_table_name = 'daily_kline'

def create_ddb_database(self):
# TODO: the daily database has already been created in DDBDailyFactor
@@ -62,7 +63,7 @@
print('Did load database from', self.ddb_path)

-def create_ddb_partition_table(self, memory_table_name, partition_table_name):
+def create_ddb_partition_table(self):
# TODO: only a single daily K-line table for now; fundamentals data may be migrated here later
# The daily K-line schema is simple enough to hard-code right here
@@ -84,37 +85,35 @@
""".format(
ddb_daily_path = self.ddb_path,
ddb_daily_dbname = self.ddb_dbname,
-memory_table_name = memory_table_name,
+memory_table_name = self.memory_table_name,
-partition_table_name = partition_table_name,
+partition_table_name = self.partition_table_name,
))
-def create_ddb_memory_table(self, memory_table_name, capacity):
+def create_ddb_memory_table(self, capacity):
self.ddb_sess.run("""
// First create an empty in-memory table to capture the schema; if no data needs inserting, capacity can be set to 10
{memory_table_name} = table({capacity}:0, {col_names}, [{col_types}]);
""".format(
-memory_table_name = memory_table_name,
+memory_table_name = self.memory_table_name,
capacity = capacity,
col_names = '`' + '`'.join(self.daily_kline_cols),
col_types = ', '.join(self.daily_kline_col_types)
))
-def dump_daily_kline_to_ddb(self):
+def dump_to_ddb(self):
# Create the partitioned table first, then insert the data one stock at a time:
# 1. Creating the partitioned table while inserting the first stock's data would need extra control flow, which is cumbersome
# 2. A DataFrame in the Python program uploads straight to a DolphinDB in-memory table; column types need no handling there, setting them on the partitioned table is enough
-memory_table_name = 'daily_kline_mem'
-partition_table_name = 'daily_kline'

-self.create_ddb_memory_table(memory_table_name, 10)
+self.create_ddb_memory_table(10)
print('Did create ddb memory table.')
-pprint(self.ddb_sess.run(f"schema({memory_table_name})"))
+pprint(self.ddb_sess.run(f"schema({self.memory_table_name})"))
-self.create_ddb_partition_table(memory_table_name, partition_table_name)
+self.create_ddb_partition_table()
print('Did create ddb partition table.')
-pprint(self.ddb_sess.run(f"schema({partition_table_name})"))
+pprint(self.ddb_sess.run(f"schema({self.partition_table_name})"))

with self.mssql_engine.connect() as conn:
stat = "select distinct [StockID] from [StockDaily].dbo.[DailyKLine]"
@@ -137,24 +136,133 @@ class DDBDailyLoader(DDBLoader):
df = pd.DataFrame(row_list)
df['date'] = DDBLoader.make_date(df['date'])
df['StockID'] = DDBLoader.tscode_to_windcode(df['StockID'])
-self.ddb_sess.upload({memory_table_name : df})
+self.ddb_sess.upload({self.memory_table_name : df})
#print('Did upload dataframe to ddb.')
-#pprint(self.ddb_sess.run(f"schema({memory_table_name})"))
+#pprint(self.ddb_sess.run(f"schema({self.memory_table_name})"))
#break
-self.ddb_sess.run(f"{partition_table_name}.tableInsert({memory_table_name})")
+self.ddb_sess.run(f"{self.partition_table_name}.tableInsert({self.memory_table_name})")
class DDBDailyFactorLoader(DDBDailyLoader):
daily_kline_cols = [
'code', 'm_nDate',
# The 4 price-volume co-movement factors
'trend_with_turnover', 'trend_with_amount',
"abs_trend_with_turnover", "abs_trend_with_amount",
# The price-volume divergence factor from Alpha101
"alpha101_22"
]
daily_kline_col_types = [
'SYMBOL', 'DATE',
'DOUBLE', 'DOUBLE',
'DOUBLE', 'DOUBLE',
'DOUBLE'
]
memory_table_name = 'daily_factor_mem'
partition_table_name = 'daily_factor'
def dump_to_ddb(self):
self.create_ddb_memory_table(10)
print('Did create ddb memory table.')
pprint(self.ddb_sess.run(f"schema({self.memory_table_name})"))
self.create_ddb_partition_table()
print('Did create ddb partition table.')
pprint(self.ddb_sess.run(f"schema({self.partition_table_name})"))
df_list = [
self.alpha101_22,
self.trend_with_amount,
]
df = pd.concat(df_list, axis=1)
df.reset_index(inplace=True)
print('Did prepare the dataframe for insertion:')
print(df.head())
self.ddb_sess.upload({"tbl": df})
self.ddb_sess.run("tableInsert(loadTable('dfs://daily_stock_ts', 'daily_factor'), tbl)")
pprint("Did dump data to partition table.")
@property
def alpha101_22(self):
sql = """
vol_20 = select code, m_nDate, mstdp(close, 20, 9) as vol_20
from loadTable("dfs://daily_stock_ts", "daily_kline")
context by code;
rank_vol_20 = select code, m_nDate, rank(vol_20, tiesMethod='average', percent=true)
from vol_20 context by m_nDate;
corr_5 = select code, m_nDate, mcorr(high, vol, 5) as corr_5
from loadTable("dfs://daily_stock_ts", "daily_kline")
context by code;
delta_corr_5 = select code, m_nDate, mfirst(corr_5, 5) - corr_5 as delta_corr_5
from corr_5 context by code;
alpha101_22 = select code, m_nDate, delta_corr_5 * rank_vol_20 as alpha101_22
from ej(rank_vol_20, delta_corr_5, `code`m_nDate);
alpha101_22;
"""
df = self.ddb_sess.run(sql)
df.set_index(["code", "m_nDate"], inplace=True)
return df
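For readers who think in pandas rather than DolphinDB script, the property above assembles an Alpha101 #22-style signal: the 5-day change in the rolling high-volume correlation, scaled by the cross-sectional percentile rank of 20-day close volatility. A rough, illustrative pandas transcription (assuming mstdp's third argument acts as a minimum-periods cutoff and that mfirst(x, 5) reads the value four rows back):

import pandas as pd

def alpha101_22_pandas(df: pd.DataFrame) -> pd.DataFrame:
    """df: long-format frame with columns ['m_nDate', 'code', 'high', 'close', 'vol']."""
    # Pivot into date-by-code panels so per-stock rolling ops run columnwise
    close = df.pivot(index='m_nDate', columns='code', values='close')
    high = df.pivot(index='m_nDate', columns='code', values='high')
    vol = df.pivot(index='m_nDate', columns='code', values='vol')

    # mstdp(close, 20, 9): 20-day moving population std, requiring >= 9 observations
    vol_20 = close.rolling(20, min_periods=9).std(ddof=0)
    # rank(..., percent=true) context by m_nDate: cross-sectional percentile rank
    rank_vol_20 = vol_20.rank(axis=1, pct=True)

    # mcorr(high, vol, 5): 5-day moving correlation, per stock
    corr_5 = high.rolling(5).corr(vol)
    # mfirst(corr_5, 5) - corr_5: the window's first value minus its latest
    delta_corr_5 = corr_5.shift(4) - corr_5

    return delta_corr_5 * rank_vol_20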
@property
def trend_with_amount(self):
factor_list = [
'trend_with_turnover', 'trend_with_amount',
"abs_trend_with_turnover", "abs_trend_with_amount",
]
factor_def = {
"trend_with_turnover": "(rank(amount/MarketValues, tiesMethod='average', percent=true) - 0.5) * winsorize(PctChg/20, 0.05)",
"trend_with_amount": "(rank(amount, tiesMethod='average', percent=true) - 0.5) * winsorize(PctChg/20, 0.05)",
"abs_trend_with_turnover": "(rank(amount/MarketValues, tiesMethod='average', percent=true) - 0.5) * abs(winsorize(PctChg/20, 0.05))",
"abs_trend_with_amount": "(rank(amount, tiesMethod='average', percent=true) - 0.5) * abs(winsorize(PctChg/20, 0.05))",
}
cols = ", ".join([
f"{factor_def[factor_name]} as {factor_name}" \
for factor_name in factor_list
])
sql = f"""
select code, m_nDate, {cols}
from loadTable("dfs://daily_stock_ts", "daily_kline")
context by m_nDate
"""
print("factor sql for trend with amount: " + sql)
df = self.ddb_sess.run(sql)
df.set_index(["code", "m_nDate"], inplace=True)
return df
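Each factor definition above is purely cross-sectional: a centered percentile rank of turnover (amount/MarketValues) or of raw amount, times a winsorized, scaled daily return. One day's worth in pandas might look like this sketch, with scipy's winsorize standing in for the DolphinDB builtin (assumed here to clip both tails at 5%):

import pandas as pd
from scipy.stats.mstats import winsorize

def trend_with_turnover_one_day(day: pd.DataFrame) -> pd.Series:
    """day: a single m_nDate's rows, with columns ['amount', 'MarketValues', 'PctChg']."""
    # Centered percentile rank of turnover across the day's cross-section
    centered_rank = (day['amount'] / day['MarketValues']).rank(pct=True) - 0.5
    # winsorize(PctChg/20, 0.05): clip the extreme 5% in each tail
    clipped_ret = pd.Series(
        winsorize(day['PctChg'].to_numpy() / 20, limits=(0.05, 0.05)),
        index=day.index,
    )
    return centered_rank * clipped_ret

Applying it per date, e.g. df.groupby('m_nDate', group_keys=False).apply(trend_with_turnover_one_day), mirrors the context by m_nDate grouping in the SQL.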
def main():
# TODO:
# The `Fire` library could wrap these function calls so that running with different parameters no longer requires editing the code in several places.

# Daily market data
-loader = DDBDailyLoader()
+#loader = DDBDailyLoader()
+loader = DDBDailyFactorLoader()
loader.load_ddb_database()
-#loader.dump_daily_kline_to_ddb()
+loader.dump_to_ddb()

if __name__ == '__main__':
main()

@@ -0,0 +1,12 @@
from .DDBLoader import DDBLoader
class DDBEntityLoader(DDBLoader):
ddb_path = "dfs://daily_stock_ts"
ddb_dbname = "db_daily_stock_ts"
def __init__(self, dtype, **kwargs):
pass

@@ -0,0 +1,286 @@
import pickle
import functools
import warnings
from pprint import pprint
from pathlib import Path
from tqdm import tqdm
from multiprocessing import Pool
import numpy as np
import pandas as pd
import dolphindb as ddb
import dolphindb.settings as keys
import sqlalchemy as sa
from .DDBLoader import DDBLoader
class DDBIndexLoader(DDBLoader):
ddb_path = "dfs://daily_stock_ts"
ddb_dbname = "db_daily_stock_ts"
def __init__(self, dtype, **kwargs):
# TODO: later versions may add more settings to the parent class constructor
super().__init__(**kwargs)
self.dtype = dtype
if dtype == "concept":
self.mem_tbl_name = "mem_idx_daily_concept"
self.part_tbl_name ="idx_daily_concept"
elif dtype == "kline":
self.mem_tbl_name = "mem_idx_daily_kline"
self.part_tbl_name ="idx_daily_kline"
else:
raise NotImplementedError(f"Unsupported `dtype` argument: {dtype}")
self.make_fields()
self.make_calendar_df()
def make_fields(self):
if self.dtype == "concept":
with self.mssql_engine.connect() as conn:
rs = conn.execute("select IndexID from [IndexInfo].[dbo].[Constituents] group by IndexID")
self.fields = [index_id for (index_id,) in rs.fetchall()]
elif self.dtype == "kline":
self.fields = ['open', 'high', 'low', 'close', 'vol', 'amount', 'yclose']
def make_calendar_df(self):
# Use the TinySoft daily K-line table to build the trading calendar
with self.mssql_engine.connect() as conn:
if self.dtype == "concept":
stat = "select [StockID], [date] from [StockDaily].[dbo].[DailyKLine] group by [StockID], [date]"
elif self.dtype == "kline":
stat = "select [StockID], [date] from [IndexDaily].[dbo].[DailyKLine] group by [StockID], [date]"
else:
raise NotImplementedError(f"Unsupported dtype: {self.dtype}")
rs = conn.execute(stat)
stock_date_list = [(stock_name, date) for stock_name, date in rs.fetchall()]
self.df_calendar = pd.DataFrame(stock_date_list, columns=['code', 'm_nDate'])
self.df_calendar['m_nDate'] = self.make_date(self.df_calendar['m_nDate'])
self.df_calendar['code'] = self.tscode_to_windcode(self.df_calendar['code'])
print('Did make the DataFrame for calendar')
print(self.df_calendar.head())
def load_ddb_database(self):
self.ddb_sess.run("""
{dbName} = database(directory='{dbPath}')
""".format(
dbName = self.ddb_dbname,
dbPath = self.ddb_path
))
print('Did load database from', self.ddb_path)
def create_ddb_partition_table(self):
if self.dtype == "concept":
self._create_ddb_memory_table_concept()
elif self.dtype == "kline":
self._create_ddb_memory_table_kline()
pprint(f"Did create memory table: {self.mem_tbl_name}")
#res = self.ddb_sess.run(f"schema({mem_tbl_name}).colDefs")
if self.ddb_sess.existsTable(self.ddb_path, self.part_tbl_name):
pprint(f"Will drop partition table: {self.part_tbl_name}")
self.ddb_sess.dropTable(self.ddb_path, self.part_tbl_name)
self.ddb_sess.run("""
{part_tbl_name} = {ddb_dbname}.createPartitionedTable(
table = {mem_tbl_name},
tableName = `{part_tbl_name},
partitionColumns = `code,
sortColumns = `code`m_nDate,
compressMethods = {{m_nDate:"delta"}}
)
""".format(
ddb_dbname = self.ddb_dbname,
part_tbl_name = self.part_tbl_name,
mem_tbl_name = self.mem_tbl_name
))
def _create_ddb_memory_table_concept(self):
concept_list = self.fields
col_name_list = ['code', 'm_nDate'] + concept_list
col_type_list = ['SYMBOL', 'DATE'] + ['BOOL'] * len(concept_list)
code = """
{mem_tbl_name} = table(
{capacity}:0,
{col_names},
[{col_types}]
);
""".format(
mem_tbl_name = self.mem_tbl_name,
capacity = 10,
col_names = '`' + '`'.join(col_name_list),
col_types = ','.join(col_type_list)
)
pprint(f"Will create mem table by:\n{code}")
self.ddb_sess.run(code)
def _create_ddb_memory_table_kline(self):
col_name_list = ['code', 'm_nDate'] + self.fields
col_type_list = ['SYMBOL', 'DATE'] + ['DOUBLE'] * len(self.fields)
code = """
{mem_tbl_name} = table(
{capacity} : 0,
{col_names},
[{col_types}]
);
""".format(
mem_tbl_name = self.mem_tbl_name,
capacity = 10,
col_names = '`' + '`'.join(col_name_list),
col_types = ','.join(col_type_list)
)
pprint(f"Will create mem table by:\n{code}")
self.ddb_sess.run(code)
def _make_idx_daily_kline(self):
with tqdm(self.df_calendar.groupby('code')) as pbar:
for wind_code, df_calendar_stock in pbar:
pbar.set_description(f"Will work on {wind_code}")
# Build the TS code (e.g. '000001.SZ' -> 'SZ000001') for querying TinySoft index data in SQL Server
ts_code = wind_code[-2:] + wind_code[:-3]
df_calendar_stock.set_index(['code', 'm_nDate'], inplace=True)
with self.mssql_engine.connect() as conn:
code = """
select
{field_list}
from
[IndexDaily].[dbo].[DailyKLine]
where
StockID='{index_id}'
""".format(
field_list = ','.join([f"[{field}]" for field in (['StockID', 'date'] + self.fields)]),
index_id = ts_code
)
rs = conn.execute(code)
row_list = rs.fetchall()
df = pd.DataFrame(row_list, columns=['code', 'm_nDate'] + self.fields)
df['code'] = self.tscode_to_windcode(df['code'])
df['m_nDate'] = self.make_date(df['m_nDate'])
df.set_index(['code', 'm_nDate'], inplace=True)
yield wind_code, df
def dump_idx_daily_kline_to_ddb(self):
for idx_id, df in self._make_idx_daily_kline():
df.reset_index(inplace=True)
#pprint(f"Will append to partiton table: \n{df}")
self.ddb_sess.upload({self.mem_tbl_name : df})
self.ddb_sess.run("""
append!(loadTable('{dbPath}', `{part_tbl_name}), {mem_tbl_name})
""".format(
dbPath = self.ddb_path,
part_tbl_name = self.part_tbl_name,
mem_tbl_name = self.mem_tbl_name
))
@staticmethod
def _mark_stock2concept_onehot(df_stock2concept, concept_id, start_date, end_date):
# The date a stock enters a concept (index) is always provided,
# but the exit date may be missing, which generally means the stock is still in the concept;
# in that case membership is marked by filling dates through to the last day of the calendar
if end_date is None or end_date == 0:
start_date = pd.to_datetime(str(start_date), format='%Y%m%d')
df_stock2concept.loc[df_stock2concept.index.get_level_values('m_nDate') >= start_date] = True
else:
start_date = pd.to_datetime(str(start_date), format='%Y%m%d')
end_date = pd.to_datetime(str(end_date), format='%Y%m%d')
df_stock2concept.loc[
(df_stock2concept.index.get_level_values('m_nDate') >= start_date) &
(df_stock2concept.index.get_level_values('m_nDate') <= end_date)
] = True
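Because the helper mutates the Series it is handed, in place, over a slice of the (code, m_nDate) index, a toy invocation makes the missing-exit-date convention concrete (the calendar dates and concept id below are invented for illustration):

import pandas as pd

# A tiny one-stock calendar indexed by (code, m_nDate)
idx = pd.MultiIndex.from_product(
    [['000001.SZ'], pd.date_range('2012-09-10', '2012-09-20')],
    names=['code', 'm_nDate'],
)
onehot = pd.DataFrame(False, index=idx, columns=['CSI000140'], dtype='bool')

col = onehot['CSI000140']
# Entered on 20120912 with no exit date: marked True through the calendar's end
DDBIndexLoader._mark_stock2concept_onehot(col, 'CSI000140', 20120912, None)
print(col.sum())  # 9 of the 11 calendar days are marked as in the concept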
def _make_stock2concept_onehot(self):
# Slice the dates relevant to the current stock out of the calendar, then use them as the index
# The calendar here is built from Haitong high-frequency data, so the stock codes are WIND codes
# Group the calendar by stock code
with tqdm(self.df_calendar.groupby('code')) as pbar:
for wind_code, df_calendar_stock in pbar:
pbar.set_description(f"Will work on {wind_code}")
# Build the TS code for querying TinySoft concept index data in SQL Server
ts_code = wind_code[-2:] + wind_code[:-3]
df_calendar_stock.set_index(['code', 'm_nDate'], inplace=True)
# Pivot long to wide, with the concept ids (self.fields) as column names
df_stock2concept = pd.DataFrame(
False, # the one-hot table starts as all-False; cells are set True from the SQL Server enter/exit dates
index=df_calendar_stock.index,
columns=self.fields,
dtype="bool"
)
# Read this stock's concept enter/exit dates from SQL Server
# The data is extracted from TinySoft index data, so the TS code must be used
with self.mssql_engine.connect() as conn:
code = """
select
SecId, IndexID, EnterDate, ExitDate
from
[IndexInfo].[dbo].[Constituents]
where
SecID='{stock_id}'
""".format(
stock_id = ts_code
)
rs = conn.execute(code)
row_list = rs.fetchall()
# After reading all of this stock's concept enter/exit dates from SQL Server, start marking
for (stock_id, concept_id, start_date, end_date) in row_list:
# mark the one-hot position one-by-one
self._mark_stock2concept_onehot(
df_stock2concept[concept_id],
concept_id,
start_date, end_date
)
# yield the marked one-hot dataframe for one stock
yield wind_code, df_stock2concept
def dump_idx_concept_to_ddb(self):
for stock_id, df in self._make_stock2concept_onehot():
df.reset_index(inplace=True)
self.ddb_sess.upload({self.mem_tbl_name : df})
self.ddb_sess.run("""
append!(loadTable('{dbPath}', `{part_tbl_name}), {mem_tbl_name})
""".format(
dbPath = self.ddb_path,
part_tbl_name = self.part_tbl_name,
mem_tbl_name = self.mem_tbl_name
))

@@ -0,0 +1,52 @@
import pandas as pd

from .DDBIndexLoader import DDBIndexLoader
class DDBIndexLoaderWind(DDBIndexLoader):
def __init__(self, dtype, **kwargs):
# TODO: later versions may add more settings to the parent class constructor
super().__init__(**kwargs)
self.dtype = dtype
if dtype == "concept":
self.mem_tbl_name = "mem_idx_daily_concept_wind"
self.part_tbl_name ="idx_daily_concept_wind"
elif dtype == "kline":
self.mem_tbl_name = "mem_idx_daily_kline_wind"
self.part_tbl_name ="idx_daily_kline_wind"
else:
raise NotImplementedError(f"Unsupported `dtype` argument: {dtype}")
self.make_fields()
self.make_calendar_df()
def make_fields(self):
if self.dtype == "concept":
with self.mssql_engine.connect() as conn:
rs = conn.execute("select [WIND_SEC_CODE] from [IndexInfo].[dbo].[Constituents] group by IndexID")
self.fields = [index_id for (index_id,) in rs.fetchall()]
elif self.dtype == "kline":
self.fields = ['open', 'high', 'low', 'close', 'vol', 'amount', 'yclose']
def make_calendar_df(self):
# Use the TinySoft daily K-line table to build the trading calendar
with self.mssql_engine.connect() as conn:
if self.dtype == "concept":
stat = "select [StockID], [date] from [StockDaily].[dbo].[DailyKLine] group by [StockID], [date]"
elif self.dtype == "kline":
stat = "select [StockID], [date] from [IndexDaily].[dbo].[DailyKLine] group by [StockID], [date]"
else:
raise NotImplementedError(f"Unsupported dtype: {self.dtype}")
rs = conn.execute(stat)
stock_date_list = [(stock_name, date) for stock_name, date in rs.fetchall()]
self.df_calendar = pd.DataFrame(stock_date_list, columns=['code', 'm_nDate'])
self.df_calendar['m_nDate'] = self.make_date(self.df_calendar['m_nDate'])
self.df_calendar['code'] = self.tscode_to_windcode(self.df_calendar['code'])
print('Did make the DataFrame for calendar')
print(self.df_calendar.head())

@@ -3,14 +3,20 @@ from loader.DDBPITLoader import DDBPITLoader
from loader.DDBHFTLoader import DDBHFTLoader
from loader.DDBBasicInfoLoader import DDBBasicInfoLoader
from loader.DDBIndexLoader import DDBIndexLoader
+from loader.DDBDailyLoader import DDBDailyLoader, DDBDailyFactorLoader

def create_index_data():
# Sector/concept index data
-loader = DDBIndexLoader(host='192.168.1.7')
+loader = DDBIndexLoader(dtype="kline", host='localhost')
loader.load_ddb_database()
+#mem_tbl_name, part_tbl_name, fields = loader.create_ddb_partition_table("concept")
+#loader.dump_idx_concept_to_ddb(mem_tbl_name, part_tbl_name, concept_list)
+# Index daily K-line data
loader.create_ddb_partition_table()
-loader.dump_idx_concept_to_ddb()
+loader.dump_idx_daily_kline_to_ddb()

def create_hft_data():
@@ -46,7 +52,13 @@ def create_daily_kline_data():
# Daily market data
loader = DDBDailyLoader()
loader.load_ddb_database()
-loader.dump_daily_kline_to_ddb()
+loader.dump_to_ddb()

+def create_daily_factor_data():
+loader = DDBDailyFactorLoader(host="localhost")
+loader.load_ddb_database()
+loader.dump_to_ddb()
@@ -54,7 +66,8 @@ def main():
# TODO:
# The `Fire` library could wrap these function calls so that running with different parameters no longer requires editing the code in several places.
-create_index_data()
+create_daily_factor_data()
+#create_index_data()
#create_hft_data()
#create_pit_data()
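The TODO above points at the Fire library as the way to stop editing main() for every run. A minimal sketch of that wrapping (assuming fire is installed; the subcommand names are illustrative keys over this file's existing functions):

import fire

def main():
    # Expose each task as a CLI subcommand, e.g.:
    #   python create_all.py daily_factor
    #   python create_all.py index
    fire.Fire({
        'index': create_index_data,
        'hft': create_hft_data,
        'pit': create_pit_data,
        'daily_kline': create_daily_kline_data,
        'daily_factor': create_daily_factor_data,
    })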
