You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1451 lines
46 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "75629605-1f71-4f90-b46b-243052fc8a05",
"metadata": {},
"outputs": [],
"source": [
"from src.qlib_scripts.dump_bin import *"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "18a16bed-cda3-472d-86c8-35c674219c56",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "784cb0ea-1699-4261-9c4a-dd75af648b07",
"metadata": {},
"outputs": [],
"source": [
"df_tinysoft = pd.read_csv('tinysoft-data/行情数据/20210101-20220531/k-daily/NE830799.csv')"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "4b10c52d-70cc-4a4b-963d-874444b83e06",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['StockID',\n",
" 'date',\n",
" 'close',\n",
" 'vol',\n",
" 'low',\n",
" 'yclose',\n",
" 'cjbs',\n",
" 'amount',\n",
" 'high',\n",
" 'open',\n",
" 'IsST',\n",
" 'Factor',\n",
" 'PctChg',\n",
" 'FloatShares']"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the column names of the raw frame as a plain Python list.\n",
"list(df_tinysoft.columns)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "841bf388-96f3-4732-8a92-c3740af96b15",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>StockID</th>\n",
" <th>date</th>\n",
" <th>close</th>\n",
" <th>vol</th>\n",
" <th>low</th>\n",
" <th>yclose</th>\n",
" <th>cjbs</th>\n",
" <th>amount</th>\n",
" <th>high</th>\n",
" <th>open</th>\n",
" <th>IsST</th>\n",
" <th>Factor</th>\n",
" <th>PctChg</th>\n",
" <th>FloatShares</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NE830799</td>\n",
" <td>20210104</td>\n",
" <td>22.98</td>\n",
" <td>208835.0</td>\n",
" <td>22.90</td>\n",
" <td>23.00</td>\n",
" <td>0.0</td>\n",
" <td>5082436.35</td>\n",
" <td>23.07</td>\n",
" <td>23.00</td>\n",
" <td>0.0</td>\n",
" <td>0.104658</td>\n",
" <td>-0.086957</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NE830799</td>\n",
" <td>20210105</td>\n",
" <td>23.12</td>\n",
" <td>59729.0</td>\n",
" <td>22.91</td>\n",
" <td>22.98</td>\n",
" <td>0.0</td>\n",
" <td>1374072.92</td>\n",
" <td>23.13</td>\n",
" <td>23.13</td>\n",
" <td>0.0</td>\n",
" <td>0.104658</td>\n",
" <td>0.609225</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NE830799</td>\n",
" <td>20210106</td>\n",
" <td>23.29</td>\n",
" <td>32594.0</td>\n",
" <td>23.00</td>\n",
" <td>23.12</td>\n",
" <td>0.0</td>\n",
" <td>753744.84</td>\n",
" <td>23.31</td>\n",
" <td>23.03</td>\n",
" <td>0.0</td>\n",
" <td>0.104658</td>\n",
" <td>0.735294</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NE830799</td>\n",
" <td>20210107</td>\n",
" <td>22.90</td>\n",
" <td>91805.0</td>\n",
" <td>22.90</td>\n",
" <td>23.29</td>\n",
" <td>0.0</td>\n",
" <td>2119783.36</td>\n",
" <td>23.30</td>\n",
" <td>23.30</td>\n",
" <td>0.0</td>\n",
" <td>0.104658</td>\n",
" <td>-1.674538</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NE830799</td>\n",
" <td>20210108</td>\n",
" <td>23.33</td>\n",
" <td>202341.0</td>\n",
" <td>22.56</td>\n",
" <td>22.90</td>\n",
" <td>0.0</td>\n",
" <td>4662152.94</td>\n",
" <td>23.94</td>\n",
" <td>23.00</td>\n",
" <td>0.0</td>\n",
" <td>0.104658</td>\n",
" <td>1.877729</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>334</th>\n",
" <td>NE830799</td>\n",
" <td>20220525</td>\n",
" <td>11.33</td>\n",
" <td>48637.0</td>\n",
" <td>11.12</td>\n",
" <td>11.20</td>\n",
" <td>0.0</td>\n",
" <td>548426.46</td>\n",
" <td>11.61</td>\n",
" <td>11.61</td>\n",
" <td>0.0</td>\n",
" <td>0.054229</td>\n",
" <td>1.160714</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>335</th>\n",
" <td>NE830799</td>\n",
" <td>20220526</td>\n",
" <td>11.55</td>\n",
" <td>103856.0</td>\n",
" <td>11.30</td>\n",
" <td>11.33</td>\n",
" <td>0.0</td>\n",
" <td>1200104.83</td>\n",
" <td>11.65</td>\n",
" <td>11.62</td>\n",
" <td>0.0</td>\n",
" <td>0.054229</td>\n",
" <td>1.941748</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>336</th>\n",
" <td>NE830799</td>\n",
" <td>20220527</td>\n",
" <td>11.78</td>\n",
" <td>119210.0</td>\n",
" <td>11.53</td>\n",
" <td>11.55</td>\n",
" <td>0.0</td>\n",
" <td>1402552.58</td>\n",
" <td>11.88</td>\n",
" <td>11.68</td>\n",
" <td>0.0</td>\n",
" <td>0.054229</td>\n",
" <td>1.991342</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>337</th>\n",
" <td>NE830799</td>\n",
" <td>20220530</td>\n",
" <td>11.74</td>\n",
" <td>122101.0</td>\n",
" <td>11.44</td>\n",
" <td>11.78</td>\n",
" <td>0.0</td>\n",
" <td>1429026.66</td>\n",
" <td>11.91</td>\n",
" <td>11.91</td>\n",
" <td>0.0</td>\n",
" <td>0.054229</td>\n",
" <td>-0.339559</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338</th>\n",
" <td>NE830799</td>\n",
" <td>20220531</td>\n",
" <td>11.79</td>\n",
" <td>116001.0</td>\n",
" <td>11.70</td>\n",
" <td>11.74</td>\n",
" <td>0.0</td>\n",
" <td>1366318.64</td>\n",
" <td>11.84</td>\n",
" <td>11.74</td>\n",
" <td>0.0</td>\n",
" <td>0.054229</td>\n",
" <td>0.425894</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>339 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" StockID date close vol low yclose cjbs amount \\\n",
"0 NE830799 20210104 22.98 208835.0 22.90 23.00 0.0 5082436.35 \n",
"1 NE830799 20210105 23.12 59729.0 22.91 22.98 0.0 1374072.92 \n",
"2 NE830799 20210106 23.29 32594.0 23.00 23.12 0.0 753744.84 \n",
"3 NE830799 20210107 22.90 91805.0 22.90 23.29 0.0 2119783.36 \n",
"4 NE830799 20210108 23.33 202341.0 22.56 22.90 0.0 4662152.94 \n",
".. ... ... ... ... ... ... ... ... \n",
"334 NE830799 20220525 11.33 48637.0 11.12 11.20 0.0 548426.46 \n",
"335 NE830799 20220526 11.55 103856.0 11.30 11.33 0.0 1200104.83 \n",
"336 NE830799 20220527 11.78 119210.0 11.53 11.55 0.0 1402552.58 \n",
"337 NE830799 20220530 11.74 122101.0 11.44 11.78 0.0 1429026.66 \n",
"338 NE830799 20220531 11.79 116001.0 11.70 11.74 0.0 1366318.64 \n",
"\n",
" high open IsST Factor PctChg FloatShares \n",
"0 23.07 23.00 0.0 0.104658 -0.086957 117222839.0 \n",
"1 23.13 23.13 0.0 0.104658 0.609225 117222839.0 \n",
"2 23.31 23.03 0.0 0.104658 0.735294 117222839.0 \n",
"3 23.30 23.30 0.0 0.104658 -1.674538 117222839.0 \n",
"4 23.94 23.00 0.0 0.104658 1.877729 117222839.0 \n",
".. ... ... ... ... ... ... \n",
"334 11.61 11.61 0.0 0.054229 1.160714 117222839.0 \n",
"335 11.65 11.62 0.0 0.054229 1.941748 117222839.0 \n",
"336 11.88 11.68 0.0 0.054229 1.991342 117222839.0 \n",
"337 11.91 11.91 0.0 0.054229 -0.339559 117222839.0 \n",
"338 11.84 11.74 0.0 0.054229 0.425894 117222839.0 \n",
"\n",
"[339 rows x 14 columns]"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cast every column other than 'StockID' and 'date' to float, then display the frame.\n",
"float_cols = [col for col in df_tinysoft.columns if col not in ('StockID', 'date')]\n",
"df_tinysoft[float_cols] = df_tinysoft[float_cols].astype('float')\n",
"df_tinysoft"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a07ef854-5810-4ca2-b4fc-4f130dd95423",
"metadata": {},
"outputs": [],
"source": [
"# Rename 'vol' to 'volume' and 'Factor' to 'factor', then parse the 'date' column\n",
"# (cast to str, format %Y%m%d) into datetimes.\n",
"df_tinysoft = df_tinysoft.rename(columns={'vol': 'volume', 'Factor': 'factor'})\n",
"df_tinysoft['date'] = pd.to_datetime(df_tinysoft['date'].astype(str), format='%Y%m%d')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "1d3d8626-2eba-40e9-9cff-796c534e5f08",
"metadata": {},
"outputs": [],
"source": [
"# Remove the 'StockID' column from the frame.\n",
"df_tinysoft = df_tinysoft.drop(columns=['StockID'])"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "8b35bc1c-c667-41da-8163-713482fc654f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>close</th>\n",
" <th>volume</th>\n",
" <th>low</th>\n",
" <th>yclose</th>\n",
" <th>cjbs</th>\n",
" <th>amount</th>\n",
" <th>high</th>\n",
" <th>open</th>\n",
" <th>IsST</th>\n",
" <th>factor</th>\n",
" <th>PctChg</th>\n",
" <th>FloatShares</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2021-01-04</td>\n",
" <td>22.98</td>\n",
" <td>208835.0</td>\n",
" <td>22.90</td>\n",
" <td>23.00</td>\n",
" <td>0.0</td>\n",
" <td>5082436.35</td>\n",
" <td>23.07</td>\n",
" <td>23.00</td>\n",
" <td>0</td>\n",
" <td>0.104658</td>\n",
" <td>-0.086957</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2021-01-05</td>\n",
" <td>23.12</td>\n",
" <td>59729.0</td>\n",
" <td>22.91</td>\n",
" <td>22.98</td>\n",
" <td>0.0</td>\n",
" <td>1374072.92</td>\n",
" <td>23.13</td>\n",
" <td>23.13</td>\n",
" <td>0</td>\n",
" <td>0.104658</td>\n",
" <td>0.609225</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2021-01-06</td>\n",
" <td>23.29</td>\n",
" <td>32594.0</td>\n",
" <td>23.00</td>\n",
" <td>23.12</td>\n",
" <td>0.0</td>\n",
" <td>753744.84</td>\n",
" <td>23.31</td>\n",
" <td>23.03</td>\n",
" <td>0</td>\n",
" <td>0.104658</td>\n",
" <td>0.735294</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2021-01-07</td>\n",
" <td>22.90</td>\n",
" <td>91805.0</td>\n",
" <td>22.90</td>\n",
" <td>23.29</td>\n",
" <td>0.0</td>\n",
" <td>2119783.36</td>\n",
" <td>23.30</td>\n",
" <td>23.30</td>\n",
" <td>0</td>\n",
" <td>0.104658</td>\n",
" <td>-1.674538</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2021-01-08</td>\n",
" <td>23.33</td>\n",
" <td>202341.0</td>\n",
" <td>22.56</td>\n",
" <td>22.90</td>\n",
" <td>0.0</td>\n",
" <td>4662152.94</td>\n",
" <td>23.94</td>\n",
" <td>23.00</td>\n",
" <td>0</td>\n",
" <td>0.104658</td>\n",
" <td>1.877729</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>334</th>\n",
" <td>2022-05-25</td>\n",
" <td>11.33</td>\n",
" <td>48637.0</td>\n",
" <td>11.12</td>\n",
" <td>11.20</td>\n",
" <td>0.0</td>\n",
" <td>548426.46</td>\n",
" <td>11.61</td>\n",
" <td>11.61</td>\n",
" <td>0</td>\n",
" <td>0.054229</td>\n",
" <td>1.160714</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>335</th>\n",
" <td>2022-05-26</td>\n",
" <td>11.55</td>\n",
" <td>103856.0</td>\n",
" <td>11.30</td>\n",
" <td>11.33</td>\n",
" <td>0.0</td>\n",
" <td>1200104.83</td>\n",
" <td>11.65</td>\n",
" <td>11.62</td>\n",
" <td>0</td>\n",
" <td>0.054229</td>\n",
" <td>1.941748</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>336</th>\n",
" <td>2022-05-27</td>\n",
" <td>11.78</td>\n",
" <td>119210.0</td>\n",
" <td>11.53</td>\n",
" <td>11.55</td>\n",
" <td>0.0</td>\n",
" <td>1402552.58</td>\n",
" <td>11.88</td>\n",
" <td>11.68</td>\n",
" <td>0</td>\n",
" <td>0.054229</td>\n",
" <td>1.991342</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>337</th>\n",
" <td>2022-05-30</td>\n",
" <td>11.74</td>\n",
" <td>122101.0</td>\n",
" <td>11.44</td>\n",
" <td>11.78</td>\n",
" <td>0.0</td>\n",
" <td>1429026.66</td>\n",
" <td>11.91</td>\n",
" <td>11.91</td>\n",
" <td>0</td>\n",
" <td>0.054229</td>\n",
" <td>-0.339559</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338</th>\n",
" <td>2022-05-31</td>\n",
" <td>11.79</td>\n",
" <td>116001.0</td>\n",
" <td>11.70</td>\n",
" <td>11.74</td>\n",
" <td>0.0</td>\n",
" <td>1366318.64</td>\n",
" <td>11.84</td>\n",
" <td>11.74</td>\n",
" <td>0</td>\n",
" <td>0.054229</td>\n",
" <td>0.425894</td>\n",
" <td>117222839.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>339 rows × 13 columns</p>\n",
"</div>"
],
"text/plain": [
" date close volume low yclose cjbs amount high \\\n",
"0 2021-01-04 22.98 208835.0 22.90 23.00 0.0 5082436.35 23.07 \n",
"1 2021-01-05 23.12 59729.0 22.91 22.98 0.0 1374072.92 23.13 \n",
"2 2021-01-06 23.29 32594.0 23.00 23.12 0.0 753744.84 23.31 \n",
"3 2021-01-07 22.90 91805.0 22.90 23.29 0.0 2119783.36 23.30 \n",
"4 2021-01-08 23.33 202341.0 22.56 22.90 0.0 4662152.94 23.94 \n",
".. ... ... ... ... ... ... ... ... \n",
"334 2022-05-25 11.33 48637.0 11.12 11.20 0.0 548426.46 11.61 \n",
"335 2022-05-26 11.55 103856.0 11.30 11.33 0.0 1200104.83 11.65 \n",
"336 2022-05-27 11.78 119210.0 11.53 11.55 0.0 1402552.58 11.88 \n",
"337 2022-05-30 11.74 122101.0 11.44 11.78 0.0 1429026.66 11.91 \n",
"338 2022-05-31 11.79 116001.0 11.70 11.74 0.0 1366318.64 11.84 \n",
"\n",
" open IsST factor PctChg FloatShares \n",
"0 23.00 0 0.104658 -0.086957 117222839.0 \n",
"1 23.13 0 0.104658 0.609225 117222839.0 \n",
"2 23.03 0 0.104658 0.735294 117222839.0 \n",
"3 23.30 0 0.104658 -1.674538 117222839.0 \n",
"4 23.00 0 0.104658 1.877729 117222839.0 \n",
".. ... ... ... ... ... \n",
"334 11.61 0 0.054229 1.160714 117222839.0 \n",
"335 11.62 0 0.054229 1.941748 117222839.0 \n",
"336 11.68 0 0.054229 1.991342 117222839.0 \n",
"337 11.91 0 0.054229 -0.339559 117222839.0 \n",
"338 11.74 0 0.054229 0.425894 117222839.0 \n",
"\n",
"[339 rows x 13 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_tinysoft"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "42011d0c-bc95-4ff5-a96b-c4af09375d4d",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"# Directory for the cleaned per-instrument CSVs (the same path is passed as csv_path to the dump).\n",
"# exist_ok=True lets this cell be re-run without FileExistsError once the directory exists.\n",
"path = Path('qlib-data/source/market/20210101-20220531/k-daily/')\n",
"path.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "5584b39b-6c92-4b73-ac00-5e424050b654",
"metadata": {},
"outputs": [],
"source": [
"df_tinysoft.to_csv('qlib-data/source/market/20210101-20220531/k-daily/NE830799.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "09e91453-df05-4a8c-8d09-ff07a7083819",
"metadata": {},
"outputs": [],
"source": [
"# Create the output directory for the qlib dump.\n",
"# Unlike the source directory, the target path carries no date range.\n",
"# exist_ok=True keeps the cell re-runnable after the directory has been created.\n",
"path = Path('qlib-data/target/market/k-daily/')\n",
"path.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "d30df9ef-30bc-4a2d-80af-34603a803aef",
"metadata": {},
"outputs": [],
"source": [
"d = DumpDataAll(\n",
" csv_path='qlib-data/source/market/20210101-20220531/k-daily/',\n",
" qlib_dir='qlib-data/target/market/k-daily/',\n",
" max_workers=4,\n",
" symbol_field_name='StockID'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "31b2ecdb-f242-495e-93e7-7218c3ea0fc7",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-06-22 11:01:12.794 | INFO | src.qlib_scripts.dump_bin:_get_all_date:272 - start get all date......\n",
"100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.82it/s]\n",
"2022-06-22 11:01:13.343 | INFO | src.qlib_scripts.dump_bin:_get_all_date:291 - end of get all date.\n",
"\n",
"2022-06-22 11:01:13.343 | INFO | src.qlib_scripts.dump_bin:_dump_calendars:294 - start dump calendars......\n",
"2022-06-22 11:01:13.343 | INFO | src.qlib_scripts.dump_bin:_dump_calendars:297 - end of calendars dump.\n",
"\n",
"2022-06-22 11:01:13.343 | INFO | src.qlib_scripts.dump_bin:_dump_instruments:300 - start dump instruments......\n",
"2022-06-22 11:01:13.343 | INFO | src.qlib_scripts.dump_bin:_dump_instruments:302 - end of instruments dump.\n",
"\n",
"2022-06-22 11:01:13.343 | INFO | src.qlib_scripts.dump_bin:_dump_features:305 - start dump features......\n",
"100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.78it/s]\n",
"2022-06-22 11:01:13.905 | INFO | src.qlib_scripts.dump_bin:_dump_features:312 - end of features dump.\n",
"\n"
]
}
],
"source": [
"d()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "21fc1650-c20c-4b34-ae94-1abfaa23ed70",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('qlib-data/source/market/k-daily/NE430198.csv')\n",
"df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')\n",
"# Keep 'date' as an ordinary column instead of the index: the ratio computed later divides\n",
"# rows offset by one position, which is awkward when pandas aligns on the index.\n",
"# sort_values() without inplace returns a new frame, so later column assignments do not\n",
"# operate on a slice of the full CSV frame (avoids SettingWithCopyWarning).\n",
"df = df[['date', 'close', 'yclose', 'factor']].sort_values('date')"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "d73b686b-a2c1-495a-abe4-921673623985",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>close</th>\n",
" <th>yclose</th>\n",
" <th>factor</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>2011-01-04</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>2011-01-05</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>2011-01-06</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>2011-01-07</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>2011-01-10</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2766</th>\n",
" <td>2022-05-25</td>\n",
" <td>5.72</td>\n",
" <td>5.70</td>\n",
" <td>0.318452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2767</th>\n",
" <td>2022-05-26</td>\n",
" <td>5.77</td>\n",
" <td>5.72</td>\n",
" <td>0.318452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2768</th>\n",
" <td>2022-05-27</td>\n",
" <td>5.43</td>\n",
" <td>5.47</td>\n",
" <td>0.301895</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2769</th>\n",
" <td>2022-05-30</td>\n",
" <td>5.49</td>\n",
" <td>5.43</td>\n",
" <td>0.301895</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2770</th>\n",
" <td>2022-05-31</td>\n",
" <td>5.50</td>\n",
" <td>5.49</td>\n",
" <td>0.301895</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2771 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" date close yclose factor\n",
"82 2011-01-04 NaN NaN 1.000000\n",
"83 2011-01-05 NaN NaN 1.000000\n",
"84 2011-01-06 NaN NaN 1.000000\n",
"85 2011-01-07 NaN NaN 1.000000\n",
"86 2011-01-10 NaN NaN 1.000000\n",
"... ... ... ... ...\n",
"2766 2022-05-25 5.72 5.70 0.318452\n",
"2767 2022-05-26 5.77 5.72 0.318452\n",
"2768 2022-05-27 5.43 5.47 0.301895\n",
"2769 2022-05-30 5.49 5.43 0.301895\n",
"2770 2022-05-31 5.50 5.49 0.301895\n",
"\n",
"[2771 rows x 4 columns]"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "875c8c7a-af6e-4939-bbf1-35198b6e73ec",
"metadata": {},
"outputs": [],
"source": [
"# Keep only rows that have a close price and add the ratio column, initialised to 1.0.\n",
"# Filtering followed by .assign() yields a new frame, so column assignments in later cells\n",
"# modify df itself rather than a slice of the unfiltered data (no SettingWithCopyWarning).\n",
"df = df[df['close'].notna()].assign(yclose_t_by_close_t1=1.0)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "0b62eb6a-a6b0-45d3-864a-f791ad433a99",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>close</th>\n",
" <th>yclose</th>\n",
" <th>factor</th>\n",
" <th>yclose_t_by_close_t1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-04-10</td>\n",
" <td>22.93</td>\n",
" <td>8.00</td>\n",
" <td>1.000000</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-04-22</td>\n",
" <td>20.00</td>\n",
" <td>22.93</td>\n",
" <td>1.000000</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-06-18</td>\n",
" <td>8.00</td>\n",
" <td>19.75</td>\n",
" <td>0.861317</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-06-19</td>\n",
" <td>8.00</td>\n",
" <td>8.00</td>\n",
" <td>0.861317</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-06-23</td>\n",
" <td>15.88</td>\n",
" <td>8.00</td>\n",
" <td>0.861317</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2766</th>\n",
" <td>2022-05-25</td>\n",
" <td>5.72</td>\n",
" <td>5.70</td>\n",
" <td>0.318452</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2767</th>\n",
" <td>2022-05-26</td>\n",
" <td>5.77</td>\n",
" <td>5.72</td>\n",
" <td>0.318452</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2768</th>\n",
" <td>2022-05-27</td>\n",
" <td>5.43</td>\n",
" <td>5.47</td>\n",
" <td>0.301895</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2769</th>\n",
" <td>2022-05-30</td>\n",
" <td>5.49</td>\n",
" <td>5.43</td>\n",
" <td>0.301895</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2770</th>\n",
" <td>2022-05-31</td>\n",
" <td>5.50</td>\n",
" <td>5.49</td>\n",
" <td>0.301895</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1032 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" date close yclose factor yclose_t_by_close_t1\n",
"0 2015-04-10 22.93 8.00 1.000000 1.0\n",
"1 2015-04-22 20.00 22.93 1.000000 1.0\n",
"2 2015-06-18 8.00 19.75 0.861317 1.0\n",
"3 2015-06-19 8.00 8.00 0.861317 1.0\n",
"4 2015-06-23 15.88 8.00 0.861317 1.0\n",
"... ... ... ... ... ...\n",
"2766 2022-05-25 5.72 5.70 0.318452 1.0\n",
"2767 2022-05-26 5.77 5.72 0.318452 1.0\n",
"2768 2022-05-27 5.43 5.47 0.301895 1.0\n",
"2769 2022-05-30 5.49 5.43 0.301895 1.0\n",
"2770 2022-05-31 5.50 5.49 0.301895 1.0\n",
"\n",
"[1032 rows x 5 columns]"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "0ec6b9c1-c736-4a25-a5cb-f9b60508a7eb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1.000000\n",
"1 0.987500\n",
"2 1.000000\n",
"3 1.000000\n",
"4 1.000000\n",
" ... \n",
"1026 1.000000\n",
"1027 1.000000\n",
"1028 0.948007\n",
"1029 1.000000\n",
"1030 1.000000\n",
"Length: 1031, dtype: float64"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview yclose of each row divided by close of the row before it (positional pairing;\n",
"# the result has one element fewer than df and a fresh 0..n-2 index).\n",
"(df['yclose'].shift(-1) / df['close']).iloc[:-1].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "8d4df052-2567-4017-b88c-55c1c62864a3",
"metadata": {},
"outputs": [],
"source": [
"# yclose of each row divided by close of the previous row; the first row has no\n",
"# predecessor and is set to 1.0.\n",
"# Assign the column on df itself: writing through df[col].copy().iloc[...] would only\n",
"# change a temporary Series and leave df untouched.\n",
"prev_close_ratio = df['yclose'] / df['close'].shift(1)\n",
"prev_close_ratio.iloc[:1] = 1.0\n",
"df['yclose_t_by_close_t1'] = prev_close_ratio"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "c135882d-c6bc-44a5-9cfb-688b954ffc2a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>close</th>\n",
" <th>yclose</th>\n",
" <th>factor</th>\n",
" <th>yclose_t_by_close_t1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-04-10</td>\n",
" <td>22.93</td>\n",
" <td>8.00</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-04-22</td>\n",
" <td>20.00</td>\n",
" <td>22.93</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-06-18</td>\n",
" <td>8.00</td>\n",
" <td>19.75</td>\n",
" <td>0.861317</td>\n",
" <td>0.987500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-06-19</td>\n",
" <td>8.00</td>\n",
" <td>8.00</td>\n",
" <td>0.861317</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-06-23</td>\n",
" <td>15.88</td>\n",
" <td>8.00</td>\n",
" <td>0.861317</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2766</th>\n",
" <td>2022-05-25</td>\n",
" <td>5.72</td>\n",
" <td>5.70</td>\n",
" <td>0.318452</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2767</th>\n",
" <td>2022-05-26</td>\n",
" <td>5.77</td>\n",
" <td>5.72</td>\n",
" <td>0.318452</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2768</th>\n",
" <td>2022-05-27</td>\n",
" <td>5.43</td>\n",
" <td>5.47</td>\n",
" <td>0.301895</td>\n",
" <td>0.948007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2769</th>\n",
" <td>2022-05-30</td>\n",
" <td>5.49</td>\n",
" <td>5.43</td>\n",
" <td>0.301895</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2770</th>\n",
" <td>2022-05-31</td>\n",
" <td>5.50</td>\n",
" <td>5.49</td>\n",
" <td>0.301895</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1032 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" date close yclose factor yclose_t_by_close_t1\n",
"0 2015-04-10 22.93 8.00 1.000000 1.000000\n",
"1 2015-04-22 20.00 22.93 1.000000 1.000000\n",
"2 2015-06-18 8.00 19.75 0.861317 0.987500\n",
"3 2015-06-19 8.00 8.00 0.861317 1.000000\n",
"4 2015-06-23 15.88 8.00 0.861317 1.000000\n",
"... ... ... ... ... ...\n",
"2766 2022-05-25 5.72 5.70 0.318452 1.000000\n",
"2767 2022-05-26 5.77 5.72 0.318452 1.000000\n",
"2768 2022-05-27 5.43 5.47 0.301895 0.948007\n",
"2769 2022-05-30 5.49 5.43 0.301895 1.000000\n",
"2770 2022-05-31 5.50 5.49 0.301895 1.000000\n",
"\n",
"[1032 rows x 5 columns]"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "ad4ffd06-4f88-4b0f-97d9-f02b20b6ede3",
"metadata": {},
"outputs": [],
"source": [
"# Running product of the per-row ratios, stored as the 'adjFactor' column.\n",
"df['adjFactor'] = df.loc[:, 'yclose_t_by_close_t1'].cumprod()"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "ca05ce7c-c6c9-425e-af48-25827fa9d176",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>close</th>\n",
" <th>yclose</th>\n",
" <th>factor</th>\n",
" <th>yclose_t_by_close_t1</th>\n",
" <th>adjFactor</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-04-10</td>\n",
" <td>22.93</td>\n",
" <td>8.00</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-04-22</td>\n",
" <td>20.00</td>\n",
" <td>22.93</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-06-18</td>\n",
" <td>8.00</td>\n",
" <td>19.75</td>\n",
" <td>0.861317</td>\n",
" <td>0.987500</td>\n",
" <td>0.987500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-06-19</td>\n",
" <td>8.00</td>\n",
" <td>8.00</td>\n",
" <td>0.861317</td>\n",
" <td>1.000000</td>\n",
" <td>0.987500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-06-23</td>\n",
" <td>15.88</td>\n",
" <td>8.00</td>\n",
" <td>0.861317</td>\n",
" <td>1.000000</td>\n",
" <td>0.987500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2766</th>\n",
" <td>2022-05-25</td>\n",
" <td>5.72</td>\n",
" <td>5.70</td>\n",
" <td>0.318452</td>\n",
" <td>1.000000</td>\n",
" <td>0.294950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2767</th>\n",
" <td>2022-05-26</td>\n",
" <td>5.77</td>\n",
" <td>5.72</td>\n",
" <td>0.318452</td>\n",
" <td>1.000000</td>\n",
" <td>0.294950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2768</th>\n",
" <td>2022-05-27</td>\n",
" <td>5.43</td>\n",
" <td>5.47</td>\n",
" <td>0.301895</td>\n",
" <td>0.948007</td>\n",
" <td>0.279615</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2769</th>\n",
" <td>2022-05-30</td>\n",
" <td>5.49</td>\n",
" <td>5.43</td>\n",
" <td>0.301895</td>\n",
" <td>1.000000</td>\n",
" <td>0.279615</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2770</th>\n",
" <td>2022-05-31</td>\n",
" <td>5.50</td>\n",
" <td>5.49</td>\n",
" <td>0.301895</td>\n",
" <td>1.000000</td>\n",
" <td>0.279615</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1032 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" date close yclose factor yclose_t_by_close_t1 adjFactor\n",
"0 2015-04-10 22.93 8.00 1.000000 1.000000 1.000000\n",
"1 2015-04-22 20.00 22.93 1.000000 1.000000 1.000000\n",
"2 2015-06-18 8.00 19.75 0.861317 0.987500 0.987500\n",
"3 2015-06-19 8.00 8.00 0.861317 1.000000 0.987500\n",
"4 2015-06-23 15.88 8.00 0.861317 1.000000 0.987500\n",
"... ... ... ... ... ... ...\n",
"2766 2022-05-25 5.72 5.70 0.318452 1.000000 0.294950\n",
"2767 2022-05-26 5.77 5.72 0.318452 1.000000 0.294950\n",
"2768 2022-05-27 5.43 5.47 0.301895 0.948007 0.279615\n",
"2769 2022-05-30 5.49 5.43 0.301895 1.000000 0.279615\n",
"2770 2022-05-31 5.50 5.49 0.301895 1.000000 0.279615\n",
"\n",
"[1032 rows x 6 columns]"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 97,
"id": "e444db45-e4ab-4633-9ae7-15f7b5d9444b",
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"\n",
"try:\n",
"    # Public location of the warning class in newer pandas releases.\n",
"    from pandas.errors import SettingWithCopyWarning\n",
"except ImportError:\n",
"    # Older pandas releases only provide it under pandas.core.common.\n",
"    from pandas.core.common import SettingWithCopyWarning\n",
"\n",
"# Raise instead of warn, so assignments to a possible copy fail loudly.\n",
"warnings.simplefilter('error', SettingWithCopyWarning)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d0c8d58-36e4-4eb1-a1c1-a4cdc9de659d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"toc-autonumbering": false,
"toc-showcode": false,
"toc-showmarkdowntxt": false
},
"nbformat": 4,
"nbformat_minor": 5
}