{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "ac9e74d6-d890-42a3-80e1-3b84a18c5c36", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from datetime import datetime\n", "\n", "export_cols_bs = {\n", " '存货' : 'INVENTORIES',\n", " '负债合计' : 'TOT_LIAB',\n", " '流动资产合计' : 'TOT_CUR_ASSETS'\n", "}\n", "\n", "export_cols_pl = {\n", " '净利润' : 'NET_PROFIT_INCL_MIN_INT_INC',\n", " '营业外收入' : 'PLUS_NON_OPER_REV',\n", " '营业收入' : 'OPER_REV'\n", "}\n", "\n", "meta_rename_dict = {\n", " 'StockID':'symbol', # \n", " '数据报告期':'appear_in_period', # 财报对应的报告期,但同一期财报中可能对历史财报进行修正\n", " '公布日':'date', # 公布日之后,公开信息才能看到当期报告中的所有数据\n", " '截止日':'period', # 公布的数据对应的报告期,如果`截止日 < 报告期`,则为修正财报\n", " '预案公布日':'date'\n", "}\n", "\n", "def adj_split(df):\n", " df.reset_index(inplace=True)\n", "\n", " df_ori = df[df['period'] == df['appear_in_period']]\n", " df_adj = df[df['period'] != df['appear_in_period']]\n", " return df_ori, df_adj\n", "\n", "\n", "def to_qlib_format_pl(df):\n", " df.rename(columns=meta_rename_dict, inplace=True)\n", " \n", " def _date_format(df, col): \n", " df[col] = pd.to_datetime(df[col].astype(str), format='%Y%m%d')\n", " df[col] = df[col].dt.strftime('%Y-%m-%d')\n", " \n", " index_cols = ['period', 'appear_in_period', 'date']\n", " for col in index_cols:\n", " _date_format(df, col)\n", " \n", " sel_cols = index_cols + list(export_cols_pl.keys()) \n", " df_export = df[sel_cols]\n", " df_export.set_index(index_cols, inplace=True)\n", "\n", " df_export_ori, df_export_adj = adj_split(df_export)\n", " df_export_ori.set_index(index_cols, inplace=True)\n", " df_export_adj.set_index(index_cols, inplace=True)\n", "\n", " adj_col_rename = {name : name+'(调整)' for name in export_cols_pl.keys()}\n", " df_export_adj.rename(columns=adj_col_rename, inplace=True)\n", "\n", " df_list = []\n", "\n", " def _T(df, df_list):\n", " for col in list(df.columns):\n", " df_tmp = df[[col]].copy(deep=True)\n", " df_tmp['field'] = col\n", " df_tmp.rename(columns={col:'value'}, inplace=True)\n", " df_list.append(df_tmp)\n", " \n", " _T(df_export_adj, df_list)\n", " _T(df_export_ori, df_list)\n", "\n", " df = pd.concat(df_list, axis=0)\n", "\n", " return df" ] }, { "cell_type": "code", "execution_count": 2, "id": "cd9eb5ec-0074-4b0d-8308-ca17ae49c31f", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('_data/tinysoft-data/基础报表/46.合并利润分配表/SH600000.csv')" ] }, { "cell_type": "code", "execution_count": 3, "id": "2480a86d-2b9c-43a5-8554-b54b62cca168", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_2946/1755015898.py:52: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_export_adj.rename(columns=adj_col_rename, inplace=True)\n" ] } ], "source": [ "df2 = to_qlib_format_pl(df)" ] }, { "cell_type": "code", "execution_count": 4, "id": "5b1ce032-2e90-4d60-a1a2-5bb1614eac1b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valuefield
periodappear_in_perioddate
2000-06-302001-06-302001-08-225.009706e+08净利润(调整)
2001-08-222.662224e+09营业收入(调整)
2000-06-302000-07-295.302665e+08净利润
2000-07-292.161194e+09营业收入
2000-07-297.108678e+05营业外收入
...............
2021-12-312021-12-312022-04-285.376600e+10净利润
2022-04-281.909820e+11营业收入
2022-03-312022-03-312022-04-281.959700e+10净利润
2022-04-289.000000e+06营业外收入
2022-04-285.000200e+10营业收入
\n", "

504 rows × 2 columns

\n", "
" ], "text/plain": [ " value field\n", "period appear_in_period date \n", "2000-06-30 2001-06-30 2001-08-22 5.009706e+08 净利润(调整)\n", " 2001-08-22 2.662224e+09 营业收入(调整)\n", " 2000-06-30 2000-07-29 5.302665e+08 净利润\n", " 2000-07-29 2.161194e+09 营业收入\n", " 2000-07-29 7.108678e+05 营业外收入\n", "... ... ...\n", "2021-12-31 2021-12-31 2022-04-28 5.376600e+10 净利润\n", " 2022-04-28 1.909820e+11 营业收入\n", "2022-03-31 2022-03-31 2022-04-28 1.959700e+10 净利润\n", " 2022-04-28 9.000000e+06 营业外收入\n", " 2022-04-28 5.000200e+10 营业收入\n", "\n", "[504 rows x 2 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.sort_values('period')" ] }, { "cell_type": "code", "execution_count": 5, "id": "383e0b65-f01f-403f-a649-e4f857df2ab1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valuefield
perioddate
2000-06-302000-07-295.302665e+08净利润
2000-12-312001-04-299.579047e+08净利润
2001-06-302001-08-225.349113e+08净利润
2001-09-302001-10-292.870819e+08净利润
2001-12-312002-03-211.061878e+09净利润
............
2020-03-312021-04-305.542400e+10营业收入(调整)
2020-06-302021-08-281.014070e+11营业收入(调整)
2020-09-302021-10-301.487310e+11营业收入(调整)
2020-12-312022-04-281.963840e+11营业收入(调整)
2021-03-312022-04-284.952200e+10营业收入(调整)
\n", "

504 rows × 2 columns

\n", "
" ], "text/plain": [ " value field\n", "period date \n", "2000-06-30 2000-07-29 5.302665e+08 净利润\n", "2000-12-31 2001-04-29 9.579047e+08 净利润\n", "2001-06-30 2001-08-22 5.349113e+08 净利润\n", "2001-09-30 2001-10-29 2.870819e+08 净利润\n", "2001-12-31 2002-03-21 1.061878e+09 净利润\n", "... ... ...\n", "2020-03-31 2021-04-30 5.542400e+10 营业收入(调整)\n", "2020-06-30 2021-08-28 1.014070e+11 营业收入(调整)\n", "2020-09-30 2021-10-30 1.487310e+11 营业收入(调整)\n", "2020-12-31 2022-04-28 1.963840e+11 营业收入(调整)\n", "2021-03-31 2022-04-28 4.952200e+10 营业收入(调整)\n", "\n", "[504 rows x 2 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.reset_index('appear_in_period', drop=True).sort_values(['field', 'period', 'date'])" ] }, { "cell_type": "code", "execution_count": 6, "id": "f98e15f7-d114-47f1-ba36-38c7a8a08d1c", "metadata": {}, "outputs": [], "source": [ "df2.reset_index('appear_in_period', drop=True).sort_values(['field', 'period', 'date']).to_csv('_data/test/source/SH600000.csv')" ] }, { "cell_type": "code", "execution_count": 7, "id": "a3c0a785-061a-45e8-9fe9-aaa39b00a60d", "metadata": {}, "outputs": [], "source": [ "import qlib\n", "from qlib.data import D\n", "from qlib.constant import REG_CN" ] }, { "cell_type": "code", "execution_count": 8, "id": "13ea0543-7d04-4456-92a1-4652feae7044", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[2946:MainThread](2022-07-08 17:42:44,996) INFO - qlib.Initialization - [config.py:413] - default_conf: client.\n", "[2946:MainThread](2022-07-08 17:42:46,508) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.\n", "[2946:MainThread](2022-07-08 17:42:46,511) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/home/guofu/Workspaces/guofu/TslDataFeed/_data/test/target')}\n" ] } ], "source": [ "qlib.init(provider_uri='_data/test/target/', region=REG_CN)" ] }, { "cell_type": "code", "execution_count": 21, "id": "e8a3b391-47b1-484b-b359-3ebaa303ade3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
$open$closeP($$净利润_q)PRef($$净利润_q, 202004)PRef($$净利润(调整)_q, 201804)
instrumentdatetime
sh6000002021-03-2610.6810.624.519000e+10NaN5.651500e+10
2021-03-2910.6010.485.899300e+105.899300e+105.651500e+10
2021-03-3010.6511.015.899300e+105.899300e+105.651500e+10
2021-03-3111.0110.995.899300e+105.899300e+105.651500e+10
2021-04-0110.9910.895.899300e+105.899300e+105.651500e+10
2021-04-0210.9410.765.899300e+105.899300e+105.651500e+10
2021-04-0610.7610.705.899300e+105.899300e+105.651500e+10
2021-04-0710.7410.655.899300e+105.899300e+105.651500e+10
2021-04-0810.6310.665.899300e+105.899300e+105.651500e+10
2021-04-0910.6410.595.899300e+105.899300e+105.651500e+10
2021-04-1210.5810.735.899300e+105.899300e+105.651500e+10
2021-04-1310.7410.765.899300e+105.899300e+105.651500e+10
2021-04-1410.8210.595.899300e+105.899300e+105.651500e+10
2021-04-1510.5810.435.899300e+105.899300e+105.651500e+10
2021-04-1610.4510.545.899300e+105.899300e+105.651500e+10
2021-04-1910.5510.585.899300e+105.899300e+105.651500e+10
2021-04-2010.5510.505.899300e+105.899300e+105.651500e+10
2021-04-2110.4710.585.899300e+105.899300e+105.651500e+10
2021-04-2210.6010.435.899300e+105.899300e+105.651500e+10
2021-04-2310.4210.435.899300e+105.899300e+105.651500e+10
2021-04-2610.4510.305.899300e+105.899300e+105.651500e+10
2021-04-2710.3310.275.899300e+105.899300e+105.651500e+10
2021-04-2810.3310.255.899300e+105.899300e+105.651500e+10
2021-04-2910.2810.545.899300e+105.899300e+105.651500e+10
2021-04-3010.2810.051.885700e+105.899300e+105.651500e+10
\n", "
" ], "text/plain": [ " $open $close P($$净利润_q) PRef($$净利润_q, 202004) \\\n", "instrument datetime \n", "sh600000 2021-03-26 10.68 10.62 4.519000e+10 NaN \n", " 2021-03-29 10.60 10.48 5.899300e+10 5.899300e+10 \n", " 2021-03-30 10.65 11.01 5.899300e+10 5.899300e+10 \n", " 2021-03-31 11.01 10.99 5.899300e+10 5.899300e+10 \n", " 2021-04-01 10.99 10.89 5.899300e+10 5.899300e+10 \n", " 2021-04-02 10.94 10.76 5.899300e+10 5.899300e+10 \n", " 2021-04-06 10.76 10.70 5.899300e+10 5.899300e+10 \n", " 2021-04-07 10.74 10.65 5.899300e+10 5.899300e+10 \n", " 2021-04-08 10.63 10.66 5.899300e+10 5.899300e+10 \n", " 2021-04-09 10.64 10.59 5.899300e+10 5.899300e+10 \n", " 2021-04-12 10.58 10.73 5.899300e+10 5.899300e+10 \n", " 2021-04-13 10.74 10.76 5.899300e+10 5.899300e+10 \n", " 2021-04-14 10.82 10.59 5.899300e+10 5.899300e+10 \n", " 2021-04-15 10.58 10.43 5.899300e+10 5.899300e+10 \n", " 2021-04-16 10.45 10.54 5.899300e+10 5.899300e+10 \n", " 2021-04-19 10.55 10.58 5.899300e+10 5.899300e+10 \n", " 2021-04-20 10.55 10.50 5.899300e+10 5.899300e+10 \n", " 2021-04-21 10.47 10.58 5.899300e+10 5.899300e+10 \n", " 2021-04-22 10.60 10.43 5.899300e+10 5.899300e+10 \n", " 2021-04-23 10.42 10.43 5.899300e+10 5.899300e+10 \n", " 2021-04-26 10.45 10.30 5.899300e+10 5.899300e+10 \n", " 2021-04-27 10.33 10.27 5.899300e+10 5.899300e+10 \n", " 2021-04-28 10.33 10.25 5.899300e+10 5.899300e+10 \n", " 2021-04-29 10.28 10.54 5.899300e+10 5.899300e+10 \n", " 2021-04-30 10.28 10.05 1.885700e+10 5.899300e+10 \n", "\n", " PRef($$净利润(调整)_q, 201804) \n", "instrument datetime \n", "sh600000 2021-03-26 5.651500e+10 \n", " 2021-03-29 5.651500e+10 \n", " 2021-03-30 5.651500e+10 \n", " 2021-03-31 5.651500e+10 \n", " 2021-04-01 5.651500e+10 \n", " 2021-04-02 5.651500e+10 \n", " 2021-04-06 5.651500e+10 \n", " 2021-04-07 5.651500e+10 \n", " 2021-04-08 5.651500e+10 \n", " 2021-04-09 5.651500e+10 \n", " 2021-04-12 5.651500e+10 \n", " 2021-04-13 5.651500e+10 \n", " 2021-04-14 5.651500e+10 \n", " 2021-04-15 5.651500e+10 \n", " 2021-04-16 5.651500e+10 \n", " 2021-04-19 5.651500e+10 \n", " 2021-04-20 5.651500e+10 \n", " 2021-04-21 5.651500e+10 \n", " 2021-04-22 5.651500e+10 \n", " 2021-04-23 5.651500e+10 \n", " 2021-04-26 5.651500e+10 \n", " 2021-04-27 5.651500e+10 \n", " 2021-04-28 5.651500e+10 \n", " 2021-04-29 5.651500e+10 \n", " 2021-04-30 5.651500e+10 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "D.features(['sh600000'], ['$open', '$close', 'P($$净利润_q)', 'PRef($$净利润_q, 202004)', 'PRef($$净利润(调整)_q, 201804)'], start_time='2021-03-26', end_time='2021-05-02', freq=\"day\")" ] }, { "cell_type": "code", "execution_count": 13, "id": "5af83fe1-b435-4f6d-aab1-9f3f4406c965", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
$open$close
instrumentdatetime
sh6000002021-03-2610.6810.62
2021-03-2910.6010.48
2021-03-3010.6511.01
2021-03-3111.0110.99
2021-04-0110.9910.89
2021-04-0210.9410.76
2021-04-0610.7610.70
2021-04-0710.7410.65
2021-04-0810.6310.66
2021-04-0910.6410.59
2021-04-1210.5810.73
2021-04-1310.7410.76
2021-04-1410.8210.59
2021-04-1510.5810.43
2021-04-1610.4510.54
2021-04-1910.5510.58
2021-04-2010.5510.50
2021-04-2110.4710.58
2021-04-2210.6010.43
2021-04-2310.4210.43
2021-04-2610.4510.30
2021-04-2710.3310.27
2021-04-2810.3310.25
2021-04-2910.2810.54
2021-04-3010.2810.05
\n", "
" ], "text/plain": [ " $open $close\n", "instrument datetime \n", "sh600000 2021-03-26 10.68 10.62\n", " 2021-03-29 10.60 10.48\n", " 2021-03-30 10.65 11.01\n", " 2021-03-31 11.01 10.99\n", " 2021-04-01 10.99 10.89\n", " 2021-04-02 10.94 10.76\n", " 2021-04-06 10.76 10.70\n", " 2021-04-07 10.74 10.65\n", " 2021-04-08 10.63 10.66\n", " 2021-04-09 10.64 10.59\n", " 2021-04-12 10.58 10.73\n", " 2021-04-13 10.74 10.76\n", " 2021-04-14 10.82 10.59\n", " 2021-04-15 10.58 10.43\n", " 2021-04-16 10.45 10.54\n", " 2021-04-19 10.55 10.58\n", " 2021-04-20 10.55 10.50\n", " 2021-04-21 10.47 10.58\n", " 2021-04-22 10.60 10.43\n", " 2021-04-23 10.42 10.43\n", " 2021-04-26 10.45 10.30\n", " 2021-04-27 10.33 10.27\n", " 2021-04-28 10.33 10.25\n", " 2021-04-29 10.28 10.54\n", " 2021-04-30 10.28 10.05" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "D.features(['sh600000'], ['$open', '$close'], start_time='2021-03-26', end_time='2021-05-02', freq=\"day\")" ] }, { "cell_type": "code", "execution_count": null, "id": "4e74b079-8c02-47f3-bc06-15081ecf91fe", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 }