You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
alpha_lab/cta_1d/03_baseline_xgb_executed.ipynb

736 lines
368 KiB

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# CTA 1D Baseline XGBoost Model\n",
"\n",
"Train and evaluate a baseline XGBoost model for CTA 1-day return prediction.\n",
"\n",
"**Purpose**: Establish a baseline performance benchmark with standard configuration."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2026-02-14T08:12:19.244972Z",
"iopub.status.busy": "2026-02-14T08:12:19.244658Z",
"iopub.status.idle": "2026-02-14T08:12:20.730424Z",
"shell.execute_reply": "2026-02-14T08:12:20.729462Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import json\n",
"from datetime import datetime\n",
"\n",
"# Use the new API from src/\n",
"from src.loader_parquet import CTA1DLoaderParquet\n",
"from src.train import train_model, TrainConfig\n",
"from src.backtest import run_backtest, BacktestConfig\n",
"from src.labels import get_blend_weights\n",
"\n",
"import sys\n",
"sys.path.insert(0, '../')\n",
"from common.plotting import setup_plot_style, plot_ic_series, plot_cumulative_returns\n",
"from common.paths import create_experiment_dir\n",
"\n",
"setup_plot_style()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Configuration\n",
"\n",
"Edit this cell to modify experiment parameters."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2026-02-14T08:12:20.733937Z",
"iopub.status.busy": "2026-02-14T08:12:20.733741Z",
"iopub.status.idle": "2026-02-14T08:12:20.739463Z",
"shell.execute_reply": "2026-02-14T08:12:20.738798Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Configuration:\n",
" Experiment: baseline_xgb\n",
" Train: 2020-01-01 to 2021-12-31\n",
" Valid: 2022-01-01 to 2022-06-30\n",
" Test: 2022-07-01 to 2023-12-31\n",
" Fit: 2020-01-01 to 2021-12-31 (normalization)\n",
" Blend: default: [0.2, 0.1, 0.3, 0.4]\n"
]
}
],
"source": [
"CONFIG = {\n",
" # Experiment\n",
" 'experiment_name': 'baseline_xgb', # Will be appended with timestamp\n",
" \n",
" # Date ranges (YYYYMMDD format for Parquet loader)\n",
" 'dt_range': ['2020-01-01', '2023-12-31'],\n",
" 'train_range': ['2020-01-01', '2021-12-31'],\n",
" 'valid_range': ['2022-01-01', '2022-06-30'],\n",
" 'test_range': ['2022-07-01', '2023-12-31'],\n",
" 'fit_range': ['2020-01-01', '2021-12-31'], # MUST match train_range - prevents data leakage\n",
" \n",
" # Data\n",
" 'feature_sets': ['alpha158', 'hffactor'],\n",
" 'return_type': 'o2c_twap1min',\n",
" 'normalization': 'dual',\n",
" 'blend_weights': None, # Use default [0.2, 0.1, 0.3, 0.4] or specify name/list\n",
" 'weight_factors': {'positive': 1.0, 'negative': 2.0},\n",
" \n",
" # Model (XGBoost parameters with regularization)\n",
" 'model_params': {\n",
" 'objective': 'reg:squarederror',\n",
" 'eval_metric': 'rmse',\n",
" 'eta': 0.05,\n",
" 'max_depth': 4, # Reduced to prevent overfitting\n",
" 'subsample': 0.8,\n",
" 'colsample_bytree': 0.8,\n",
" 'lambda': 1.0, # L2 regularization\n",
" 'alpha': 0.1, # L1 regularization\n",
" 'seed': 42\n",
" },\n",
" \n",
" # Backtest\n",
" 'num_trades': 4,\n",
" 'signal_dist': 'normal',\n",
" 'pos_weight': True,\n",
" \n",
" # Output\n",
" 'save_results': True,\n",
"}\n",
"\n",
"print(\"Configuration:\")\n",
"print(f\" Experiment: {CONFIG['experiment_name']}\")\n",
"print(f\" Train: {CONFIG['train_range'][0]} to {CONFIG['train_range'][1]}\")\n",
"print(f\" Valid: {CONFIG['valid_range'][0]} to {CONFIG['valid_range'][1]}\")\n",
"print(f\" Test: {CONFIG['test_range'][0]} to {CONFIG['test_range'][1]}\")\n",
"print(f\" Fit: {CONFIG['fit_range'][0]} to {CONFIG['fit_range'][1]} (normalization)\")\n",
"blend_desc = \"default: [0.2, 0.1, 0.3, 0.4]\" if CONFIG['blend_weights'] is None else str(CONFIG['blend_weights'])\n",
"print(f\" Blend: {blend_desc}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Load Dataset and Train Model"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2026-02-14T08:12:20.797194Z",
"iopub.status.busy": "2026-02-14T08:12:20.796882Z",
"iopub.status.idle": "2026-02-14T08:13:57.887738Z",
"shell.execute_reply": "2026-02-14T08:13:57.886964Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading dataset and training model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:src.train:Loaded 175 features\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:src.train:Train size: 29749, Valid: 7527, Test: 23799\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset loaded with 175 features\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:src.train:Training XGBoost model...\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0]\ttrain-rmse:0.45902\tvalid-rmse:0.41803\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[50]\ttrain-rmse:0.44688\tvalid-rmse:0.41783\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[65]\ttrain-rmse:0.44472\tvalid-rmse:0.41812\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:src.train:train - IC: 0.2707, R²: 0.0596\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:src.train:valid - IC: 0.0799, R²: -0.0001\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:src.train:test - IC: 0.0898, R²: 0.0014\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Training metrics:\n",
" train_ic: 0.2707\n",
" train_r2: 0.0596\n",
" valid_ic: 0.0799\n",
" valid_r2: -0.0001\n",
" test_ic: 0.0898\n",
" test_r2: 0.0014\n"
]
}
],
"source": [
"print(\"Loading dataset and training model...\")\n",
"\n",
"# Load dataset first - we'll use this for both training and prediction\n",
"loader = CTA1DLoaderParquet(\n",
" return_type=CONFIG['return_type'],\n",
" normalization=CONFIG['normalization'],\n",
" feature_sets=CONFIG['feature_sets'],\n",
" blend_weights=CONFIG['blend_weights'],\n",
")\n",
"\n",
"dataset = loader.load(\n",
" dt_range=CONFIG['dt_range'],\n",
" fit_range=CONFIG['train_range'] # Use train range for normalization - prevents data leakage\n",
")\n",
"feature_cols = dataset.features\n",
"df_full = dataset.to_pandas().data\n",
"print(f\"Dataset loaded with {len(feature_cols)} features\")\n",
"\n",
"# Create training config\n",
"train_config = TrainConfig(\n",
" dt_range=CONFIG['dt_range'],\n",
" feature_sets=CONFIG['feature_sets'],\n",
" normalization=CONFIG['normalization'],\n",
" blend_weights=CONFIG['blend_weights'],\n",
" model_type='xgb',\n",
" model_params=CONFIG['model_params'],\n",
" segments={\n",
" 'train': (CONFIG['train_range'][0], CONFIG['train_range'][1]),\n",
" 'valid': (CONFIG['valid_range'][0], CONFIG['valid_range'][1]),\n",
" 'test': (CONFIG['test_range'][0], CONFIG['test_range'][1]),\n",
" }\n",
")\n",
"\n",
"# Train model using the pre-loaded dataset to ensure feature consistency\n",
"model, metrics = train_model(\n",
" config=train_config,\n",
" output_dir=None,\n",
" dataset=dataset # Pass pre-loaded dataset\n",
")\n",
"\n",
"print(f\"\\nTraining metrics:\")\n",
"for key, value in metrics.items():\n",
" print(f\" {key}: {value:.4f}\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2026-02-14T08:13:57.889494Z",
"iopub.status.busy": "2026-02-14T08:13:57.889304Z",
"iopub.status.idle": "2026-02-14T08:13:58.092001Z",
"shell.execute_reply": "2026-02-14T08:13:58.091114Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Top 10 Features:\n",
" feature importance\n",
"57 f58 15.151920\n",
"22 f22 14.377318\n",
"23 f23 10.843798\n",
"21 f21 10.386007\n",
"64 f68 8.473670\n",
"99 f119 7.243452\n",
"96 f116 6.621992\n",
"97 f117 5.903418\n",
"19 f19 4.761685\n",
"101 f121 4.529542\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA94AAAJOCAYAAABBfN/cAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAZ6NJREFUeJzt3X98zfX///H7dmZbmCERU6Te+XnszK+FZaI3vSvxJqVs+PhRjETNSH5GUfmVX/1Q1LZkoqhI3uiHaumg1SHsnZAfUWkT5sd2tu8fvs67NbTZXnud83K7Xi4un7PX67mz+zmPPu/tvteP+eXl5eUJAAAAAAAYwt/sAAAAAAAAWBnFGwAAAAAAA1G8AQAAAAAwEMUbAAAAAAADUbwBAAAAADAQxRsAAAAAAANRvAEAAAAAMBDFGwAAAAAAA1G8AQAAAAAwUIDZAQAAkKRRo0bp3XffveSaFi1aKCkpybAMhw4d0owZM/TVV1/p5MmTuuGGG/Twww+rY8eOnjV5eXl69dVXtXTpUv3888+69tprdf/996t///7y8/O76HO3a9dOBw8evOj+N998U82aNSvR1+MN5syZo7lz5+q7775TUFCQ2XEAADAFxRsA4BWefPJJPf74456Px48fr+3bt2vZsmWebWXKlDHs6x87dkyxsbG65pprNGfOHIWGhuqtt97S0KFD9dprrykqKkqSNH/+fL3yyiuaOHGimjVrpq1bt2rs2LFyu90aOHDgJb9G+/btNXHixAvuq1ixYkm/JN12222aOnWqIiMjS/y5rWjkyJGqWbOmHnnkEbOjAAAshuINAPAKISEhCgkJ8XwcFBQkm82ma665plS+/urVq3XgwAElJiYqLCxMkjR69GitX79e7777rqKionT69Gm9+uqr6tOnj7p06SJJqlmzptLT0/XKK6/o//7v/y55VDcoKKjUXs+RI0d06NChYj/P2bNnFRgYWAKJvFd2drbKlCmjb775RjVr1jQ7DgDAgrjGGwDgUz7++GPdd999aty4sRwOhx544AGlpqZ69n/xxReqW7euPv/8cw0ZMkQRERFq2rSpRo0apVOnTl30ebt166bPPvvMU7olyc/PT5UrV9Yvv/wiSdq6dauysrLUpk2bfJ/btm1bnTx5Ulu3bi3268vJydGcOXPUvn17NWrUSNHR0Xruued09uxZzxq3263Zs2erY8eOaty4sVq3bq2hQ4fqwIEDkqRNmzZ5Mvbq1Uvt2rWTJMXGxuq+++7L9/U2bdqkunXr6rPPPpMkvfPOO56P27dvn2/96tWr1aVLF9ntdrVo0ULDhw/XkSNHivT6zs9n06ZN6tevnxo3bqzo6GitWLFCR44c0YABA+RwOBQdHa3k5GTP5y1dulR169aVy+VS7969FR4erltuuUVTp06V2+32rDt+/LjGjx+vqKgoz/s3efLkfLOPjY1VXFycZs+erYiICL355puqW7eu9u3bp7lz56pu3bqe9/KDDz5Q165d1aRJEzVt2lQPPPCAvv76a89z7d27V3Xr1tXq1as1efJkRUZGqmnTpnrooYfyvTd5eXl65ZVX1L59e9ntdnXs2FGJiYn53pvt27erX79+ioiIUHh4uGJiYkrkvykAgPko3gAAn/Hll19q0KBBuvnmm7V06VItXrxY1apVU//+/fX9999Lkmw2myRp8uTJ6tixo9577z2NHj1aH3zwgZ577rmLPndgYKCqVauWb9uhQ4e0c+dORURESJL27NkjSbruuuvyrTv/8fn9xfHUU09pwYIFGjBggFatWqWRI0dq2bJlGj9+vGfNyy+/rJdfflmPPvqo1qxZoxdffFEHDhzQ0KFDJUkRERGaPn26pHPXWP/5dP3CWrBggZ555hm9/PLLks4V0OHDh6tFixZasWKF5s2bpx9++EF9+vTJ90uBvxMQcO5ku5kzZyomJkYrVqzQDTfcoAkTJmjUqFG6//77tWLFCjVt2lTPPPOMpwCfn+v48ePVt29fvf/+++rfv78WLVqkRYsWeZ5/4MCBWr9+vcaPH69Vq1YpISFBK1euVEJCQr4cP/zwg/bs2aPly5era9eu2rBhgySpb9+++vzzz1W9enVt3rxZjz/+uFq3bq0VK1bo7bff1nXXXaeHH37YU6rPv5758+erWrVqevvttzVz5kx9/fXXmjVrlufrzZ07Vy+++KIeeeQRrVq1SgMGDNDUqVP15ptvSpL27dunmJgY5eTkKDExUUuXLlWVKlX0f//3f/rxxx8L/f4CALwTxRsA4DNee+01XXfddZo0aZLq1aunBg0aaOrUqSpbtqzn6Oj5G5xFR0erU6dOuu6669StWzd16tRJ77//vvLy8gr1tc6cOaPHH39coaGh6tOnjyTpjz/+kCSVL18+39rzH5/ff7l+/fVXLVu2TH369FGPHj1Uq1Yt3XnnnYqLi9OKFSs8R95jYmK0du1a3XnnnapRo4YaN26se++9V9u3b9fvv/+uwMBAVahQQZIUGhqqypUrFzlLx44dFRkZ6fllxIsvvqiIiAiNHj1aN954o5o3b65nn31WP/74o9atW1fk57/tttt02223qU6dOurRo4dOnTqlFi1a6Pbbb1ft2rXVp08fud1u7dy5U9L/5tqlSxdFR0fr+uuvV//+/dWiRQu99957kqRvvvlGmzdvVnx8vP75z3+qVq1auuuuuzRgwACtXbs236n3hw4d0vjx41WnTh1VqFBBVapUkSSVLVtW11xzjWw2mxo1aqR169bp0Ucf1fXXX686dero4YcfVlZWVoEj0XXq1NGAAQN0/fXXq02bNrrlllv03XffSTp3uv7rr7+unj17qkuXLrr++ut17733avDgwTpx4oQk6fXXX5ckvfDCC7Lb7apbt66effZZlS9fXm+88UaR318AgHfhGm8AgM9wuVxq3759vruHBwcHq169ep6Cdl6TJk3yfdygQQO98847+uWXXwoc2f6rEydOaNCgQfrvf/+rhQsXeorrpe5aXpj9a9eu9Rw9/6svvvhCLpdLbre7wM3QbrnlFuXm5iotLU0dOnSQJC1atEgbN27U0aNH5Xa7lZOTI0nKyMi4rKL9V40aNfI8PnHihH744YcCN49r0KCBKlasqK1bt+rOO+8s0vPXq1fP8/j8jeUaNGjg2VapUiVJ504d/7O/zrVhw4aeX7q4XC5JUvPmzfOtcTgckqSdO3eqRo0aks5dm/93N7QLCgrSunXr9N577+ngwYPKzs72/OImMzMz39rw8PB8H4eGhmrbtm2Szp0JceLECTVs2DDfmsGDB3sef/vtt6pbt26+TEFBQYqIiOB0cwCwAIo3AMBnnDhxQqGhoQW2h4aGFrjW+M83apOkq666SpJ0+vTpS36NX375RQMGDNDRo0eVlJSk+vXre/adP4p8/PhxlS1bNl+u8zkuJSoqSqNHj77gvquuuspTMuPi4uTv/7+T0s6XvV9//VXSuTvAf/755xo5cqQiIiIUHBystWvXatq0aZf8+kXx5/fv/Ot77bXXClyXfOrUKU+uoggODvY8Pv8Liwtt++sZCheaa3Z2tnJyci46h/NzO7//z9suJTk5WVOnTlW/fv30r3/9SxUqVNCRI0cUGxtbYO2f/3s4n/989vNz/fPr+6vjx4/r4MGDBX4xc/bs2UJlBQB4N4o3AMBnhISEFDjSKJ07+vjXQnbs2LF8H588eVKSVK5cuYs+f2Zmpue08qVLl3qOjp534403SpJ++umnfEfNz1/bfX7/xZQtW1a1atW66P7zhfH5559X3bp1C+yvXLmyzp49q/Xr16tfv37q0aOHZ9/fHW2/mL/7RYT0v7Lbu3fvAjdnkwqWTiNdaK5BQUEKCAjw5MzMzMx3OcD5/2aKWmBXr14th8OR7/rwv379wvhzroupUKGCrr32Wk2ePLnAvj//EgYA4Jv4X3IAgM8IDw+X0+nMdxT05MmT+v7772W32/Ot/eabb/J9vGPHDlWuXNlzLe9f5eXlaejQocrNzVVycnKB0i2du2lZaGio50Zc523YsEEVK1b0nNJ8uRo1aiSbzabDhw+rVq1ann/
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Feature importance\n",
"importance = model.get_score(importance_type='gain')\n",
"importance_df = pd.DataFrame([\n",
" {'feature': k, 'importance': v}\n",
" for k, v in importance.items()\n",
"]).sort_values('importance', ascending=False)\n",
"\n",
"print(\"\\nTop 10 Features:\")\n",
"print(importance_df.head(10))\n",
"\n",
"# Plot\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"importance_df.head(20).plot(x='feature', y='importance', kind='barh', ax=ax)\n",
"ax.set_title('Top 20 Feature Importance')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Feature Importance"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2026-02-14T08:13:58.094128Z",
"iopub.status.busy": "2026-02-14T08:13:58.093959Z",
"iopub.status.idle": "2026-02-14T08:13:58.526368Z",
"shell.execute_reply": "2026-02-14T08:13:58.525387Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running backtest on test set...\n",
"\n",
"Predictions generated: 23799 samples\n",
"Signal statistics:\n",
" Mean: 0.0242\n",
" Std: 0.0614\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABWgAAAGGCAYAAADmcmilAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXmYFOW9/U9V9+wzzMIuCBiQfQZQVNwVNSJxi4EIXqO5GkJcYlwSl1wNJvqLiQY10eiVRIWbRYxo3OKCSqIh4i6ygwuI7AwzA8Ns3V31/v6ovbqql5neZvp8noeH6arqqrf2rlPnPV9JCCFACCGEEEIIIYQQQgghJOPI2W4AIYQQQgghhBBCCCGE5CsUaAkhhBBCCCGEEEIIISRLUKAlhBBCCCGEEEIIIYSQLEGBlhBCCCGEEEIIIYQQQrIEBVpCCCGEEEIIIYQQQgjJEhRoCSGEEEIIIYQQQgghJEtQoCWEEEIIIYQQQgghhJAsQYGWEEIIIYQQQgghhBBCsgQFWkIIIYQQQgghhBBCCMkSFGgJITnFqlWrcM0112Dq1Kmora3FlClTMHv2bDz77LOO6aZOnYrrrrsuK23ctm0bRo0ahSeeeMJ3mgceeACjRo0y/40bNw4nnHACfvCDH2Dp0qVR03/nO9/Bt7/97ZS31WhHR0eHuZzvfOc7KV+O17IIIYQQQnoqN998M0aNGoVbbrnFd5qrr74ao0aNwgMPPJDBlsWnqakJ999/P77xjW/giCOOwKRJk3DmmWfiV7/6FQ4cOGBOl+3fdvF+Hz/zzDMYNWoUtm3blvJlT5061fFbfuLEiZg2bRpuv/12bN68OeXLI4QQCrSEkJzh3XffxezZs1FYWIj58+fj1VdfxaOPPoqxY8fipptuwsKFC81plyxZgl/84hfZa2yCLFu2DMuXL8frr7+O3/3udzjssMNwww034JprrkEkEjGne+CBB7BgwYKE5/vOO+9g6tSpcae77LLLsHz5chQVFXWq/bG46aabHA8c6VwWIYQQQkiuUVpaiqVLl6K9vT1q3IEDB/Dmm2+ipKQkCy3zJxQK4eKLL8bLL7+MH/3oR3j22WfxzDPPYO7cuXj22Wdx6aWXQlVVAPxtd9ppp2H58uVYvnw5nnvuOdxwww344osvcN555+Gll15Ken6J/n4nhOQnwWw3gBBCDJ544gn07dsX99xzDyRJAgAccsghGDduHNra2rB27Vpz2pqammw1Myn69Olj/qgdOHAgjjjiCJx88sm4/PLL8eCDD+Laa68FAFRVVSU1348//jjm+EgkgkAggLKyMpSVlXWm6Qm1YfDgwebndC6LEEIIISTXGDNmDD777DO88cYb+MY3vuEY9+qrr2LIkCFoa2vLUuu8WbFiBT799FM8/vjjOO6448zhhx12GKqrq/Hggw/iiy++wIgRI/L+t11RURH69u1rfh46dChOP/103HbbbbjxxhsxcuRIjBgxIuH5xfv9TgjJb+igJYTkDOFwGIqiIBwOR4375S9/iXvuucf87I44+Oyzz/Cd73wHdXV1OOGEE/Dwww9j4cKFGDVqlDm/WbNm4corr8Rrr72Gs846C7W1tTjrrLPwxhtvOJb14osv4oILLsARRxyBI488ErNnz8Z7772XsvWcMmUKLrjgAixatMgRPWDvwvXaa6/hW9/6Fo444ggcccQRmDVrFt5++20AWpe6+++/H9u3bze7zRmxC0899RRmzZqFuro6NDc3+3ZNe+GFF/D1r38d48ePx/Tp0/Hmm2+a4/y+M2rUKPzmN78x//7yyy/x4IMPml3LvL73zDPP4JxzzkFtbS2OPPJIXH755Q6h/W9/+xtGjRqFTz/9FHPnzsXEiRNx/PHH4+c//7nDYUwIIYQQkmsEAgGccsopeO6556LGPf/88zjttNOihkciETzwwAM47bTTMH78eJx88sm4++67EQqFzGkURcHvfvc7nHnmmairq8Pxxx+Pa665xtGVv7O/oYzfxfblGZx66ql4+umnTdHR/dsuHA7jzjvvxJQpUzBp0iT84Ac/wGeffYZRo0bhb3/7W1Lt+vLLL/HDH/4Qxx13HGpra3HGGWfg4YcfNt27ybB9+3b893//NyZMmIBjjjkGv/rVr6AoChoaGlBbW+sZMXH55Zdj5syZSS9LkiT89Kc/RUlJCR5//PGE18fr9zsAtLa24s4778RJJ52E8ePH44wzzsCCBQsghEi6bYSQ7g0FWkJIznDKKadgz549mD17Nl577TU0Nzcn9L1QKITvf//72LVrF/7whz9g0aJF2LRpk5kRW1BQYP7/2Wef4W9/+xvmz5+PZ599Fn369MGNN96IgwcPAgA++OAD3HDDDTj++OPx7LPP4qmnnsKhhx6KuXPnYvfu3Slb11NPPRWtra1YvXp11LjNmzfj2muvxZlnnonnnnsOTz31FGpra/H9738fO3fuxP/8z//gtNNOw4ABA7B8+XJcdtll5ncff/xxzJgxA6+++irKy8s9l71582b8/e9/x29+8xssWbIE/fv3xw9/+EPs2rUr4fYvW7YMgNX1beDAgVHTLFmyBLfccgumTp2KZ599Fo899hhCoRAuueQSc1sGg1pHjnnz5uGb3/wmXnzxRVx22WX461//ihdeeCHh9hBCCCGEZINvfOMb+M9//oN9+/aZw3bu3In3338f06dPj5r+F7/4Bf7whz9gzpw5+Mc//oGbbroJS5Yswbx588xpHnnkETzyyCP40Y9+hFdeeQUPP/wwtm3bhmuuucacprO/oY444gj06tUL119/Pf74xz/iyy+/THhdH3zwQfzlL3/BFVdcgWeffRannXaaaZgw2pNIu4QQ+P73v4/t27fjkUcewSuvvIJrrrkGv//97/GXv/wl4fYY3HnnnZg5cyaee+45zJkzBwsXLsSiRYtQU1ODr3/963j22WcdgmdDQwPeeeedTgm0gBZtMWXKFLz77rsJr4/f7/cf/vCHeOGFF3DzzTfjH//4By6//HI88MAD+P3vf9+pthFCui8UaAkhOcOMGTPwgx/8AJ9++imuvvpqHH300bjgggtw7733xgzjf//997F9+3bccMMNOOaYYzB8+HDcfffdnnlgu3btwq9//WuMHTsWw4cPx8UXX4yDBw/iiy++AACMHz8er7/+On70ox9hyJAh+NrXvoa5c+eitbUVH330UcrW9ZBDDgEA7NmzJ2rc+vXrEYlEcMEFF+DQQw/F8OHDccstt+BPf/oTevXqhYqKChQVFSEQCKBv376OrmcjRozAjBkzcOihh0KWvS/xjY2N+PWvf426ujqMHj0ad955Jzo6OvDKK68k3P4+ffoA0H6g9u3bF4FAIGqaP/zhDzjmmGNw3XXXYfjw4ZgwYQJ+85vfoK2tDUuWLHFMO336dEybNg2DBw/GZZddhrKyMqxatSrh9hBCCCGEZIPjjz8elZWVDlH0xRdfxOGHH47Ro0c7pt27dy+WLFmC7373u5g1axaGDh2K6dOn48orr8Szzz5r/i68+OKLsXTpUkyfPh2HHHII6urqMGPGDKxduxYNDQ2OeSb7G6qmpgYPPvggampqcM899+DrX/86TjrpJNx0001YtmxZTOfmM888g5NOOgmXXnophg4dipkzZ+Lkk0/2nDZeu/7v//4Pjz76KGprazFo0CCcc845GDNmDP7973/7b2wfzj//fEyfPh3Dhg3D9773PRx11FGmq3n27NnYtm2bKaYCwCuvvILCwkJPAT1RBg4c6PgdH299vH6/r1q1CsuXL8f111+P6dOnY+jQoZg1axYuvPBCLFy40NPlTAjpuVCgJYTkDJIk4brrrsPy5ctx33334dvf/jba2trwyCOPYPr06b5v1D/99FMAwIQJE8xhBQUFOPHEE6OmHTJkiCO/trKyEgCwf/9+AFrW1Ouvv46ZM2fi6KOPxqRJk/Ctb30LgFbxNlUYP7gKCwujxh155JHo3bs3LrnkEixcuBAbNmyALMuYNGlS3Byw8ePHx132kCFDHHlagwYNQmVlpSlSp4KDBw9iy5YtOProox3D+/fvjwEDBmDDhg2
"text/plain": [
"<Figure size 1400x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"print(\"Running backtest on test set...\")\n",
"\n",
"# Filter test data by date (datetime index is now datetime64 type)\n",
"# Use string dates for comparison since pandas will convert them appropriately\n",
"test_start = CONFIG['test_range'][0]\n",
"test_end = CONFIG['test_range'][1]\n",
"\n",
"# Get test data by filtering the pandas DataFrame\n",
"df_test = df_full.loc[\n",
" (df_full.index.get_level_values(0) >= test_start) &\n",
" (df_full.index.get_level_values(0) <= test_end)\n",
"].copy()\n",
"\n",
"# Extract features and labels\n",
"X_test = df_test[feature_cols].values\n",
"y_test = df_test['label'].values\n",
"\n",
"# Get predictions\n",
"import xgboost as xgb\n",
"dtest = xgb.DMatrix(X_test)\n",
"predictions = model.predict(dtest)\n",
"\n",
"# Create signal Series with proper index for backtest\n",
"signal_series = pd.Series(predictions, index=df_test.index)\n",
"\n",
"print(f\"\\nPredictions generated: {len(predictions)} samples\")\n",
"print(f\"Signal statistics:\")\n",
"print(f\" Mean: {predictions.mean():.4f}\")\n",
"print(f\" Std: {predictions.std():.4f}\")\n",
"\n",
"# Plot signal distribution\n",
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
"\n",
"axes[0].hist(predictions, bins=100, edgecolor='black')\n",
"axes[0].set_title('Signal Distribution')\n",
"axes[0].axvline(x=0, color='red', linestyle='--')\n",
"\n",
"# Group by date for mean signal plot\n",
"signal_by_date = signal_series.groupby(level=0).mean()\n",
"axes[1].plot(signal_by_date.index, signal_by_date.values)\n",
"axes[1].set_title('Mean Signal by Date')\n",
"axes[1].axhline(y=0, color='red', linestyle='--')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Generate Predictions"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2026-02-14T08:13:58.528619Z",
"iopub.status.busy": "2026-02-14T08:13:58.528462Z",
"iopub.status.idle": "2026-02-14T08:13:58.798553Z",
"shell.execute_reply": "2026-02-14T08:13:58.797346Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Backtest Summary:\n",
" 0: ic_mean 0.074090\n",
"ic_std 0.196369\n",
"ic_ir 0.377298\n",
"ic_positive_ratio 0.659401\n",
"annual_return 20.731708\n",
"annual_volatility 3.728406\n",
"sharpe_ratio 5.560475\n",
"max_drawdown -0.758262\n",
"Name: 0, dtype: float64\n"
]
}
],
"source": [
"# Run backtest using CTABacktester\n",
"from qshare.eval.cta.backtest import CTABacktester\n",
"\n",
"# Create return Series from test data (need actual returns, not normalized labels)\n",
"# The 'return' column should have the raw returns if available\n",
"if 'return' in df_test.columns:\n",
" return_series = df_test['return']\n",
"else:\n",
" # Use label as proxy (it's normalized returns)\n",
" return_series = df_test['label']\n",
"\n",
"backtester = CTABacktester(\n",
" num_trades=CONFIG['num_trades'],\n",
" signal_dist=CONFIG['signal_dist'],\n",
" pos_weight=CONFIG['pos_weight']\n",
")\n",
"\n",
"results = backtester.run(return_series, signal_series)\n",
"summary = backtester.summary()\n",
"\n",
"print(\"\\nBacktest Summary:\")\n",
"for key, value in summary.items():\n",
" if isinstance(value, float):\n",
" print(f\" {key}: {value:.4f}\")\n",
" else:\n",
" print(f\" {key}: {value}\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2026-02-14T08:13:58.801046Z",
"iopub.status.busy": "2026-02-14T08:13:58.800882Z",
"iopub.status.idle": "2026-02-14T08:13:58.965447Z",
"shell.execute_reply": "2026-02-14T08:13:58.964134Z"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW0AAAGGCAYAAAAAW6PhAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXd4HMX9/99XdOrNVXJv2LgigynGlFBCNx1imoEvLRhTEmIgYAIJvYaAQ34mQGw6mGpCCcXg0NwwtuXebRVLsqRTO12//f1xmfXc3O7d7t3e3Ur6vJ7Hj6XT3uzs7uyU97znMxZJkiQQBEEQBEEQBEEQBEEQBEEQpsCa6QwQBEEQBEEQBEEQBEEQBEEQByDRliAIgiAIgiAIgiAIgiAIwkSQaEsQBEEQBEEQBEEQBEEQBGEiSLQlCIIgCIIgCIIgCIIgCIIwESTaEgRBEARBEARBEARBEARBmAgSbQmCIAiCIAiCIAiCIAiCIEwEibYEQRAEQRAEQRAEQRAEQRAmgkRbgiAIgiAIgiAIgiAIgiAIE0GiLUEQBEEQBEEQBEEQBEEQhImwZzoDBEEQBEEQROp47rnnMG/ePKxbtw7Z2dny5263G6+88go+++wz7NmzBwAwYMAAnHTSSbjqqqvQq1evuGlXVlbiX//6F1atWoXm5mYUFxdjzJgxuOiii3D66aen7JriceKJJ6KmpibmMbNnz0ZNTQ2+++47/PDDD2nKmTJ/+tOfsH79evzud7/DtddeG/f4r7/+GoMGDUpDzoCWlhYsWLAAX375Jfbt2wdJktCvXz+ccMIJmDVrFoqKihJO++uvv8acOXPw5ptvYsyYMQbmmiAIgiAIoutjkSRJynQmCIIgCIIgiNSgJNo6nU5cddVVaGlpwezZs3H44YdDkiT8/PPPeO6552C1WvHKK69g8ODBqum+8847uP/++3HqqadixowZGDx4MFpbW/Hll1/ixRdfxOmnn45HH30UFoslXZcq09zcjGAwKP9+9tln48gjj8Q999wjf5aXl4dQKAS/369JoE4Vb775Jp566il89NFH6Nu3L1pbW+W/LV++HLfffjuee+45TJ48Wf68V69esNlsSZ132bJluPvuu7FkyRLVY3w+H84//3z4/X7cfvvtOPjggxEMBvHLL7/g8ccfR3l5Od577z1YrdoX711++eU4//zzcf755wMAnnjiCXz22Wf48MMPkxKACYIgCIIguhvktCUIgiAIguhh/PnPf0Z9fT0+/PBDlJWVyZ8PHz4cxxxzDM466yzMmzcPjz32mOL3N2/ejD//+c+YOXMm7rrrLvnzAQMGYOzYsTjkkENw/fXXY9KkSbjssstSdh2BQAA2my1KGBZFWKvVipycHPTt2zdleUmElpYWPP300/i///s/DBw4EAAi8shEzOLiYsPz/ssvv8Q95qeffsK2bdvwr3/9C0cffbT8+fDhw1FaWop58+Zh586dGDVqlKZzBgIBrF+/XhZsAWDWrFl47733MH/+fMyZM0f/hRAEQRAEQXRTKKYtQRAEQRBED6Kmpgaff/45rrnmmgjBllFWVoYPP/wQjzzyiGoar7zyCvLz8/G73/1O8e/HH388TjjhBCxcuBAAMGfOHBx33HEQF3h98sknGDNmDDZu3AgA2Lt3L26++WYcfvjhmDhxIs4//3x888038vHV1dUYM2YMFi1ahBkzZmDSpElob2/XfQ8Yd911F6ZNmyb/fuyxx+LJJ5/EvHnzMHXqVEyePBl/+MMf4PV6MW/ePEybNg2HHnoobr755ojzBgIBPPfcczjppJMwYcIEHH/88Xj88cfh8/linv+VV15BIBDAzJkzdeW7qakJf/zjHzF16lRMmDABZ555Jt59992IY7788ktccMEFOPTQQ3HooYdixowZ+PHHH+XrfuaZZ1BTU4MxY8bgueeeUzyP3+8HAMXrOOGEE/Dee+9FCLYbNmzANddcg8mTJ+OQQw7B5ZdfjtWrVwMIP7vx48fD7Xbjj3/8oxwOIT8/H1dccQVee+21CJcxQRAEQRBET4dEW4IgCIIgiB7EypUrIUkSjj/+eNVjBg0aFHPJ+/Lly3HUUUdFxMgVOeGEE7Bnzx5UVVVh+vTpqK+vj3J3fvrppzjooIMwbtw4tLS04NJLL8XevXvxj3/8Ax9++CEOPfRQzJo1C8uWLYv43r/+9S9ceOGF+M9//oOCggKNVx6frKwsfPnll/B6vXjzzTdx66234uOPP8bVV18Nl8uF1157DX/+85/xxRdf4JVXXpG/95e//AX//Oc/cd111+GTTz7BnXfeiXfffRf33XdfzPN9+eWXOOKII3Rdg8/nw1VXXYVly5bh0Ucfxccff4zp06fjnnvuwYcffggA2LVrF2677Taceuqp+Oijj7Bo0SJMnDgR119/Pfbt24d77rkHJ510EsrKyvD999/j//7v/xTPdeihh6KoqAi///3v8eKLL8qxj5XYs2cPLr/8cgQCAbzyyit455130KdPH1x99dXYuXMnysvL8frrrwMA7r77bnz//ffyd0866SR4PJ6IzwiCIAiCIHo6JNoSBEEQBEH0IBoaGgCEQxkkk0Z5eXnMY1j69fX1OProo9G7d298/vnn8t87Ojrw3//+F2effTYA4N1330VjYyP++te/YsqUKRg5ciTuuecejB49Gi+++GJE2qNGjcKFF16IwYMH64qnqpXbb78dw4YNw5VXXom8vDw0NTXhjjvuwPDhwzF9+nSMGjVKdgfv378f7777Lq666irMmDEDQ4cOxRlnnIFZs2bhww8/lO+3SGtrK7Zu3YopU6boytvXX3+NrVu34oEHHsDxxx+P4cOH47e//S1OPPFEzJ8/HwCwadMmBAIBnH/++Rg8eDBGjhyJP/7xj3j11VdRVFSEwsJCZGdnw2azoW/fvsjPz1c8V69evTBv3jz06tULTzzxBE455RQcd9xxuPPOO7FkyZII5/SCBQsAAH/7298wceJEjBkzBo899hgKCgqwcOFC2Gw2lJaWAgAKCwsjwj0cfPDBKCoqwvLly3XdC4IgCIIgiO4MibYEQRAEQRA9CLs9vKVBKBRKOI2srKy432d/t9lssNvtOP300/HFF1/IQt9XX32FQCAgi7Zr165F3759MWLECDkNi8WCo446Sl5iz5gwYULCeY/HwQcfHHH+kpISHHzwwRFxc0tLS9HW1gYAqKysRDAYxJFHHhmRzlFHHYVQKIQ1a9Yonmf//v0AoDtW7dq1a2GxWHDEEUdEfD516lTs3LkTLS0tOOyww9C7d2/MnDkTCxYswObNm2G1WjF58mRVgVaNI488El988QVef/11zJ49G0OHDsUnn3yCG2+8EVdeeSW8Xq+crzFjxqCkpET+bnZ2NiZPnhz1/JTo27evfE8IgiAIgiAI2oiMIAiCIAiiR9G/f38A4fixiYqfZWVlqKqqinlMbW0tAMiO3OnTp+O1117D2rVrUVFRgc8++wxHHHGEHFe3vb0djY2NmDx5ckQ6fr8ffr8fbrdb/qywsDChfGshNzc34neLxaL4GYPFtp01a1aE65eJ02pCJIvfyjYb00p7ezskSYoSiQOBAACgsbERo0aNwqJFi/Dyyy9jwYIFeOSRRzBw4EDcdNNNuOCCC3SdDwhv5DZlyhTZFdzS0oJnnnkGb775Jt58801cddVVaG9vR01NTdTz8/l8mq6xqKhIFsIJgiAIgiAIEm0JgiAIgiB6FFOnToXNZsOXX36pKtp+//33KCoqwqRJkxT/fuyxx+Kdd95BR0eHajzWpUuXYsyYMbIoW1FRgSFDhuDzzz/H8OHD8cMPP+Avf/mLfHxRUREGDRoUFQqBESt+biYpLi4GADzxxBPy5lo8vXr1ivk9vUJlUVERsrOz5fi1IkwkHzhwIO69917ce++92L59O1577TXcfffdGDx4cJRLVw1JkuB0OqOuoaSkBPfddx8+++wzbNmyRc5XWVkZHnzwwah0tISwaGtrw9ChQzXliyAIgiAIoidA4REIgiAIgiB6EL169cL06dPxyiuvYMeOHVF/r62txZ133okXXnh
"text/plain": [
"<Figure size 1400x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"IC Statistics:\n",
" Mean: 0.0741\n",
" Std: 0.1964\n",
" IR: 0.3773\n"
]
}
],
"source": [
"# IC Analysis\n",
"# The results from CTABacktester is a dict, extract the IC series\n",
"if isinstance(results, dict):\n",
" # Results is a dict with various metrics\n",
" ic_by_date = results.get('df_ic', pd.Series())\n",
"else:\n",
" # Try the old approach if results is a DataFrame\n",
" ic_by_date = results.groupby(results.index.get_level_values(0))['ic'].mean()\n",
"\n",
"if len(ic_by_date) > 0:\n",
" fig = plot_ic_series(ic_by_date, title=\"IC Over Time (Test Set)\")\n",
" plt.show()\n",
"\n",
" print(f\"\\nIC Statistics:\")\n",
" print(f\" Mean: {ic_by_date.mean():.4f}\")\n",
" print(f\" Std: {ic_by_date.std():.4f}\")\n",
" print(f\" IR: {ic_by_date.mean() / ic_by_date.std():.4f}\")\n",
"else:\n",
" print(\"No IC data available in results\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2026-02-14T08:13:58.968285Z",
"iopub.status.busy": "2026-02-14T08:13:58.968127Z",
"iopub.status.idle": "2026-02-14T08:13:59.499839Z",
"shell.execute_reply": "2026-02-14T08:13:59.498582Z"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKEAAAJOCAYAAABvBRRKAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAw7tJREFUeJzs3Xd8VFX+//HXvXcmnSQEQgsQSiC00AVpKooFsbuWxYqIbfWrrr/ddVdXV1fX7tp2XcF1sa0Nsa0FBZWiKCK9IxAgBEJLII3MzL3398fAYAyBBCbMJLyfjwcPMufee+7n5jAk+eSczzFc13URERERERERERGpQ2akAxARERERERERkYZPSSgREREREREREalzSkKJiIiIiIiIiEidUxJKRERERERERETqnJJQIiIiIiIiIiJS55SEEhERERERERGROqcklIiIiIiIiIiI1DkloUREREREREREpM4pCSUiIiIiIiIiInVOSSgREZEwmzFjBtdddx3HH388PXr04KSTTuLWW29l7ty5kQ6tku+//57s7GxmzJhxRP08++yzZGdnU1FREabIqldUVMRTTz3FqFGj6Nu3L3369OH000/n4YcfZvfu3XV+/2gxefJksrOzq/wZMGAAl1xyCZ988kmkQxQRERGpQkkoERGRMHrqqae4/vrradeuHePHj2fKlCk89NBDlJeXc8UVV/DWW29FOsQj9oc//IFnn3029Pqaa65h1qxZxMbG1ul9fT4fl19+OZ9++im33nor77//PpMnT+b666/n/fff56qrrsJxnNC5PXr0IC8vLyz3fvfdd7niiivC0lc4vf7668yaNSv055VXXqFv377cfvvt/Oc//6l1f8OHD+f777+vg0hFREREwBPpAERERBqK6dOn8/zzz/OXv/yFX//616H2jIwMjj/+eG699VYef/xxzjjjDFJSUiIY6ZGZP38+rVu3Dr1OTEwkMTGxzu87e/ZsVq9ezX/+8x8GDx4cam/fvj2NGzfmueeeY+3atWRlZbF48WL8fn+1fbmui23beDw1+1Zo/vz5Rxx/XWjcuDHp6emh1+np6XTp0oVVq1bxn//8hzFjxtS4r4KCAvLz8484Jp/PR0xMzBH3IyIiIg2PZkKJiIiEyUsvvURWVlalBNQ+hmFw//33M23atFAC6uSTT+b222+vdN6+ZVZr1qwB4O9//ztDhgxh/vz5nH322eTk5HDOOeewdOlS5s6dy3nnnUfPnj0566yz+OGHH0L93HnnnQwZMqRS33l5eWRnZ/PGG29U+wz/+9//uOCCC+jbty/9+vXj17/+NXPmzAkdz87OZv369Tz33HNkZ2eTl5dXaTne7373O0444QRc163U78cff0x2djbLli0DYMOGDdxyyy0cd9xx5OTkcMEFF/DVV18d9PO7L6nk8/mqHBs+fDjvvvsuWVlZTJ48mdGjRwNwyimnhGYwnXzyyTzwwAP86U9/olevXnz99dcALF68mLFjxzJw4EB69erFmWeeyZtvvhnq+4orruCdd95hzpw5ZGdnM3nyZAB27NjBH//4RwYNGkSPHj0YNWoUkyZNqhRXQUEBN9xwA71792bgwIE8+OCDfPLJJ2RnZ5Obm8sjjzxCnz59KC0trXTdggULyM7OZvr06Qf9nFQnOzubrVu3hmaGAXz77bdceuml9OrVi759+3LdddeF/p19//33nHDCCQBceeWVnHzyyaFnv/jiiyv1/ctlnPv+zc6YMYNTTjkldP6ll17KTTfdxBdffMHIkSPJyclh5MiRTJs2LdTXrl27uOuuuxg2bBg9evTgxBNP5IEHHmDPnj2H9dwiIiIS3ZSEEhERCQO/38+8efNCP8gfSGpqKsnJybXq1+PxsGfPHp5//nkeeughXnvtNQoLC7nzzjt5+umnuf/++3nrrbdwXZc//vGPR/QMc+fO5Y477mDIkCG8//77vPPOO7Rp04brr7+egoICAL788ktg/xK8li1bVurj7LPPpqCgoMrMoU8++YROnTrRrVs3ioqKGD16NBs2bOD555/n/fffp2/fvtx0001899131cbXt29fkpOT+e1vf8uLL77I+vXrD3jemWeeyf/7f/8PgHfeeafS0sGZM2eSmJjIRx99xKBBgygtLWXMmDGYpskrr7zCxx9/zCWXXMK9994betZnn32W7t2706dPH2bNmsWZZ56Jz+fj6quv5rvvvuPhhx/mo48+4uyzz+auu+7i/fffD93v1ltvZcGCBTz55JO8/fbbuK7L008/DQTH9uKLL6a8vJwpU6ZUeob//e9/tGzZkmHDhlX7+TiYtWvX0rJlS0wz+K3e3Llzufbaa2nVqhVvv/02EydOpLy8nMsvv5ydO3fSp08fnnjiidDz/jKZVhMTJkzgb3/7Gy+88AIAXq+Xn376ibfffpsnnniC999/n6ZNm/L73/+ekpISAB544AEWLVrEM888w+eff84DDzzAtGnTeOihhw7ruUVERCS6KQklIiISBkVFRfh8vipJmXAoKSnhxhtvpEePHvTq1YtTTz2VVatWcdttt9GzZ0+6du3K+eefz8aNGykuLj7s+/To0YOpU6dy66230rZtWzp06MD1119PWVkZ8+bNA6Bp06YAJCQkkJ6ejmVZlfoYPHgwTZo04bPPPqsU/4wZMzjnnHMAmDRpEtu3b+fvf/87/fv3p2PHjtx111107tyZF198sdr40tLSeO6550hLS+Oxxx7jtNNO44QTTuAPf/gDX375ZWj2VVxcHElJSaFrUlNTK8Vy5513kpmZSWJiInFxcXz00Uf8/e9/Jzs7m9atW3PVVVfRpEkTZs6cCQSThx6PB6/XS3p6OnFxcUybNo1Vq1bx17/+lRNPPJH27dtzww03cPLJJ4eSMLm5ucyfP59x48Zx8sknk5mZyd13311pKWb79u0ZOHBgaHYVgOM4fPbZZ1xwwQWhJFJNlZWV8fLLL/PVV19x2WWXhdrHjx9P8+bNeeSRR8jOzqZnz548+eSTFBcXM2nSJGJiYkIJ0pSUFNLS0mp1X4DTTz+dgQMH0rx581Dbli1beOSRR+jWrRsdO3bk8ssvp6SkhLVr1wKwdOnSUIH5Vq1aMWzYMF5++eVaLSMUERGR+kM1oURERMJgX22hny9/CqcuXbqEPt6XVOnWrVuorXHjxgDs3r2bRo0aHdY9YmNjmTp1Kh9++CGbNm3C7/eHEjtFRUU16sPj8TBy5Eg+//xz/vjHP2IYBlOnTiUQCISSUAsXLiQ9PZ0OHTqErjMMg+OPP5533nnnoP0PHDiQzz//nHnz5jF79mzmzJnDxx9/zPvvv8/AgQOZMGHCQQukd+nSpVLizLIsVq5cyUsvvcRPP/1EeXk5AOXl5Qd95oULF2IYBgMGDKjUPmjQIL788kuKior46aefAOjVq1elc4YPH87ChQtDry+99FJuv/12Nm7cSJs2bZgzZw47duzgwgsvPOjnAuDCCy/EMIzQ6/Lycpo1a8Ydd9zBtddeWyneoUOH4vV6Q23p6el06tQplGA8Uj169KjS1rZt20oJrX0JuF27dgFw6qmnMmHCBGzb5qSTTmLgwIG0bds2LPGIiIhI9FESSkREJAxSU1OJjY1l48aNddJ/fHx86ON9SYcDtf2yFlNtvPbaazz88MOMHTuWkSNHkpycTEFBQa13hTv77LN57bXXWLhwIb179+bTTz9lwIABtGjRAoDi4mK2b99Onz59Kl3n9/vx+/2Ul5dXerZfMk2T/v37079/fyCYIHvqqad44403eOONN7j66qurvfaXyyGXLVvGb37zG4YOHcrTTz9N06ZNMU3zkM9cXFyM67oMHDiwUnsgEABg+/btoSVnv0wKNmnSpNLrESNG0KRJEyZPnsytt97KJ598wuDBg8nIyDhoDADPPfccbdq0AaC0tJQrr7ySk046ieuuu65KvFOmTAktMdynoqKiymy2w3Wg5GdCQkKl17/8d3r77bfTvn17Jk+ezG233YbrupxyyincddddlWZUiYiISMOgJJSIiEgYGIbB4MGD+eqrr/jTn/50wB/sd+3axZQpU7j
"text/plain": [
"<Figure size 1200x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Return Statistics:\n",
" Total Return: 2990.33%\n",
" Annual Return: 954.65%\n",
" Sharpe Ratio: 5.50\n"
]
}
],
"source": [
"# Cumulative returns\n",
"# Get returns from the backtest results\n",
"daily_returns = results.get('df_return_accum', pd.Series())\n",
"if len(daily_returns) > 0:\n",
" # If it's cumulative, calculate daily from it\n",
" if hasattr(daily_returns, 'diff'):\n",
" daily_rets = daily_returns.diff().fillna(daily_returns.iloc[0] if len(daily_returns) > 0 else 0)\n",
" else:\n",
" daily_rets = daily_returns\n",
"else:\n",
" # Fallback to simple IC-based approximation\n",
" daily_rets = results.get('df_ic', pd.Series())\n",
" daily_rets = daily_rets * 0.01 # Rough approximation\n",
"\n",
"fig = plot_cumulative_returns(daily_rets, title=\"Cumulative Strategy Returns\")\n",
"plt.show()\n",
"\n",
"if len(daily_rets) > 0:\n",
" total_return = daily_rets.sum() if not hasattr(daily_returns, 'iloc') else daily_returns.iloc[-1] if len(daily_returns) > 0 else 0\n",
" annual_return = (1 + total_return) ** (252 / len(daily_rets)) - 1 if len(daily_rets) > 0 else 0\n",
" sharpe = daily_rets.mean() / daily_rets.std() * np.sqrt(252) if daily_rets.std() > 0 else 0\n",
"\n",
" print(f\"\\nReturn Statistics:\")\n",
" print(f\" Total Return: {total_return:.2%}\")\n",
" print(f\" Annual Return: {annual_return:.2%}\")\n",
" print(f\" Sharpe Ratio: {sharpe:.2f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6. Save Results\n",
"\n",
"Save model, predictions, and metrics for later analysis."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2026-02-14T08:13:59.502588Z",
"iopub.status.busy": "2026-02-14T08:13:59.502439Z",
"iopub.status.idle": "2026-02-14T08:13:59.564977Z",
"shell.execute_reply": "2026-02-14T08:13:59.564063Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saving results to: /home/guofu/Workspaces/alpha_lab/cta_1d/../results/cta_1d/baseline_xgb\n",
"\n",
"Files saved:\n",
" - model.json\n",
" - backtest_df_short_accum.csv\n",
" - summary.json\n",
" - backtest_df_return_accum.csv\n",
" - backtest_df_long_accum.csv\n",
" - backtest_df_signal_dist.csv\n",
" - predictions.csv\n",
" - feature_importance.csv\n",
" - backtest_df_num_trade_short.csv\n",
" - backtest_df_return_per_trade.csv\n",
" - backtest_df_num_trade_long.csv\n",
" - backtest_df_ic.csv\n",
" - backtest_df_num_trade.csv\n",
" - config.json\n",
" - backtest_df_ic_test.csv\n"
]
}
],
"source": [
"if CONFIG['save_results']:\n",
" # Create output directory\n",
" output_dir = create_experiment_dir('cta_1d', CONFIG['experiment_name'])\n",
" print(f\"Saving results to: {output_dir}\")\n",
" \n",
" # Save config\n",
" with open(output_dir / 'config.json', 'w') as f:\n",
" json.dump(CONFIG, f, indent=2, default=str)\n",
" \n",
" # Save model\n",
" model.save_model(str(output_dir / 'model.json'))\n",
" \n",
" # Save feature importance\n",
" importance_df.to_csv(output_dir / 'feature_importance.csv', index=False)\n",
" \n",
" # Save predictions\n",
" signal_series.to_csv(output_dir / 'predictions.csv')\n",
" \n",
" # Save backtest results (handle dict or DataFrame)\n",
" if isinstance(results, dict):\n",
" # Save each DataFrame in the results dict separately\n",
" for key, value in results.items():\n",
" if isinstance(value, pd.DataFrame):\n",
" value.to_csv(output_dir / f'backtest_{key}.csv')\n",
" elif isinstance(value, pd.Series):\n",
" value.to_csv(output_dir / f'backtest_{key}.csv')\n",
" else:\n",
" results.to_csv(output_dir / 'backtest_results.csv')\n",
" \n",
" # Save summary\n",
" with open(output_dir / 'summary.json', 'w') as f:\n",
" json.dump(summary, f, indent=2, default=str)\n",
" \n",
" print(\"\\nFiles saved:\")\n",
" for f in output_dir.iterdir():\n",
" print(f\" - {f.name}\")\n",
"else:\n",
" print(\"Results not saved (save_results=False)\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}