---
# CTA 1D Parquet Dataset Requirements
# This file specifies the required Parquet tables for the alpha_lab CTA 1D task.
#
# Each top-level key names one Parquet table and contains:
#   source      - database/table/host/port the data is read from, plus any
#                 table-specific export options
#   target      - output path and partitioning/column conventions
#   description - what the table holds
#   priority    - export priority
#   notes       - optional export remarks
#
# NOTE(review): this file was rebuilt from a copy whose line breaks and
# indentation had been lost. Key nesting is inferred from key order; in
# particular, confirm against the export tool's schema whether `pivot`,
# `filter`, `group_by`, `agg` and `indicators` belong under `source`.

# Table 1: Alpha158 Features
cta_alpha158_1d:
  source:
    database: dfs://daily_stock_run
    # NOTE(review): the key says alpha158 but this table name says alpha159;
    # confirm the name is intended.
    table: stg_1day_tinysoft_cta_alpha159_0_7_beta
    host: 192.168.1.146
    port: 8848
  target:
    path: cta_alpha158_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: Alpha158 features for CTA futures (~163 columns)
  priority: medium

# Table 2: HFFactor Features (requires pivot)
cta_hffactor_1d:
  source:
    database: dfs://daily_stock_run
    table: stg_1day_tinysoft_cta_hffactor
    host: 192.168.1.146
    port: 8848
    # Long format: code, m_nDate, factor_name, value
    # Pivot to wide format during export
    pivot:
      index: [code, m_nDate]
      columns: factor_name
      values: value
      # Only these 8 columns needed
      filter:
        - vol_1min
        - skew_1min
        - volp_1min
        - volp_ratio_1min
        - voln_ratio_1min
        - trend_strength_1min
        - pv_corr_1min
        - flowin_ratio_1min
  target:
    path: cta_hffactor_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: High-frequency factor features (8 columns, pivoted from long format)
  priority: medium
  notes: Requires pivot transformation from long to wide format

# Table 3: Dominant Contract Mapping
cta_dom_1d:
  source:
    database: dfs://daily_stock_run
    table: dwm_1day_cta_dom
    host: 192.168.1.146
    port: 8848
    # Group and aggregate during export
    group_by: [m_nDate, code_init]
    filter: "version='vp_csmax_roll2_cummax'"
    agg: "first(code) as code"
  target:
    path: cta_dom_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: Dominant contract mapping for continuous contracts
  priority: medium
  notes: Requires group_by + aggregation, filter by version

# Table 4: Return Labels
cta_labels_1d:
  source:
    database: dfs://daily_stock_run
    table: stg_1day_tinysoft_cta_hfvalue
    host: 192.168.1.146
    port: 8848
    # Filter for specific indicators
    indicators:
      - twap_open1m@1_twap_close1m@1  # o2c_twap1min
      - twap_open1m@1_twap_open1m@2  # o2o_twap1min
  target:
    path: cta_labels_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: Return labels for different return types
  priority: medium
  notes: Filter indicator column for specific return types