You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
2.5 KiB

# CTA 1D Parquet Dataset Requirements
# This file specifies the Parquet tables required by the alpha_lab CTA 1D task.
# Table 1: Alpha158 Features
# NOTE(review): the nesting below is reconstructed from key order (the text
# this was recovered from had no indentation) - confirm against the schema
# the export tool expects.
cta_alpha158_1d:
  # Connection and table the data is read from.
  source:
    database: dfs://daily_stock_run
    # NOTE(review): the table name says "alpha159" while the key, path and
    # description all say Alpha158 - confirm this is the intended table.
    table: stg_1day_tinysoft_cta_alpha159_0_7_beta
    host: 192.168.1.146
    port: 8848
  # Layout of the Parquet output.
  target:
    path: cta_alpha158_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: Alpha158 features for CTA futures (~163 columns)
  priority: medium
# Table 2: HFFactor Features (requires pivot)
# NOTE(review): the nesting below is reconstructed from key order (the text
# this was recovered from had no indentation). In particular, `pivot` is
# assumed to belong to `source` and `filter` to `pivot` - confirm against the
# schema the export tool expects.
cta_hffactor_1d:
  # Connection and table the data is read from.
  source:
    database: dfs://daily_stock_run
    table: stg_1day_tinysoft_cta_hffactor
    host: 192.168.1.146
    port: 8848
    # Long format: code, m_nDate, factor_name, value
    # Pivot to wide format during export
    pivot:
      index: [code, m_nDate]
      columns: factor_name
      values: value
      filter:  # Only these 8 columns needed
        - vol_1min
        - skew_1min
        - volp_1min
        - volp_ratio_1min
        - voln_ratio_1min
        - trend_strength_1min
        - pv_corr_1min
        - flowin_ratio_1min
  # Layout of the Parquet output.
  target:
    path: cta_hffactor_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: High-frequency factor features (8 columns, pivoted from long format)
  priority: medium
  notes: Requires pivot transformation from long to wide format
# Table 3: Dominant Contract Mapping
# NOTE(review): the nesting below is reconstructed from key order (the text
# this was recovered from had no indentation). `group_by`, `filter` and `agg`
# are assumed to belong to `source` - confirm against the schema the export
# tool expects.
cta_dom_1d:
  # Connection and table the data is read from.
  source:
    database: dfs://daily_stock_run
    table: dwm_1day_cta_dom
    host: 192.168.1.146
    port: 8848
    # Group and aggregate during export
    group_by: [m_nDate, code_init]
    # Row filter: keep a single mapping version.
    filter: "version='vp_csmax_roll2_cummax'"
    # Aggregation applied per (m_nDate, code_init) group.
    agg: "first(code) as code"
  # Layout of the Parquet output.
  target:
    path: cta_dom_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: Dominant contract mapping for continuous contracts
  priority: medium
  notes: Requires group_by + aggregation, filter by version
# Table 4: Return Labels
# NOTE(review): the nesting below is reconstructed from key order (the text
# this was recovered from had no indentation). `indicators` is assumed to
# belong to `source` - confirm against the schema the export tool expects.
cta_labels_1d:
  # Connection and table the data is read from.
  source:
    database: dfs://daily_stock_run
    table: stg_1day_tinysoft_cta_hfvalue
    host: 192.168.1.146
    port: 8848
    # Filter for specific indicators
    indicators:
      - twap_open1m@1_twap_close1m@1  # o2c_twap1min
      - twap_open1m@1_twap_open1m@2  # o2o_twap1min
  # Layout of the Parquet output.
  target:
    path: cta_labels_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: Return labels for different return types
  priority: medium
  notes: Filter indicator column for specific return types