# trading-platform-ml-engine/config/phase2.yaml

# Phase 2 Configuration
# Trading-oriented prediction system with R:R focus

# Phase 2 metadata: config version, a one-line summary, and the primary
# instrument.
phase2:
  version: '2.0.0'  # kept quoted so it is always read as a string
  description: 'Range prediction and TP/SL classification for intraday trading'
  primary_instrument: 'XAUUSD'

# Prediction horizons. These apply to every instrument unless an
# instrument-level override exists.
# The two weights sum to 1.0 (0.6 + 0.4).
# NOTE(review): minutes / bars = 5 for both entries, which suggests a
# 5-minute base timeframe - confirm against the data pipeline.
horizons:
  # Short horizon: 3 bars ahead.
  - id: 0
    name: '15m'
    bars: 3
    minutes: 15
    weight: 0.6
    enabled: true

  # Long horizon: 12 bars ahead.
  - id: 1
    name: '1h'
    bars: 12
    minutes: 60
    weight: 0.4
    enabled: true

# Label families built as prediction targets.
targets:
  # Range (delta) targets:
  #   delta_high = future_high - close
  #   delta_low  = close - future_low
  delta:
    enabled: true
    # CRITICAL: the future window starts at bar t+1; the current bar t is
    # NOT part of it.
    start_offset: 1

  # Range targets discretized in multiples of ATR.
  # The three thresholds below delimit the four bins:
  #   bin 0: < 0.25 * ATR
  #   bin 1: 0.25-0.50 * ATR
  #   bin 2: 0.50-1.00 * ATR
  #   bin 3: >= 1.00 * ATR
  atr_bins:
    enabled: true
    n_bins: 4
    thresholds: [0.25, 0.50, 1.00]

  # Take-profit vs stop-loss labels.
  tp_sl:
    enabled: true
    # Labels are generated for each default R:R configuration listed here.
    # NOTE(review): the unit of `sl` / `tp` is not stated in this file -
    # confirm with the label generator.
    rr_configs:
      - sl: 5.0
        tp: 10.0
        name: 'rr_2_1'  # tp / sl = 2
      - sl: 5.0
        tp: 15.0
        name: 'rr_3_1'  # tp / sl = 3

# Model configurations
models:
  # Range regressor: XGBoost regression model for the delta targets.
  range_predictor:
    enabled: true
    algorithm: 'xgboost'
    task: 'regression'

    # Hyperparameters for the regressor.
    # NOTE(review): no `objective` is set here, unlike the classifier
    # sections - presumably the library/consumer default applies; confirm.
    xgboost:
      n_estimators: 200
      max_depth: 5
      learning_rate: 0.05
      subsample: 0.8
      colsample_bytree: 0.8
      min_child_weight: 3
      gamma: 0.1
      reg_alpha: 0.1
      reg_lambda: 1.0
      tree_method: 'hist'
      # NOTE(review): assumes a CUDA-capable GPU at training time - confirm
      # whether the consumer falls back to CPU.
      device: 'cuda'

    # Predicted columns: one delta_high / delta_low pair per horizon
    # (15m and 1h).
    outputs:
      - 'delta_high_15m'
      - 'delta_low_15m'
      - 'delta_high_1h'
      - 'delta_low_1h'

  # Range classifier: XGBoost model that predicts the range bin.
  range_classifier:
    enabled: true
    algorithm: 'xgboost'
    task: 'classification'

    xgboost:
      n_estimators: 150
      max_depth: 4
      learning_rate: 0.05
      # Same value as targets.atr_bins.n_bins (4); keep the two in sync.
      num_class: 4
      objective: 'multi:softprob'
      tree_method: 'hist'
      device: 'cuda'

    # Predicted columns: one high / low bin pair per horizon (15m and 1h).
    outputs:
      - 'delta_high_bin_15m'
      - 'delta_low_bin_15m'
      - 'delta_high_bin_1h'
      - 'delta_low_bin_1h'

  # TP-vs-SL classifier: binary XGBoost model behind the `tp_first_*`
  # outputs.
  tp_sl_classifier:
    enabled: true
    algorithm: 'xgboost'
    task: 'binary_classification'

    xgboost:
      n_estimators: 200
      max_depth: 5
      learning_rate: 0.05
      # Tune this to the observed class imbalance.
      scale_pos_weight: 1.0
      objective: 'binary:logistic'
      eval_metric: 'auc'
      tree_method: 'hist'
      device: 'cuda'

    # Probability cut-off used when generating signals.
    probability_threshold: 0.55

    # Stacking: feed the range predictions in as input features.
    use_range_predictions: true

    # One output per horizon (15m, 1h) and R:R config (rr_2_1, rr_3_1).
    outputs:
      - 'tp_first_15m_rr_2_1'
      - 'tp_first_1h_rr_2_1'
      - 'tp_first_15m_rr_3_1'
      - 'tp_first_1h_rr_3_1'

  # AMD phase classifier: multiclass XGBoost model over the four phases
  # listed under `phases`.
  amd_classifier:
    enabled: true
    algorithm: 'xgboost'
    task: 'multiclass_classification'

    xgboost:
      n_estimators: 150
      max_depth: 4
      learning_rate: 0.05
      # One class per phase: accumulation, manipulation, distribution,
      # neutral.
      num_class: 4
      objective: 'multi:softprob'
      tree_method: 'hist'
      device: 'cuda'

    # Phase name -> integer class label (0-3).
    phases:
      - name: 'accumulation'
        label: 0
      - name: 'manipulation'
        label: 1
      - name: 'distribution'
        label: 2
      - name: 'neutral'
        label: 3

# Feature set used by the Phase 2 models.
features:
  # Reuse the minimal base feature set carried over from Phase 1.
  use_minimal_set: true

  # Feature groups added on top of the base set for Phase 2.
  phase2_additions:
    # Candle microstructure:
    #   body       = |close - open|
    #   upper_wick = high - max(open, close)
    #   lower_wick = min(open, close) - low
    #   body_ratio = body / range
    # NOTE(review): the two wick ratios are presumably wick / range, by
    # analogy with body_ratio - confirm in the feature builder.
    microstructure:
      enabled: true
      features:
        - 'body'
        - 'upper_wick'
        - 'lower_wick'
        - 'body_ratio'
        - 'upper_wick_ratio'
        - 'lower_wick_ratio'

    # Explicit lagged copies of selected columns.
    # NOTE(review): presumably one feature per (column, period) pair -
    # confirm.
    lags:
      enabled: true
      columns:
        - 'close'
        - 'high'
        - 'low'
        - 'volume'
        - 'atr'
      periods:
        - 1
        - 2
        - 3
        - 5
        - 10

    # Volatility regime:
    #   atr_normalized    = ATR / close
    #   volatility_regime = categorical (low, medium, high)
    #   returns_std_20    = rolling std of returns
    volatility:
      enabled: true
      features:
        - 'atr_normalized'
        - 'volatility_regime'
        - 'returns_std_20'

    # Trading-session context:
    #   session_progress = progress through the session, 0-1
    #   minutes_to_close = minutes until the session closes
    #   is_session_open  = binary, a major session is open
    #   is_overlap       = binary, London-NY overlap
    sessions:
      enabled: true
      features:
        - 'session_progress'
        - 'minutes_to_close'
        - 'is_session_open'
        - 'is_overlap'

# Metrics reported when evaluating Phase 2.
evaluation:
  # Model-level prediction quality, split by task type.
  prediction:
    # NOTE(review): 'mape' can become unstable when true values are at or
    # near zero - confirm it is meaningful for the delta targets.
    regression:
      - 'mae'
      - 'mape'
      - 'rmse'
      - 'r2'
    classification:
      - 'accuracy'
      - 'precision'
      - 'recall'
      - 'f1'
      - 'roc_auc'

  # Trading performance: the PRIMARY metrics for Phase 2.
  trading:
    - 'winrate'
    - 'profit_factor'
    - 'max_drawdown'
    - 'sharpe_ratio'
    - 'sortino_ratio'
    - 'avg_rr_achieved'
    - 'max_consecutive_losses'

  # Dimensions along which results are segmented for analysis.
  segmentation:
    - 'by_instrument'
    - 'by_horizon'
    - 'by_amd_phase'
    - 'by_volatility_regime'
    - 'by_session'

# Backtest settings: capital, risk, trading costs, and trade filters.
backtesting:
  # Capital and risk.
  initial_capital: 10000
  # Fraction of capital risked per trade (0.02 = 2%).
  risk_per_trade: 0.02
  # Only one trade at a time initially.
  max_concurrent_trades: 1

  # Trading costs. Despite the `_pct` suffix, the values are written as
  # fractions (0.0005 = 0.05%).
  costs:
    # Usually spread-only for forex/gold.
    commission_pct: 0.0
    slippage_pct: 0.0005
    # The spread is already reflected in the data.
    spread_included: true

  # Conditions a candidate trade must satisfy.
  filters:
    # Minimum probability required to trade.
    min_confidence: 0.55
    favorable_amd_phases:
      - 'accumulation'
      - 'distribution'
    # Skip very low volatility.
    min_atr_percentile: 20

# Rules for turning model outputs into trading signals.
signal_generation:
  # Minimum requirements for a signal to be generated.
  # NOTE(review): min_confidence is 0.50 here but 0.55 under
  # backtesting.filters - confirm the difference is intentional.
  requirements:
    min_prob_tp_first: 0.55
    min_confidence: 0.50
    min_expected_rr: 1.5

  # Context checks toggled per signal.
  filters:
    check_amd_phase: true
    check_volatility: true
    check_session: true

  # Shape of the emitted signal.
  output:
    format: 'json'
    include_metadata: true
    # Raw features are left out of the signal.
    include_features: false

# Signal logging, kept as training material for LLM fine-tuning.
logging:
  enabled: true
  log_dir: 'logs/signals'

  # Which parts of each signal are recorded.
  log_content:
    market_context: true
    model_predictions: true
    decision_made: true
    # Recorded after the trade closes.
    actual_result: true

  # Export settings for the fine-tuning data set.
  export:
    format: 'jsonl'
    # Format each record as a conversation.
    conversational: true