---
# Phase 2 Configuration
# Trading-oriented prediction system with R:R focus
#
# NOTE(review): the block structure below was restored from a copy whose
# line breaks and indentation had been collapsed. Keys, values and quoting
# are unchanged; confirm the nesting against the code that loads this file.

# General Phase 2 settings
phase2:
  version: "2.0.0"
  description: "Range prediction and TP/SL classification for intraday trading"
  primary_instrument: "XAUUSD"

# Horizons for Phase 2 (applied to all instruments unless overridden)
# NOTE(review): placed at top level alongside the other sections - confirm it
# is not expected under `phase2`.
# Both entries give minutes / bars = 5; the weights sum to 1.0.
horizons:
  - id: 0
    name: "15m"
    bars: 3
    minutes: 15
    weight: 0.6
    enabled: true

  - id: 1
    name: "1h"
    bars: 12
    minutes: 60
    weight: 0.4
    enabled: true

# Target configuration
targets:
  # Delta (range) targets
  delta:
    enabled: true
    # Calculate: delta_high = future_high - close, delta_low = close - future_low
    # Starting from t+1 (NOT including current bar)
    start_offset: 1  # CRITICAL: Start from t+1, not t

    # ATR-based bins
    # NOTE(review): nested under `delta` because the bins describe the delta
    # targets - confirm the loader does not read `targets.atr_bins` instead.
    # Three thresholds delimit the four bins declared by n_bins.
    atr_bins:
      enabled: true
      n_bins: 4
      thresholds:
        - 0.25  # Bin 0: < 0.25 * ATR
        - 0.50  # Bin 1: 0.25-0.50 * ATR
        - 1.00  # Bin 2: 0.50-1.00 * ATR
        # Bin 3: >= 1.00 * ATR

  # TP vs SL labels
  tp_sl:
    enabled: true
    # Default R:R configurations to generate labels for
    # The name encodes the tp:sl ratio (10/5 = 2:1, 15/5 = 3:1).
    # NOTE(review): the unit of sl/tp is not stated here - confirm.
    rr_configs:
      - sl: 5.0
        tp: 10.0
        name: "rr_2_1"
      - sl: 5.0
        tp: 15.0
        name: "rr_3_1"

# Model configurations
models:
  # Range predictor (regression)
  range_predictor:
    enabled: true
    algorithm: "xgboost"
    task: "regression"

    xgboost:
      n_estimators: 200
      max_depth: 5
      learning_rate: 0.05
      subsample: 0.8
      colsample_bytree: 0.8
      min_child_weight: 3
      gamma: 0.1
      reg_alpha: 0.1
      reg_lambda: 1.0
      tree_method: "hist"
      device: "cuda"

    # Output: delta_high, delta_low for each horizon
    outputs:
      - "delta_high_15m"
      - "delta_low_15m"
      - "delta_high_1h"
      - "delta_low_1h"

  # Range classifier (bin classification)
  range_classifier:
    enabled: true
    algorithm: "xgboost"
    task: "classification"

    xgboost:
      n_estimators: 150
      max_depth: 4
      learning_rate: 0.05
      num_class: 4
      objective: "multi:softprob"
      tree_method: "hist"
      device: "cuda"

    outputs:
      - "delta_high_bin_15m"
      - "delta_low_bin_15m"
      - "delta_high_bin_1h"
      - "delta_low_bin_1h"

  # TP vs SL classifier
  tp_sl_classifier:
    enabled: true
    algorithm: "xgboost"
    task: "binary_classification"

    xgboost:
      n_estimators: 200
      max_depth: 5
      learning_rate: 0.05
      scale_pos_weight: 1.0  # Adjust based on class imbalance
      objective: "binary:logistic"
      eval_metric: "auc"
      tree_method: "hist"
      device: "cuda"

    # Threshold for generating signals
    probability_threshold: 0.55

    # Use range predictions as input features (stacking)
    use_range_predictions: true

    # One output per horizon / R:R configuration pair
    outputs:
      - "tp_first_15m_rr_2_1"
      - "tp_first_1h_rr_2_1"
      - "tp_first_15m_rr_3_1"
      - "tp_first_1h_rr_3_1"

  # AMD phase classifier
  amd_classifier:
    enabled: true
    algorithm: "xgboost"
    task: "multiclass_classification"

    xgboost:
      n_estimators: 150
      max_depth: 4
      learning_rate: 0.05
      num_class: 4  # accumulation, manipulation, distribution, neutral
      objective: "multi:softprob"
      tree_method: "hist"
      device: "cuda"

    # Phase labels
    phases:
      - name: "accumulation"
        label: 0
      - name: "manipulation"
        label: 1
      - name: "distribution"
        label: 2
      - name: "neutral"
        label: 3

# Feature configuration for Phase 2
features:
  # Base features (from Phase 1)
  use_minimal_set: true

  # Additional features for Phase 2
  phase2_additions:
    # Microstructure features
    microstructure:
      enabled: true
      features:
        - "body"  # |close - open|
        - "upper_wick"  # high - max(open, close)
        - "lower_wick"  # min(open, close) - low
        - "body_ratio"  # body / range
        - "upper_wick_ratio"
        - "lower_wick_ratio"

    # Explicit lags
    lags:
      enabled: true
      columns: ["close", "high", "low", "volume", "atr"]
      periods: [1, 2, 3, 5, 10]

    # Volatility regime
    volatility:
      enabled: true
      features:
        - "atr_normalized"  # ATR / close
        - "volatility_regime"  # categorical: low, medium, high
        - "returns_std_20"  # Rolling std of returns

    # Session features
    sessions:
      enabled: true
      features:
        - "session_progress"  # 0-1 progress through session
        - "minutes_to_close"  # Minutes until session close
        - "is_session_open"  # Binary: is a major session open
        - "is_overlap"  # Binary: London-NY overlap

# Evaluation metrics
evaluation:
  # Prediction metrics
  prediction:
    regression:
      - "mae"
      - "mape"
      - "rmse"
      - "r2"
    classification:
      - "accuracy"
      - "precision"
      - "recall"
      - "f1"
      - "roc_auc"

  # Trading metrics (PRIMARY for Phase 2)
  trading:
    - "winrate"
    - "profit_factor"
    - "max_drawdown"
    - "sharpe_ratio"
    - "sortino_ratio"
    - "avg_rr_achieved"
    - "max_consecutive_losses"

  # Segmentation for analysis
  segmentation:
    - "by_instrument"
    - "by_horizon"
    - "by_amd_phase"
    - "by_volatility_regime"
    - "by_session"

# Backtesting configuration
backtesting:
  # Capital and risk
  initial_capital: 10000
  risk_per_trade: 0.02  # 2% risk per trade
  max_concurrent_trades: 1  # Only 1 trade at a time initially

  # Costs
  costs:
    commission_pct: 0.0  # Usually spread-only for forex/gold
    slippage_pct: 0.0005  # 0.05%
    spread_included: true  # Spread already in data

  # Filters
  filters:
    min_confidence: 0.55  # Minimum probability to trade
    favorable_amd_phases: ["accumulation", "distribution"]
    min_atr_percentile: 20  # Don't trade in very low volatility

# Signal generation
signal_generation:
  # Minimum requirements to generate a signal
  # NOTE(review): min_confidence is 0.50 here but 0.55 under
  # backtesting.filters - confirm the difference is intentional.
  requirements:
    min_prob_tp_first: 0.55
    min_confidence: 0.50
    min_expected_rr: 1.5

  # Filters
  filters:
    check_amd_phase: true
    check_volatility: true
    check_session: true

  # Output format
  output:
    format: "json"
    include_metadata: true
    include_features: false  # Don't include raw features in signal

# Logging for LLM fine-tuning
logging:
  enabled: true
  log_dir: "logs/signals"

  # What to log
  log_content:
    market_context: true
    model_predictions: true
    decision_made: true
    actual_result: true  # After trade closes

  # Export format for fine-tuning
  export:
    format: "jsonl"
    conversational: true  # Format as conversation for fine-tuning