# File: trading-platform-ml-engine/config/phase2.yaml
# (Repository viewer metadata at export time: 290 lines, 6.8 KiB, YAML)

# Phase 2 Configuration
# Trading-oriented prediction system focused on risk:reward (R:R)
# General Phase 2 settings
# Identifies this configuration: schema/config version, a human-readable
# summary, and the instrument treated as primary.
phase2:
  # Quoted so the version stays a string rather than being parsed as a number.
  version: "2.0.0"
  description: "Range prediction and TP/SL classification for intraday trading"
  primary_instrument: "XAUUSD"
# Horizons for Phase 2 (applied to all instruments unless overridden)
# Each entry is one prediction horizon:
#   id      - integer index of the horizon
#   name    - short label ("15m", "1h")
#   bars    - horizon length in bars
#   minutes - horizon length in minutes (minutes / bars = 5 for both entries)
#   weight  - relative weight of the horizon (the two weights sum to 1.0)
#   enabled - whether the horizon is active
# NOTE(review): kept as a top-level key next to `targets`/`models`; move it
# under `phase2` if the loader reads `phase2.horizons` — confirm.
horizons:
  - id: 0
    name: "15m"
    bars: 3
    minutes: 15
    weight: 0.6
    enabled: true
  - id: 1
    name: "1h"
    bars: 12
    minutes: 60
    weight: 0.4
    enabled: true
# Target configuration
# Defines the label families generated for training: continuous range
# deltas, ATR-relative bins, and TP-before-SL labels.
targets:
  # Delta (range) targets
  delta:
    enabled: true
    # Calculate: delta_high = future_high - close,
    #            delta_low  = close - future_low
    # The future window starts at t+1 (the current bar is NOT included).
    start_offset: 1  # CRITICAL: Start from t+1, not t

  # ATR-based bins
  # NOTE(review): placed as a sibling of `delta`; nest it under `delta` if
  # the loader reads `targets.delta.atr_bins` — confirm.
  atr_bins:
    enabled: true
    n_bins: 4
    # Three ascending cut points, expressed as multiples of ATR, delimit
    # the four bins.
    thresholds:
      - 0.25  # Bin 0: < 0.25 * ATR
      - 0.50  # Bin 1: 0.25-0.50 * ATR
      - 1.00  # Bin 2: 0.50-1.00 * ATR
      # Bin 3: >= 1.00 * ATR

  # TP vs SL labels
  tp_sl:
    enabled: true
    # Default R:R configurations to generate labels for.
    # `name` encodes the tp/sl ratio (10.0 / 5.0 -> "rr_2_1",
    # 15.0 / 5.0 -> "rr_3_1").
    # NOTE(review): the unit of `sl`/`tp` (price units, pips, ATR multiples)
    # is not stated in this file — confirm.
    rr_configs:
      - sl: 5.0
        tp: 10.0
        name: "rr_2_1"
      - sl: 5.0
        tp: 15.0
        name: "rr_3_1"
# Model configurations
# Four XGBoost models, each with its own hyper-parameter mapping under
# `xgboost`. All of them use the histogram tree method on a CUDA device.
# NOTE(review): the `device` parameter is only recognised by XGBoost >= 2.0;
# older releases select the GPU with `tree_method: "gpu_hist"` — confirm the
# pinned XGBoost version.
# NOTE(review): `task` naming is not uniform ("classification" for
# range_classifier vs "multiclass_classification" for amd_classifier, both
# 4-class `multi:softprob`) — confirm which strings the trainer accepts.
models:
  # Range predictor (regression)
  range_predictor:
    enabled: true
    algorithm: "xgboost"
    task: "regression"
    xgboost:
      n_estimators: 200
      max_depth: 5
      learning_rate: 0.05
      subsample: 0.8
      colsample_bytree: 0.8
      min_child_weight: 3
      gamma: 0.1
      reg_alpha: 0.1
      reg_lambda: 1.0
      tree_method: "hist"
      device: "cuda"
    # Output: delta_high, delta_low for each horizon
    outputs:
      - "delta_high_15m"
      - "delta_low_15m"
      - "delta_high_1h"
      - "delta_low_1h"

  # Range classifier (bin classification)
  range_classifier:
    enabled: true
    algorithm: "xgboost"
    task: "classification"
    xgboost:
      n_estimators: 150
      max_depth: 4
      learning_rate: 0.05
      # Presumably one class per ATR bin (`n_bins` under `atr_bins` is
      # also 4) — keep the two in sync; confirm.
      num_class: 4
      objective: "multi:softprob"
      tree_method: "hist"
      device: "cuda"
    # One binned output per delta direction and horizon.
    outputs:
      - "delta_high_bin_15m"
      - "delta_low_bin_15m"
      - "delta_high_bin_1h"
      - "delta_low_bin_1h"

  # TP vs SL classifier
  tp_sl_classifier:
    enabled: true
    algorithm: "xgboost"
    task: "binary_classification"
    xgboost:
      n_estimators: 200
      max_depth: 5
      learning_rate: 0.05
      scale_pos_weight: 1.0  # Adjust based on class imbalance
      objective: "binary:logistic"
      eval_metric: "auc"
      tree_method: "hist"
      device: "cuda"
    # The keys below are not XGBoost parameters, so they sit at model level
    # (siblings of `xgboost`).
    # NOTE(review): confirm this placement against the loader.
    # Threshold for generating signals
    probability_threshold: 0.55
    # Use range predictions as input features (stacking)
    use_range_predictions: true
    # One output per horizon ("15m", "1h") and R:R config name
    # ("rr_2_1", "rr_3_1").
    outputs:
      - "tp_first_15m_rr_2_1"
      - "tp_first_1h_rr_2_1"
      - "tp_first_15m_rr_3_1"
      - "tp_first_1h_rr_3_1"

  # AMD phase classifier
  amd_classifier:
    enabled: true
    algorithm: "xgboost"
    task: "multiclass_classification"
    xgboost:
      n_estimators: 150
      max_depth: 4
      learning_rate: 0.05
      num_class: 4  # accumulation, manipulation, distribution, neutral
      objective: "multi:softprob"
      tree_method: "hist"
      device: "cuda"
    # Phase labels: integer class id for each of the four phases above.
    phases:
      - name: "accumulation"
        label: 0
      - name: "manipulation"
        label: 1
      - name: "distribution"
        label: 2
      - name: "neutral"
        label: 3
# Feature configuration for Phase 2
# A base feature set plus four optional feature groups, each with its own
# `enabled` switch.
features:
  # Base features (from Phase 1)
  use_minimal_set: true

  # Additional features for Phase 2
  phase2_additions:
    # Microstructure features (candle geometry)
    microstructure:
      enabled: true
      features:
        - "body"  # |close - open|
        - "upper_wick"  # high - max(open, close)
        - "lower_wick"  # min(open, close) - low
        - "body_ratio"  # body / range
        # Presumably upper_wick / range and lower_wick / range, by analogy
        # with body_ratio — confirm against the feature builder.
        - "upper_wick_ratio"
        - "lower_wick_ratio"

    # Explicit lags: every listed column is lagged by every listed period.
    # NOTE(review): period unit is presumably bars — confirm.
    lags:
      enabled: true
      columns: ["close", "high", "low", "volume", "atr"]
      periods: [1, 2, 3, 5, 10]

    # Volatility regime
    volatility:
      enabled: true
      features:
        - "atr_normalized"  # ATR / close
        - "volatility_regime"  # categorical: low, medium, high
        - "returns_std_20"  # Rolling std of returns

    # Session features
    sessions:
      enabled: true
      features:
        - "session_progress"  # 0-1 progress through session
        - "minutes_to_close"  # Minutes until session close
        - "is_session_open"  # Binary: is a major session open
        - "is_overlap"  # Binary: London-NY overlap
# Evaluation metrics
# Lists the metric names to compute and the dimensions to break results
# down by.
evaluation:
  # Prediction metrics, grouped by task type
  prediction:
    regression:
      - "mae"
      - "mape"
      - "rmse"
      - "r2"
    classification:
      - "accuracy"
      - "precision"
      - "recall"
      - "f1"
      - "roc_auc"

  # Trading metrics (PRIMARY for Phase 2)
  trading:
    - "winrate"
    - "profit_factor"
    - "max_drawdown"
    - "sharpe_ratio"
    - "sortino_ratio"
    - "avg_rr_achieved"
    - "max_consecutive_losses"

  # Segmentation for analysis
  segmentation:
    - "by_instrument"
    - "by_horizon"
    - "by_amd_phase"
    - "by_volatility_regime"
    - "by_session"
# Backtesting configuration
# Account sizing, cost model and trade filters used when replaying signals.
backtesting:
  # Capital and risk
  # NOTE(review): currency of initial_capital is not stated here — confirm.
  initial_capital: 10000
  risk_per_trade: 0.02  # 2% risk per trade (expressed as a fraction)
  max_concurrent_trades: 1  # Only 1 trade at a time initially

  # Costs
  # Despite the `_pct` suffix, slippage is written as a fraction
  # (0.0005 = 0.05%); commission is presumably on the same scale — confirm.
  costs:
    commission_pct: 0.0  # Usually spread-only for forex/gold
    slippage_pct: 0.0005  # 0.05%
    spread_included: true  # Spread already in data

  # Filters
  filters:
    min_confidence: 0.55  # Minimum probability to trade
    # Phase names as listed under the AMD classifier's `phases`.
    favorable_amd_phases: ["accumulation", "distribution"]
    min_atr_percentile: 20  # Don't trade in very low volatility
# Signal generation
# Thresholds a prediction must clear, the contextual checks to run, and the
# shape of the emitted signal.
signal_generation:
  # Minimum requirements to generate a signal
  requirements:
    min_prob_tp_first: 0.55
    # NOTE(review): 0.50 here vs `min_confidence: 0.55` under the
    # backtesting filters — confirm the difference is intentional.
    min_confidence: 0.50
    min_expected_rr: 1.5

  # Filters: which contextual checks are applied before emitting a signal
  filters:
    check_amd_phase: true
    check_volatility: true
    check_session: true

  # Output format
  output:
    format: "json"
    include_metadata: true
    include_features: false  # Don't include raw features in signal
# Logging for LLM fine-tuning
# Controls whether signal records are written, where, with which fields,
# and how they are exported.
# NOTE(review): kept as a top-level key; nest it under `signal_generation`
# if the loader reads `signal_generation.logging` — confirm.
logging:
  enabled: true
  log_dir: "logs/signals"

  # What to log
  log_content:
    market_context: true
    model_predictions: true
    decision_made: true
    actual_result: true  # After trade closes

  # Export format for fine-tuning
  export:
    format: "jsonl"
    conversational: true  # Format as conversation for fine-tuning