Hierarchical ML Pipeline for trading predictions:
- Level 0: Attention Models (volatility/flow classification)
- Level 1: Base Models (XGBoost per symbol/timeframe)
- Level 2: Metamodels (XGBoost Stacking + Neural Gating)
Key components:
- src/pipelines/hierarchical_pipeline.py - Main prediction pipeline
- src/models/ - All ML model classes
- src/training/ - Training utilities
- src/api/ - FastAPI endpoints
- scripts/ - Training and evaluation scripts
- config/ - YAML configurations
Note: Trained models (*.joblib, *.pt) are gitignored.
Regenerate with training scripts.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
880 lines · 30 KiB · Python
#!/usr/bin/env python3
|
|
"""
|
|
Hierarchical Pipeline Backtesting V2
|
|
====================================
|
|
Enhanced backtesting with multiple filtering strategies based on findings:
|
|
- Inverted attention filter (filter HIGH attention, keep MEDIUM)
|
|
- Confidence-based filtering using metamodel probability
|
|
- Dynamic R:R based on predicted delta_high/delta_low ratio
|
|
|
|
Key findings from v1:
|
|
- Medium attention (0.8-2.0) has 44.6% win rate
|
|
- High attention (>=2.0) has 39.8% win rate
|
|
- This suggests we should INVERT the attention filtering logic
|
|
|
|
Usage:
|
|
python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD EURUSD --strategy medium_attention
|
|
python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD --strategy dynamic_rr
|
|
python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD --strategy all
|
|
|
|
Author: ML Pipeline
|
|
Version: 2.0.0
|
|
Created: 2026-01-07
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Dict, List, Tuple, Optional, Any
|
|
from dataclasses import dataclass, asdict
|
|
import json
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from loguru import logger
|
|
|
|
# Add parent directory to path for imports
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
|
|
|
# Import hierarchical pipeline directly from its file path instead of via the
# package — presumably to avoid package-level side effects on import (TODO confirm).
import importlib.util

pipeline_path = Path(__file__).parent.parent / 'src' / 'pipelines' / 'hierarchical_pipeline.py'

# Standard importlib recipe: build a spec from the file, create the module
# object, then execute the module body in it.
spec = importlib.util.spec_from_file_location("hierarchical_pipeline", pipeline_path)

hierarchical_module = importlib.util.module_from_spec(spec)

spec.loader.exec_module(hierarchical_module)

# Re-export the pipeline's public classes under local names so the rest of
# this script can use them as if they were imported normally.
HierarchicalPipeline = hierarchical_module.HierarchicalPipeline

PipelineConfig = hierarchical_module.PipelineConfig

PredictionResult = hierarchical_module.PredictionResult
|
|
|
|
|
|
@dataclass
class FilterStrategy:
    """Trading filter strategy configuration.

    Bundles the tunable thresholds that decide whether a pipeline
    prediction is traded (see ``should_trade``) and which risk:reward
    ratio is applied (see ``calculate_rr``).
    """
    name: str          # unique key; must match its key in STRATEGIES
    description: str   # human-readable summary printed in reports

    # Attention filters: the mean of the 5m/15m attention scores must fall
    # inside [attention_min, attention_max] for the trade to execute.
    attention_min: float = 0.0    # Minimum attention to trade
    attention_max: float = 999.0  # Maximum attention to trade (999.0 ~ no upper bound)

    # Confidence filters (metamodel outputs)
    confidence_min: float = 0.0       # Minimum confidence probability
    require_confidence: bool = False  # Require confidence=True from metamodel

    # Dynamic R:R
    use_dynamic_rr: bool = False  # Use predicted deltas for R:R
    base_rr: float = 2.0          # Base R:R when not dynamic (also the fallback)
    min_rr: float = 1.5           # Minimum R:R for dynamic
    max_rr: float = 4.0           # Maximum R:R for dynamic
|
|
|
|
|
|
# Pre-defined strategies based on findings from the v1 backtest:
# medium attention (0.8-2.0) showed ~44.6% win rate vs ~39.8% for >=2.0,
# which motivates the medium-band filters below.
# Each key must equal its FilterStrategy.name — the --strategy CLI option
# looks strategies up by this key.
STRATEGIES = {
    'baseline': FilterStrategy(
        name='baseline',
        description='No filtering - all trades',
        attention_min=0.0,
        attention_max=999.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'medium_attention': FilterStrategy(
        name='medium_attention',
        description='Only medium attention (0.8-2.0) - best win rate from v1',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'medium_with_confidence': FilterStrategy(
        name='medium_with_confidence',
        description='Medium attention + confidence filter',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.5,
        require_confidence=True,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'high_confidence': FilterStrategy(
        name='high_confidence',
        description='Only high confidence trades',
        attention_min=0.0,
        attention_max=999.0,
        confidence_min=0.7,
        require_confidence=True,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'dynamic_rr': FilterStrategy(
        name='dynamic_rr',
        description='Medium attention + dynamic R:R from predictions',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=1.5,
        max_rr=4.0
    ),
    'aggressive_filter': FilterStrategy(
        name='aggressive_filter',
        description='Medium attention + high confidence + dynamic R:R',
        attention_min=0.8,
        attention_max=1.8,  # Tighter range
        confidence_min=0.6,
        require_confidence=True,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=1.5,
        max_rr=3.5
    ),
    'conservative': FilterStrategy(
        name='conservative',
        description='Very selective - only best setups',
        attention_min=1.0,
        attention_max=1.6,
        confidence_min=0.65,
        require_confidence=True,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=2.0,
        max_rr=3.0
    )
}
|
|
|
|
|
|
@dataclass
class TradeResult:
    """Result of a single simulated trade in the backtest.

    One record is produced per evaluated signal, whether or not it passed
    the strategy filters (see ``passed_filter``), so filter effectiveness
    can be analyzed after the run.
    """
    # Identification
    timestamp: datetime  # decision bar time (15m)
    symbol: str
    direction: str       # 'long' or 'short'
    # Trade levels, in price units
    entry_price: float
    stop_loss: float
    take_profit: float
    risk: float          # distance entry -> stop
    reward: float        # risk * risk_reward
    risk_reward: float   # R:R actually used for this trade
    # Realized market action over the evaluation horizon
    actual_high: float
    actual_low: float
    hit_tp: bool
    hit_sl: bool
    profit_r: float      # P&L in R multiples (rr on TP, -1.0 on SL, else mark-to-market)
    # Model diagnostics captured at entry
    attention_score: float  # mean of the 5m and 15m attention scores
    attention_class_5m: int
    attention_class_15m: int
    confidence: bool        # metamodel confidence flag
    confidence_proba: float
    delta_high_pred: float
    delta_low_pred: float
    # Filtering
    strategy: str        # FilterStrategy.name that produced this record
    passed_filter: bool  # False -> signal recorded but not counted as executed
|
|
|
|
|
|
@dataclass
class StrategyMetrics:
    """Metrics for a trading strategy, aggregated over one backtest run.

    Produced by ``calculate_metrics`` and serialized to JSON by ``main``.
    """
    # Identification
    strategy_name: str
    strategy_description: str
    symbol: str
    period: str  # "YYYY-MM-DD to YYYY-MM-DD" span covered by the trades

    # Signal / filter counts
    total_signals: int
    filtered_out: int
    executed_trades: int
    filter_rate: float  # filtered_out / total_signals

    # Outcome counts (a trade with profit_r <= 0 counts as a loss)
    wins: int
    losses: int
    win_rate: float

    # Profitability, all expressed in R multiples
    total_profit_r: float
    avg_profit_r: float
    expectancy: float     # win_rate * avg_win - (1 - win_rate) * avg_loss
    profit_factor: float  # gross profit / gross loss (inf when no losses)

    # Risk
    max_consecutive_losses: int
    max_drawdown_r: float  # peak-to-trough fall of the cumulative R curve

    # Diagnostics: model scores split by outcome
    avg_attention_winners: float
    avg_attention_losers: float
    avg_confidence_winners: float
    avg_confidence_losers: float

    avg_rr_used: float  # mean R:R across executed trades
|
|
|
|
|
|
def setup_logging(log_dir: Path, experiment_name: str) -> Path:
    """Configure loguru sinks for this run.

    Sends INFO and above to stderr and full DEBUG output to a timestamped
    file under *log_dir* (created if missing, rotated at 10 MB).

    Returns:
        Path of the log file that was attached.
    """
    log_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_file = log_dir / f"{experiment_name}_{stamp}.log"

    # Drop loguru's default handler before attaching our own sinks.
    logger.remove()
    logger.add(sys.stderr, level="INFO", format="{time:HH:mm:ss} | {level} | {message}")
    logger.add(log_file, level="DEBUG", rotation="10 MB")

    return log_file
|
|
|
|
|
|
def load_ohlcv_from_mysql(symbol: str, timeframe: str, start_date: str, end_date: str) -> pd.DataFrame:
    """Load OHLCV bars for *symbol* from MySQL and resample to *timeframe*.

    Args:
        symbol: plain symbol name (e.g. 'XAUUSD'); mapped to the provider
            ticker via ``ticker_map``, defaulting to ``C:<symbol>``.
        timeframe: '5m' or '15m' triggers a resample; any other value
            returns the raw bars unchanged.
        start_date / end_date: inclusive date strings (e.g. '2024-09-01').

    Returns:
        OHLCV DataFrame indexed by timestamp; may be empty.

    Raises:
        ValueError: if symbol or dates contain characters that would be
            unsafe to interpolate into SQL.
        Exception: database errors are logged and re-raised.
    """
    from data.database import MySQLConnection

    # Security: these values are interpolated into the SQL text below, so
    # reject anything that is not a plain symbol / date-like string to
    # prevent SQL injection from CLI-supplied arguments.
    if not symbol.isalnum():
        raise ValueError(f"Invalid symbol: {symbol!r}")
    allowed_date_chars = set('0123456789-: .')
    for label, value in (('start_date', start_date), ('end_date', end_date)):
        if not value or not set(value) <= allowed_date_chars:
            raise ValueError(f"Invalid {label}: {value!r}")

    ticker_map = {
        'XAUUSD': 'C:XAUUSD',
        'EURUSD': 'C:EURUSD',
        'GBPUSD': 'C:GBPUSD',
        'USDJPY': 'C:USDJPY',
        'BTCUSD': 'X:BTCUSD'
    }
    ticker = ticker_map.get(symbol, f'C:{symbol}')

    logger.info(f"Loading {symbol} {timeframe} data from {start_date} to {end_date}...")

    try:
        db = MySQLConnection()

        # Values are pre-validated above; interpolation is therefore safe.
        query = f"""
            SELECT date_agg as timestamp, open, high, low, close, volume
            FROM tickers_agg_data
            WHERE ticker = '{ticker}'
            AND date_agg >= '{start_date}'
            AND date_agg <= '{end_date}'
            ORDER BY date_agg ASC
        """

        df = pd.read_sql(query, db.engine)

        if df.empty:
            logger.warning(f"No data found for {symbol}")
            return df

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df.set_index('timestamp', inplace=True)
        df.sort_index(inplace=True)

        logger.info(f" Loaded {len(df)} raw bars")

        # Resample the raw bars up to the requested timeframe.
        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
        resample_rule = {'5m': '5min', '15m': '15min'}.get(timeframe)
        if resample_rule is not None:
            df = df.resample(resample_rule).agg(agg_dict).dropna()

        logger.info(f" Resampled to {timeframe}: {len(df)} bars")

        return df

    except Exception as e:
        logger.error(f"Failed to load data: {e}")
        raise
|
|
|
|
|
|
def generate_features(df: pd.DataFrame) -> pd.DataFrame:
    """Generate comprehensive feature set.

    Builds the technical-indicator features used by the pipeline (returns,
    volatility, range/ATR, moving averages, RSI, Bollinger, MACD, momentum,
    stochastic, Williams %R, volume, candle shape, price position and
    session/time flags) and returns them appended after the OHLCV columns.

    Args:
        df: OHLCV frame, ideally with a DatetimeIndex (time/session features
            are skipped when the index has no ``hour`` attribute). A
            'volume' column is optional — a constant volume of 1 is
            substituted when absent.

    Returns:
        DataFrame containing the OHLCV columns followed by all feature
        columns. Warm-up rows of rolling windows hold NaN; infinities are
        converted to NaN. Empty input is returned unchanged.
    """
    if len(df) == 0:
        return df

    df = df.copy()
    features = pd.DataFrame(index=df.index)

    close = df['close']
    high = df['high']
    low = df['low']
    open_price = df['open']
    volume = df.get('volume', pd.Series(1, index=df.index))
    # Bug fix: the concat at the end selects a 'volume' column, so the
    # fallback must be materialized when the input did not provide one
    # (previously this raised KeyError for volume-less input).
    if 'volume' not in df.columns:
        df['volume'] = volume

    # Returns over several horizons
    for period in [1, 3, 5, 10, 20]:
        features[f'returns_{period}'] = close.pct_change(period)

    # Volatility: rolling std of 1-bar returns
    for period in [5, 10, 20]:
        features[f'volatility_{period}'] = close.pct_change().rolling(period).std()

    # Range
    candle_range = high - low
    features['range'] = candle_range
    features['range_pct'] = candle_range / close
    for period in [5, 10, 20]:
        features[f'range_ma_{period}'] = candle_range.rolling(period).mean()
    features['range_ratio_5'] = candle_range / features['range_ma_5']
    features['range_ratio_20'] = candle_range / features['range_ma_20']

    # ATR (true range includes gaps vs. previous close)
    tr1 = high - low
    tr2 = abs(high - close.shift(1))
    tr3 = abs(low - close.shift(1))
    true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    features['atr_5'] = true_range.rolling(5).mean()
    features['atr_14'] = true_range.rolling(14).mean()
    features['atr_20'] = true_range.rolling(20).mean()
    features['atr_ratio'] = true_range / features['atr_14']

    # Moving Averages; distances are normalized by ATR to be scale-free
    sma_5 = close.rolling(5).mean()
    sma_10 = close.rolling(10).mean()
    sma_20 = close.rolling(20).mean()
    sma_50 = close.rolling(50).mean()
    ema_5 = close.ewm(span=5, adjust=False).mean()
    ema_20 = close.ewm(span=20, adjust=False).mean()

    features['price_vs_sma5'] = (close - sma_5) / features['atr_14']
    features['price_vs_sma10'] = (close - sma_10) / features['atr_14']
    features['price_vs_sma20'] = (close - sma_20) / features['atr_14']
    features['price_vs_sma50'] = (close - sma_50) / features['atr_14']
    features['sma5_vs_sma20'] = (sma_5 - sma_20) / features['atr_14']
    features['ema5_vs_ema20'] = (ema_5 - ema_20) / features['atr_14']

    # RSI (simple-moving-average variant; epsilon avoids division by zero)
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / (loss + 1e-10)
    features['rsi_14'] = 100 - (100 / (1 + rs))
    features['rsi_oversold'] = (features['rsi_14'] < 30).astype(float)
    features['rsi_overbought'] = (features['rsi_14'] > 70).astype(float)

    # Bollinger Bands (20, 2)
    bb_middle = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_middle + 2 * bb_std
    bb_lower = bb_middle - 2 * bb_std
    features['bb_width'] = (bb_upper - bb_lower) / bb_middle
    features['bb_position'] = (close - bb_lower) / (bb_upper - bb_lower + 1e-10)

    # MACD (12/26/9), ATR-normalized
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd = ema_12 - ema_26
    macd_signal = macd.ewm(span=9, adjust=False).mean()
    features['macd'] = macd / features['atr_14']
    features['macd_signal'] = macd_signal / features['atr_14']
    features['macd_hist'] = (macd - macd_signal) / features['atr_14']

    # Momentum, ATR-normalized
    for period in [5, 10, 20]:
        features[f'momentum_{period}'] = (close - close.shift(period)) / features['atr_14']

    # Stochastic oscillator (14, 3)
    low_14 = low.rolling(14).min()
    high_14 = high.rolling(14).max()
    features['stoch_k'] = 100 * (close - low_14) / (high_14 - low_14 + 1e-10)
    features['stoch_d'] = features['stoch_k'].rolling(3).mean()

    # Williams %R
    features['williams_r'] = -100 * (high_14 - close) / (high_14 - low_14 + 1e-10)

    # Volume (neutral constants when no real volume is available)
    if volume.sum() > 0:
        vol_ma_20 = volume.rolling(20).mean()
        vol_ma_5 = volume.rolling(5).mean()
        features['volume_ratio'] = volume / (vol_ma_20 + 1)
        features['volume_trend'] = (vol_ma_5 - vol_ma_20) / (vol_ma_20 + 1)
    else:
        features['volume_ratio'] = 1.0
        features['volume_trend'] = 0.0

    # Candle shape as fractions of the bar's range
    body = close - open_price
    features['body_pct'] = body / (candle_range + 1e-10)
    features['upper_shadow'] = (high - np.maximum(close, open_price)) / (candle_range + 1e-10)
    features['lower_shadow'] = (np.minimum(close, open_price) - low) / (candle_range + 1e-10)

    # Price position within the bar and recent windows
    features['close_position'] = (close - low) / (candle_range + 1e-10)
    high_5 = high.rolling(5).max()
    low_5 = low.rolling(5).min()
    features['price_position_5'] = (close - low_5) / (high_5 - low_5 + 1e-10)
    high_20 = high.rolling(20).max()
    low_20 = low.rolling(20).min()
    features['price_position_20'] = (close - low_20) / (high_20 - low_20 + 1e-10)

    # Time features (only when the index is datetime-like). Session flags
    # assume the index hours are UTC-aligned — TODO confirm against loader.
    if hasattr(df.index, 'hour'):
        hour = df.index.hour
        day_of_week = df.index.dayofweek
        features['hour_sin'] = np.sin(2 * np.pi * hour / 24)
        features['hour_cos'] = np.cos(2 * np.pi * hour / 24)
        features['dow_sin'] = np.sin(2 * np.pi * day_of_week / 7)
        features['dow_cos'] = np.cos(2 * np.pi * day_of_week / 7)
        features['is_london'] = ((hour >= 8) & (hour < 16)).astype(float)
        features['is_newyork'] = ((hour >= 13) & (hour < 21)).astype(float)
        features['is_overlap'] = ((hour >= 13) & (hour < 16)).astype(float)

    features = features.replace([np.inf, -np.inf], np.nan)
    result = pd.concat([df[['open', 'high', 'low', 'close', 'volume']], features], axis=1)

    return result
|
|
|
|
|
|
def should_trade(result: PredictionResult, strategy: FilterStrategy) -> bool:
    """Return True when a prediction satisfies every filter of *strategy*.

    Applies the attention-band filter on the mean of the 5m/15m attention
    scores, then the metamodel confidence flag and probability threshold.
    """
    mean_attention = 0.5 * (result.attention_score_5m + result.attention_score_15m)

    # Mean attention must fall inside the strategy's configured band.
    if not (strategy.attention_min <= mean_attention <= strategy.attention_max):
        return False

    # Metamodel must flag the trade as confident, when required.
    if strategy.require_confidence and not result.confidence:
        return False

    # Probability threshold applies regardless of the boolean flag.
    return result.confidence_proba >= strategy.confidence_min
|
|
|
|
|
|
def calculate_rr(result: PredictionResult, strategy: FilterStrategy, direction: str) -> float:
    """Return the risk:reward ratio to apply to this trade.

    Static strategies always use ``base_rr``. Dynamic strategies derive the
    ratio from the predicted favorable vs. adverse excursions and clamp it
    into [min_rr, max_rr]; a zero adverse excursion falls back to base_rr.
    """
    if not strategy.use_dynamic_rr:
        return strategy.base_rr

    favorable = abs(result.delta_high_final)
    adverse = abs(result.delta_low_final)
    # A short trade profits from the downside move, so the roles swap.
    if direction != 'long':
        favorable, adverse = adverse, favorable

    ratio = favorable / adverse if adverse > 0 else strategy.base_rr

    # Clamp into the strategy's allowed band.
    return max(strategy.min_rr, min(strategy.max_rr, ratio))
|
|
|
|
|
|
def run_backtest(
    pipeline: HierarchicalPipeline,
    df_5m: pd.DataFrame,
    df_15m: pd.DataFrame,
    symbol: str,
    strategy: FilterStrategy,
    horizon_bars: int = 3,
    step_bars: int = 1
) -> List[TradeResult]:
    """Run backtest with specific strategy.

    Walks forward through the 15m bars, asks the hierarchical pipeline for
    a prediction at each step, simulates one trade per signal over the next
    ``horizon_bars`` 15m bars, and records a TradeResult per signal. The
    strategy filter is *evaluated* but does not skip simulation, so filter
    effectiveness can be measured afterwards via ``passed_filter``.

    Args:
        pipeline: loaded HierarchicalPipeline (models for ``symbol`` loaded).
        df_5m / df_15m: OHLCV frames for the two timeframes.
        symbol: symbol under test; passed through to ``pipeline.predict``.
        strategy: filter / R:R configuration under test.
        horizon_bars: number of 15m bars a trade is allowed to run.
        step_bars: stride between evaluated bars (larger = faster run).

    Returns:
        List of TradeResult, one per successfully simulated signal.
    """
    trades = []
    min_lookback = 100  # bars of history supplied to the pipeline per timeframe

    df_5m = df_5m.sort_index()
    df_15m = df_15m.sort_index()

    df_5m_feat = generate_features(df_5m)
    df_15m_feat = generate_features(df_15m)

    # Skip the warm-up region where rolling features are still NaN.
    valid_start_5m = df_5m_feat.index[min_lookback * 3]
    valid_start_15m = df_15m_feat.index[min_lookback]
    common_start = max(valid_start_5m, valid_start_15m)

    # Drop the trailing bars that lack a full outcome horizon.
    df_15m_test = df_15m_feat[df_15m_feat.index >= common_start].iloc[:-horizon_bars]

    logger.info(f"Backtesting {len(df_15m_test)} bars with strategy '{strategy.name}'...")

    for i in range(0, len(df_15m_test), step_bars):
        current_time = df_15m_test.index[i]

        # Only data up to and including the decision bar is shown to the model.
        df_5m_slice = df_5m_feat[df_5m_feat.index <= current_time].tail(min_lookback * 3)
        df_15m_slice = df_15m_feat[df_15m_feat.index <= current_time].tail(min_lookback)

        if len(df_5m_slice) < min_lookback or len(df_15m_slice) < 50:
            continue

        try:
            result = pipeline.predict(df_5m_slice, df_15m_slice, symbol)

            entry_price = float(df_15m_slice['close'].iloc[-1])

            # Determine direction from the predicted excursions: trade toward
            # the larger one when it dominates by >10%; otherwise fall back
            # to the sign of 5-bar momentum.
            delta_high = result.delta_high_final
            delta_low = result.delta_low_final

            if delta_high > delta_low * 1.1:
                direction = 'long'
            elif delta_low > delta_high * 1.1:
                direction = 'short'
            else:
                momentum = (df_15m_slice['close'].iloc[-1] / df_15m_slice['close'].iloc[-5]) - 1
                direction = 'long' if momentum > 0 else 'short'

            # Evaluate (but do not enforce) the strategy filters.
            passed_filter = should_trade(result, strategy)

            # R:R for this trade (fixed or derived from predicted deltas).
            rr = calculate_rr(result, strategy, direction)

            # SL at the predicted adverse excursion; TP at risk * R:R.
            if direction == 'long':
                stop_loss = entry_price - delta_low
                risk = entry_price - stop_loss
                take_profit = entry_price + (risk * rr)
            else:
                stop_loss = entry_price + delta_high
                risk = stop_loss - entry_price
                take_profit = entry_price - (risk * rr)

            # Outcome window. NOTE(review): the slice starts at the decision
            # bar itself, so that bar's high/low (already formed at entry)
            # counts toward TP/SL — confirm this is intended.
            future_start_idx = df_15m_feat.index.get_loc(current_time)
            future_end_idx = min(future_start_idx + horizon_bars, len(df_15m_feat))
            future_data = df_15m_feat.iloc[future_start_idx:future_end_idx]

            if len(future_data) < 2:
                continue

            actual_high = future_data['high'].max()
            actual_low = future_data['low'].min()

            # Determine outcome. When both TP and SL lie inside the window,
            # the larger excursion is assumed to have been reached first
            # (intra-horizon bar ordering is not inspected).
            if direction == 'long':
                hit_tp = actual_high >= take_profit
                hit_sl = actual_low <= stop_loss

                if hit_tp and hit_sl:
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = high_dist >= low_dist
                    hit_sl = not hit_tp

                if hit_tp:
                    profit_r = rr
                elif hit_sl:
                    profit_r = -1.0
                else:
                    # Neither level hit: mark-to-market at the horizon close.
                    actual_pnl = future_data['close'].iloc[-1] - entry_price
                    profit_r = actual_pnl / risk if risk > 0 else 0
            else:
                hit_tp = actual_low <= take_profit
                hit_sl = actual_high >= stop_loss

                if hit_tp and hit_sl:
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = low_dist >= high_dist
                    hit_sl = not hit_tp

                if hit_tp:
                    profit_r = rr
                elif hit_sl:
                    profit_r = -1.0
                else:
                    # Neither level hit: mark-to-market at the horizon close.
                    actual_pnl = entry_price - future_data['close'].iloc[-1]
                    profit_r = actual_pnl / risk if risk > 0 else 0

            avg_attention = (result.attention_score_5m + result.attention_score_15m) / 2

            trade = TradeResult(
                timestamp=current_time,
                symbol=symbol,
                direction=direction,
                entry_price=entry_price,
                stop_loss=stop_loss,
                take_profit=take_profit,
                risk=risk,
                reward=risk * rr,
                risk_reward=rr,
                actual_high=actual_high,
                actual_low=actual_low,
                hit_tp=hit_tp,
                hit_sl=hit_sl,
                profit_r=profit_r,
                attention_score=avg_attention,
                attention_class_5m=result.attention_class_5m,
                attention_class_15m=result.attention_class_15m,
                confidence=result.confidence,
                confidence_proba=result.confidence_proba,
                delta_high_pred=delta_high,
                delta_low_pred=delta_low,
                strategy=strategy.name,
                passed_filter=passed_filter
            )
            trades.append(trade)

        except Exception as e:
            # Best-effort: a failed prediction skips the bar rather than
            # aborting the whole backtest; details go to the DEBUG log.
            logger.debug(f"Prediction failed at {current_time}: {e}")
            continue

        if (i + 1) % 1000 == 0:
            logger.info(f" Processed {i + 1}/{len(df_15m_test)} bars...")

    return trades
|
|
|
|
|
|
def calculate_metrics(trades: List[TradeResult], strategy: FilterStrategy, symbol: str) -> Optional[StrategyMetrics]:
    """Calculate strategy metrics.

    Aggregates the simulated trades into a StrategyMetrics summary. Only
    trades with ``passed_filter`` count as executed; the rest are reported
    through ``filtered_out`` / ``filter_rate``.

    Returns:
        None when ``trades`` is empty; a zeroed StrategyMetrics when every
        trade was filtered out; a full summary otherwise.
    """
    if not trades:
        return None

    all_trades = trades
    total_signals = len(all_trades)

    executed = [t for t in trades if t.passed_filter]
    filtered_out = total_signals - len(executed)
    filter_rate = filtered_out / total_signals if total_signals > 0 else 0

    # Nothing survived the filters: return a zeroed summary so callers can
    # still compare filter rates across strategies.
    if not executed:
        return StrategyMetrics(
            strategy_name=strategy.name,
            strategy_description=strategy.description,
            symbol=symbol,
            period=f"{min(t.timestamp for t in trades).strftime('%Y-%m-%d')} to {max(t.timestamp for t in trades).strftime('%Y-%m-%d')}",
            total_signals=total_signals,
            filtered_out=filtered_out,
            executed_trades=0,
            filter_rate=filter_rate,
            wins=0, losses=0, win_rate=0,
            total_profit_r=0, avg_profit_r=0, expectancy=0, profit_factor=0,
            max_consecutive_losses=0, max_drawdown_r=0,
            avg_attention_winners=0, avg_attention_losers=0,
            avg_confidence_winners=0, avg_confidence_losers=0,
            avg_rr_used=strategy.base_rr
        )

    # Breakeven trades (profit_r == 0) are counted as losses.
    wins = [t for t in executed if t.profit_r > 0]
    losses = [t for t in executed if t.profit_r <= 0]

    win_rate = len(wins) / len(executed) if executed else 0

    total_profit_r = sum(t.profit_r for t in executed)
    avg_profit_r = total_profit_r / len(executed) if executed else 0

    # Classic expectancy: average R expected per trade.
    avg_win = sum(t.profit_r for t in wins) / len(wins) if wins else 0
    avg_loss = abs(sum(t.profit_r for t in losses) / len(losses)) if losses else 0
    expectancy = (win_rate * avg_win) - ((1 - win_rate) * avg_loss)

    gross_profit = sum(t.profit_r for t in wins)
    gross_loss = abs(sum(t.profit_r for t in losses))
    profit_factor = gross_profit / gross_loss if gross_loss > 0 else float('inf')

    # Risk metrics: longest losing streak and max drawdown of the
    # cumulative-R equity curve (trades in chronological order).
    consecutive_losses = 0
    max_consecutive_losses = 0
    equity_curve = []
    cumulative = 0

    for t in executed:
        cumulative += t.profit_r
        equity_curve.append(cumulative)
        if t.profit_r <= 0:
            consecutive_losses += 1
            max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        else:
            consecutive_losses = 0

    # Peak-to-trough drawdown over the equity curve (baseline peak of 0).
    peak = 0
    max_dd = 0
    for eq in equity_curve:
        if eq > peak:
            peak = eq
        dd = peak - eq
        if dd > max_dd:
            max_dd = dd

    # Diagnostics: model scores split by outcome.
    avg_attention_winners = np.mean([t.attention_score for t in wins]) if wins else 0
    avg_attention_losers = np.mean([t.attention_score for t in losses]) if losses else 0
    avg_confidence_winners = np.mean([t.confidence_proba for t in wins]) if wins else 0
    avg_confidence_losers = np.mean([t.confidence_proba for t in losses]) if losses else 0
    avg_rr_used = np.mean([t.risk_reward for t in executed]) if executed else strategy.base_rr

    start_date = min(t.timestamp for t in trades)
    end_date = max(t.timestamp for t in trades)
    period = f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"

    return StrategyMetrics(
        strategy_name=strategy.name,
        strategy_description=strategy.description,
        symbol=symbol,
        period=period,
        total_signals=total_signals,
        filtered_out=filtered_out,
        executed_trades=len(executed),
        filter_rate=round(filter_rate, 4),
        wins=len(wins),
        losses=len(losses),
        win_rate=round(win_rate, 4),
        total_profit_r=round(total_profit_r, 2),
        avg_profit_r=round(avg_profit_r, 4),
        expectancy=round(expectancy, 4),
        profit_factor=round(profit_factor, 2),
        max_consecutive_losses=max_consecutive_losses,
        max_drawdown_r=round(max_dd, 2),
        avg_attention_winners=round(avg_attention_winners, 3),
        avg_attention_losers=round(avg_attention_losers, 3),
        avg_confidence_winners=round(avg_confidence_winners, 3),
        avg_confidence_losers=round(avg_confidence_losers, 3),
        avg_rr_used=round(avg_rr_used, 2)
    )
|
|
|
|
|
|
def print_metrics(metrics: StrategyMetrics):
    """Pretty-print one strategy's backtest summary to stdout."""
    bar = '=' * 70
    report = [
        f"\n{bar}",
        f"STRATEGY: {metrics.strategy_name}",
        f"Description: {metrics.strategy_description}",
        bar,
        f"Symbol: {metrics.symbol} | Period: {metrics.period}",
        "\n--- Trade Statistics ---",
        f"Total Signals: {metrics.total_signals}",
        f"Filtered Out: {metrics.filtered_out} ({metrics.filter_rate * 100:.1f}%)",
        f"Executed Trades: {metrics.executed_trades}",
        f"Wins: {metrics.wins} | Losses: {metrics.losses}",
    ]

    # PASS/FAIL banners against the project targets (40% win rate,
    # 0.10 expectancy; anything above -0.04 counts as an improvement).
    wr_status = "PASS" if metrics.win_rate >= 0.40 else "FAIL"
    exp_status = "PASS" if metrics.expectancy >= 0.10 else ("IMPROVED" if metrics.expectancy > -0.04 else "FAIL")

    report += [
        "\n--- Key Metrics ---",
        f"Win Rate: {metrics.win_rate * 100:.1f}% (target: 40%) [{wr_status}]",
        f"Expectancy: {metrics.expectancy:.4f} (target: 0.10) [{exp_status}]",
        f"Profit Factor: {metrics.profit_factor:.2f}",
        f"Total Profit (R): {metrics.total_profit_r:.2f}",
        f"Avg R:R Used: {metrics.avg_rr_used:.2f}",
        "\n--- Risk ---",
        f"Max Consecutive Losses: {metrics.max_consecutive_losses}",
        f"Max Drawdown (R): {metrics.max_drawdown_r:.2f}",
        "\n--- Analysis ---",
        f"Avg Attention (Winners): {metrics.avg_attention_winners:.3f}",
        f"Avg Attention (Losers): {metrics.avg_attention_losers:.3f}",
        f"Avg Confidence (Winners): {metrics.avg_confidence_winners:.3f}",
        f"Avg Confidence (Losers): {metrics.avg_confidence_losers:.3f}",
    ]

    # One write keeps the output byte-identical to line-by-line prints.
    print("\n".join(report))
|
|
def print_comparison(all_metrics: List[StrategyMetrics]):
    """Print a ranking table of all tested strategies and name the best one.

    Strategies are ordered by expectancy (descending), which combines win
    rate and payoff into a single per-trade figure.
    """
    rule = '=' * 90
    print(f"\n{rule}")
    print("STRATEGY COMPARISON")
    print(rule)
    print(f"{'Strategy':<25} {'Trades':>8} {'Filter%':>8} {'WinRate':>8} {'Expect':>10} {'PF':>6} {'Profit(R)':>10}")
    print("-" * 90)

    ranked = sorted(all_metrics, key=lambda m: m.expectancy, reverse=True)
    for m in ranked:
        wr_str = f"{m.win_rate * 100:.1f}%"
        print(f"{m.strategy_name:<25} {m.executed_trades:>8} {m.filter_rate * 100:>7.1f}% {wr_str:>8} {m.expectancy:>10.4f} {m.profit_factor:>6.2f} {m.total_profit_r:>10.2f}")

    print(rule)

    # Highlight the winner by expectancy.
    best = max(all_metrics, key=lambda x: x.expectancy)
    print(f"\nBest Strategy by Expectancy: {best.strategy_name}")
    print(f" Expectancy: {best.expectancy:.4f}")
    print(f" Win Rate: {best.win_rate * 100:.1f}%")
    print(f" Profit Factor: {best.profit_factor:.2f}")
|
|
|
def main():
    """CLI entry point: load models and data, then backtest the selected
    strategies per symbol, printing per-strategy reports, a comparison
    table, and saving all metrics as JSON under --output-dir."""
    parser = argparse.ArgumentParser(description='Enhanced Hierarchical Pipeline Backtest')
    parser.add_argument('--symbols', nargs='+', default=['XAUUSD'],
                        help='Symbols to backtest')
    parser.add_argument('--start-date', type=str, default='2024-09-01')
    parser.add_argument('--end-date', type=str, default='2024-12-31')
    parser.add_argument('--strategy', type=str, default='all',
                        choices=['all'] + list(STRATEGIES.keys()),
                        help='Strategy to test')
    parser.add_argument('--step', type=int, default=3)  # stride in 15m bars
    parser.add_argument('--models-dir', type=str, default='models')
    parser.add_argument('--output-dir', type=str, default='models/backtest_results_v2')

    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    setup_logging(output_dir / 'logs', 'hierarchical_backtest_v2')

    logger.info("=" * 70)
    logger.info("HIERARCHICAL PIPELINE BACKTEST V2 - STRATEGY COMPARISON")
    logger.info("=" * 70)

    # Initialize pipeline with the three model-directory paths it expects.
    config = PipelineConfig(
        attention_model_path=f'{args.models_dir}/attention',
        base_model_path=f'{args.models_dir}/symbol_timeframe_models',
        metamodel_path=f'{args.models_dir}/metamodels'
    )
    pipeline = HierarchicalPipeline(config)

    # Determine strategies to test ('all' runs every predefined strategy).
    if args.strategy == 'all':
        strategies_to_test = list(STRATEGIES.values())
    else:
        strategies_to_test = [STRATEGIES[args.strategy]]

    all_results = []

    for symbol in args.symbols:
        logger.info(f"\nProcessing {symbol}...")

        # Symbols without trained models are skipped, not fatal.
        if not pipeline.load_models(symbol):
            logger.warning(f"Could not load models for {symbol}")
            continue

        # Load data once per symbol and reuse it for every strategy.
        try:
            df_5m = load_ohlcv_from_mysql(symbol, '5m', args.start_date, args.end_date)
            df_15m = load_ohlcv_from_mysql(symbol, '15m', args.start_date, args.end_date)

            if df_5m.empty or df_15m.empty:
                continue
        except Exception as e:
            logger.error(f"Data loading failed: {e}")
            continue

        symbol_metrics = []

        for strategy in strategies_to_test:
            logger.info(f"\nTesting strategy: {strategy.name}")

            trades = run_backtest(
                pipeline=pipeline,
                df_5m=df_5m,
                df_15m=df_15m,
                symbol=symbol,
                strategy=strategy,
                step_bars=args.step
            )

            if trades:
                metrics = calculate_metrics(trades, strategy, symbol)
                if metrics:
                    symbol_metrics.append(metrics)
                    print_metrics(metrics)

        if symbol_metrics:
            print_comparison(symbol_metrics)
            all_results.extend(symbol_metrics)

    # Save results; default=str handles datetimes inside the dataclasses.
    if all_results:
        results_file = output_dir / f'strategy_comparison_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json'
        with open(results_file, 'w') as f:
            json.dump([asdict(m) for m in all_results], f, indent=2, default=str)
        logger.info(f"\nResults saved to: {results_file}")

    logger.info("\nBACKTEST V2 COMPLETE")


if __name__ == "__main__":
    main()
|