Hierarchical ML Pipeline for trading predictions:
- Level 0: Attention Models (volatility/flow classification)
- Level 1: Base Models (XGBoost per symbol/timeframe)
- Level 2: Metamodels (XGBoost Stacking + Neural Gating)
Key components:
- src/pipelines/hierarchical_pipeline.py - Main prediction pipeline
- src/models/ - All ML model classes
- src/training/ - Training utilities
- src/api/ - FastAPI endpoints
- scripts/ - Training and evaluation scripts
- config/ - YAML configurations
Note: Trained models (*.joblib, *.pt) are gitignored.
Regenerate with training scripts.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
857 lines
31 KiB
Python
857 lines
31 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Hierarchical Pipeline Backtesting
|
|
=================================
|
|
Evaluates the 3-level hierarchical ML architecture with R:R 2:1 backtesting.
|
|
|
|
Key metrics:
|
|
- Win Rate with R:R 2:1 (target: >40%)
|
|
- Expectancy (target: >0.10)
|
|
- Trade filtering effectiveness
|
|
- Comparison: filtered vs unfiltered
|
|
|
|
Usage:
|
|
python scripts/evaluate_hierarchical.py --symbols XAUUSD EURUSD
|
|
python scripts/evaluate_hierarchical.py --symbols XAUUSD --rr 2.0 --attention-threshold 0.8
|
|
|
|
Author: ML Pipeline
|
|
Version: 1.0.0
|
|
Created: 2026-01-07
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Dict, List, Tuple, Optional, Any
|
|
from dataclasses import dataclass, asdict
|
|
import json
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from loguru import logger
|
|
import joblib
|
|
|
|
# Make the project's src/ importable (scripts/ lives one level below repo root).
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))

# Load hierarchical_pipeline.py straight from its file path instead of through
# the package, so a heavy or broken src/pipelines/__init__.py cannot block this
# script from starting.
import importlib.util

pipeline_path = Path(__file__).parent.parent / 'src' / 'pipelines' / 'hierarchical_pipeline.py'
spec = importlib.util.spec_from_file_location("hierarchical_pipeline", pipeline_path)
hierarchical_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(hierarchical_module)

# Re-export the names this script actually uses from the dynamically loaded module.
HierarchicalPipeline = hierarchical_module.HierarchicalPipeline
PipelineConfig = hierarchical_module.PipelineConfig
PredictionResult = hierarchical_module.PredictionResult
|
|
|
|
|
|
@dataclass
class TradeResult:
    """Result of a single trade simulated by the backtest (see run_backtest)."""
    timestamp: datetime        # entry time: index of the 15m decision bar
    symbol: str                # trading symbol, e.g. 'XAUUSD'
    direction: str             # 'long' or 'short'
    entry_price: float         # close of the decision bar
    stop_loss: float           # stop level derived from the predicted adverse delta
    take_profit: float         # target level = entry +/- risk * risk_reward
    risk: float                # |entry - stop_loss| in price units
    reward: float              # risk * risk_reward in price units
    actual_high: float         # highest high within the evaluation horizon
    actual_low: float          # lowest low within the evaluation horizon
    hit_tp: bool               # take-profit level reached within the horizon
    hit_sl: bool               # stop-loss level reached within the horizon
    profit_r: float            # Profit in R multiples (+RR on TP, -1 on SL, marked-to-market otherwise)
    attention_score: float     # mean of the 5m and 15m attention scores
    confidence_proba: float    # confidence probability reported by the pipeline
    trade_quality: str         # quality label reported by the pipeline
    was_filtered: bool         # Would this trade be filtered by attention/confidence?
|
|
|
|
|
|
@dataclass
class BacktestMetrics:
    """Comprehensive backtest metrics for one symbol/timeframe run."""
    symbol: str
    timeframe: str       # bar size the backtest ran on (currently always '15m')
    period: str          # human-readable "start to end" date range
    risk_reward: float   # R:R ratio used for take-profit placement

    # Trade counts
    total_bars: int          # NOTE(review): populated with len(trades), i.e. signal count, not raw bar count
    total_trades: int        # all generated signals (filtered + executed)
    filtered_trades: int     # signals rejected by the attention/confidence filter
    executed_trades: int     # signals that were actually traded

    # Win/Loss (executed trades only; profit_r <= 0 counts as a loss)
    wins: int
    losses: int
    win_rate: float

    # Profitability, expressed in R multiples
    total_profit_r: float
    avg_profit_r: float
    expectancy: float        # (win_rate * avg_win) - (loss_rate * avg_loss)
    profit_factor: float     # gross profit / gross loss (inf when no losses)

    # Risk metrics
    max_consecutive_losses: int
    max_drawdown_r: float    # worst peak-to-trough drop of the cumulative R equity curve

    # Attention analysis (win rate bucketed by attention score)
    avg_attention_winners: float
    avg_attention_losers: float
    high_attention_win_rate: float    # trades with attention >= 2.0
    medium_attention_win_rate: float  # attention in [0.8, 2.0)
    low_attention_win_rate: float     # attention < 0.8

    # Comparison: unfiltered baseline (every signal taken, no filter)
    unfiltered_total_trades: int
    unfiltered_win_rate: float
    unfiltered_expectancy: float
    improvement_pct: float   # relative expectancy improvement of filtered vs unfiltered, in %
|
|
|
|
|
|
def setup_logging(log_dir: Path, experiment_name: str) -> Path:
    """Route loguru output to the console (INFO) and a timestamped file (DEBUG).

    Creates `log_dir` if needed and returns the path of the log file.
    """
    log_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_file = log_dir / f"{experiment_name}_{stamp}.log"

    # Drop loguru's default sink and install our two explicit ones.
    logger.remove()
    logger.add(sys.stderr, level="INFO", format="{time:HH:mm:ss} | {level} | {message}")
    logger.add(log_file, level="DEBUG", rotation="10 MB")

    return log_file
|
|
|
|
|
|
def load_ohlcv_from_mysql(
    symbol: str,
    timeframe: str,
    start_date: str,
    end_date: str
) -> pd.DataFrame:
    """Load OHLCV bars from MySQL and resample them to the requested timeframe.

    Args:
        symbol: Plain symbol name, e.g. 'XAUUSD'; mapped to a DB ticker below.
        timeframe: One of '5m', '15m', '1h', '4h'. Any other value returns the
            base-frequency data unresampled.
        start_date: Inclusive start date string (YYYY-MM-DD).
        end_date: Inclusive end date string (YYYY-MM-DD).

    Returns:
        DataFrame indexed by timestamp with open/high/low/close/volume columns;
        an empty DataFrame when no rows match.

    Raises:
        Exception: re-raises any database/query error after logging it.
    """
    from data.database import MySQLConnection

    # Map symbol to the ticker naming used in the DB (C: prefix default, X: crypto)
    ticker_map = {
        'XAUUSD': 'C:XAUUSD',
        'EURUSD': 'C:EURUSD',
        'GBPUSD': 'C:GBPUSD',
        'USDJPY': 'C:USDJPY',
        'BTCUSD': 'X:BTCUSD'
    }
    ticker = ticker_map.get(symbol, f'C:{symbol}')

    logger.info(f"Loading {symbol} {timeframe} data from {start_date} to {end_date}...")

    try:
        db = MySQLConnection()

        # Parameterized query: symbol/date values originate from the CLI, so
        # never interpolate them into the SQL string (avoids injection and
        # quoting bugs). %(name)s is the pyformat paramstyle used by the
        # common MySQL DB-API drivers (pymysql / mysqlclient).
        query = """
            SELECT date_agg as timestamp, open, high, low, close, volume
            FROM tickers_agg_data
            WHERE ticker = %(ticker)s
              AND date_agg >= %(start_date)s
              AND date_agg <= %(end_date)s
            ORDER BY date_agg ASC
        """
        params = {'ticker': ticker, 'start_date': start_date, 'end_date': end_date}
        df = pd.read_sql(query, db.engine, params=params)

        if df.empty:
            logger.warning(f"No data found for {symbol}")
            return df

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df.set_index('timestamp', inplace=True)
        df.sort_index(inplace=True)

        logger.info(f" Loaded {len(df)} raw bars")

        # Resample to requested timeframe with standard OHLCV aggregation
        agg_dict = {
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }
        # Supported timeframe -> pandas resample rule
        rule_map = {'5m': '5min', '15m': '15min', '1h': '1h', '4h': '4h'}
        rule = rule_map.get(timeframe)
        if rule is not None:
            df = df.resample(rule).agg(agg_dict).dropna()

        logger.info(f" Resampled to {timeframe}: {len(df)} bars")

        return df

    except Exception as e:
        logger.error(f"Failed to load data from MySQL: {e}")
        raise
|
|
|
|
|
|
def generate_features(df: pd.DataFrame) -> pd.DataFrame:
    """Generate comprehensive feature set matching training.

    Builds the full technical-indicator feature matrix (returns, volatility,
    range/ATR, moving-average distances, RSI, Bollinger, MACD, momentum,
    stochastic, Williams %R, volume, candle anatomy, session/time flags) on
    top of an OHLCV frame. NOTE: must stay in sync with the feature code used
    at training time, otherwise the models receive shifted inputs.

    Args:
        df: OHLCV DataFrame indexed by timestamp. A 'volume' column is
            optional; a constant dummy volume of 1 is substituted when absent.

    Returns:
        New DataFrame containing open/high/low/close/volume plus all features.
        Warm-up rows of rolling features are NaN; +/-inf is replaced by NaN.
        An empty input frame is returned unchanged.
    """
    if len(df) == 0:
        return df

    df = df.copy()
    features = pd.DataFrame(index=df.index)

    close = df['close']
    high = df['high']
    low = df['low']
    open_price = df['open']
    # 'volume' may be missing in some sources; fall back to a dummy series
    volume = df.get('volume', pd.Series(1, index=df.index))

    # Returns over several horizons
    features['returns_1'] = close.pct_change(1)
    features['returns_3'] = close.pct_change(3)
    features['returns_5'] = close.pct_change(5)
    features['returns_10'] = close.pct_change(10)
    features['returns_20'] = close.pct_change(20)

    # Realized volatility (std of 1-bar returns)
    features['volatility_5'] = close.pct_change().rolling(5).std()
    features['volatility_10'] = close.pct_change().rolling(10).std()
    features['volatility_20'] = close.pct_change().rolling(20).std()

    # Candle range and its moving averages / ratios
    candle_range = high - low
    features['range'] = candle_range
    features['range_pct'] = candle_range / close
    features['range_ma_5'] = candle_range.rolling(5).mean()
    features['range_ma_10'] = candle_range.rolling(10).mean()
    features['range_ma_20'] = candle_range.rolling(20).mean()
    features['range_ratio_5'] = candle_range / features['range_ma_5']
    features['range_ratio_20'] = candle_range / features['range_ma_20']

    # ATR (simple moving average of the true range)
    tr1 = high - low
    tr2 = abs(high - close.shift(1))
    tr3 = abs(low - close.shift(1))
    true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    features['atr_5'] = true_range.rolling(5).mean()
    features['atr_14'] = true_range.rolling(14).mean()
    features['atr_20'] = true_range.rolling(20).mean()
    features['atr_ratio'] = true_range / features['atr_14']

    # Moving averages; distances are normalized by ATR to be scale-free
    sma_5 = close.rolling(5).mean()
    sma_10 = close.rolling(10).mean()
    sma_20 = close.rolling(20).mean()
    sma_50 = close.rolling(50).mean()
    ema_5 = close.ewm(span=5, adjust=False).mean()
    ema_20 = close.ewm(span=20, adjust=False).mean()

    features['price_vs_sma5'] = (close - sma_5) / features['atr_14']
    features['price_vs_sma10'] = (close - sma_10) / features['atr_14']
    features['price_vs_sma20'] = (close - sma_20) / features['atr_14']
    features['price_vs_sma50'] = (close - sma_50) / features['atr_14']
    features['sma5_vs_sma20'] = (sma_5 - sma_20) / features['atr_14']
    features['ema5_vs_ema20'] = (ema_5 - ema_20) / features['atr_14']

    # RSI via simple rolling means (epsilon guards divide-by-zero)
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / (loss + 1e-10)
    features['rsi_14'] = 100 - (100 / (1 + rs))
    features['rsi_oversold'] = (features['rsi_14'] < 30).astype(float)
    features['rsi_overbought'] = (features['rsi_14'] > 70).astype(float)

    # Bollinger Bands (20-period, 2 std)
    bb_middle = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_middle + 2 * bb_std
    bb_lower = bb_middle - 2 * bb_std
    features['bb_width'] = (bb_upper - bb_lower) / bb_middle
    features['bb_position'] = (close - bb_lower) / (bb_upper - bb_lower + 1e-10)

    # MACD (12/26/9), ATR-normalized
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd = ema_12 - ema_26
    macd_signal = macd.ewm(span=9, adjust=False).mean()
    features['macd'] = macd / features['atr_14']
    features['macd_signal'] = macd_signal / features['atr_14']
    features['macd_hist'] = (macd - macd_signal) / features['atr_14']

    # Momentum (ATR-normalized price change)
    features['momentum_5'] = (close - close.shift(5)) / features['atr_14']
    features['momentum_10'] = (close - close.shift(10)) / features['atr_14']
    features['momentum_20'] = (close - close.shift(20)) / features['atr_14']

    # Stochastic oscillator
    low_14 = low.rolling(14).min()
    high_14 = high.rolling(14).max()
    features['stoch_k'] = 100 * (close - low_14) / (high_14 - low_14 + 1e-10)
    features['stoch_d'] = features['stoch_k'].rolling(3).mean()

    # Williams %R
    features['williams_r'] = -100 * (high_14 - close) / (high_14 - low_14 + 1e-10)

    # Volume features (constants when there is no usable volume data)
    if volume.sum() > 0:
        vol_ma_5 = volume.rolling(5).mean()
        vol_ma_20 = volume.rolling(20).mean()
        features['volume_ratio'] = volume / (vol_ma_20 + 1)
        features['volume_trend'] = (vol_ma_5 - vol_ma_20) / (vol_ma_20 + 1)
    else:
        features['volume_ratio'] = 1.0
        features['volume_trend'] = 0.0

    # Candle anatomy (body and shadows as fractions of the full range)
    body = close - open_price
    features['body_pct'] = body / (candle_range + 1e-10)
    features['upper_shadow'] = (high - np.maximum(close, open_price)) / (candle_range + 1e-10)
    features['lower_shadow'] = (np.minimum(close, open_price) - low) / (candle_range + 1e-10)

    # Close position within the bar and within recent ranges
    features['close_position'] = (close - low) / (candle_range + 1e-10)
    high_5 = high.rolling(5).max()
    low_5 = low.rolling(5).min()
    features['price_position_5'] = (close - low_5) / (high_5 - low_5 + 1e-10)
    high_20 = high.rolling(20).max()
    low_20 = low.rolling(20).min()
    features['price_position_20'] = (close - low_20) / (high_20 - low_20 + 1e-10)

    # Cyclical time encodings and session flags (only for datetime-like index;
    # assumes hours align with the sessions used at training — TODO confirm tz)
    if hasattr(df.index, 'hour'):
        hour = df.index.hour
        day_of_week = df.index.dayofweek
        features['hour_sin'] = np.sin(2 * np.pi * hour / 24)
        features['hour_cos'] = np.cos(2 * np.pi * hour / 24)
        features['dow_sin'] = np.sin(2 * np.pi * day_of_week / 7)
        features['dow_cos'] = np.cos(2 * np.pi * day_of_week / 7)
        features['is_london'] = ((hour >= 8) & (hour < 16)).astype(float)
        features['is_newyork'] = ((hour >= 13) & (hour < 21)).astype(float)
        features['is_overlap'] = ((hour >= 13) & (hour < 16)).astype(float)

    # Replace infinities produced by zero denominators with NaN
    features = features.replace([np.inf, -np.inf], np.nan)

    # Combine OHLCV with features. Build the base frame from the (possibly
    # synthesized) volume series so a missing 'volume' column cannot KeyError
    # — the original df[['...', 'volume']] indexing defeated the df.get()
    # fallback above.
    base = df[['open', 'high', 'low', 'close']].copy()
    base['volume'] = volume
    result = pd.concat([base, features], axis=1)
    return result
|
|
|
|
|
|
def run_backtest(
    pipeline: HierarchicalPipeline,
    df_5m: pd.DataFrame,
    df_15m: pd.DataFrame,
    symbol: str,
    risk_reward: float = 2.0,
    attention_threshold: float = 0.8,
    horizon_bars: int = 3,
    step_bars: int = 1
) -> List[TradeResult]:
    """
    Run backtest simulation.

    Walks the 15m series bar by bar, asks the hierarchical pipeline for a
    prediction at each step, derives a long/short setup with the stop at the
    predicted adverse delta and the target at `risk_reward` multiples of the
    risk, then scores the outcome against the next `horizon_bars` bars.
    Every signal is recorded; `was_filtered` marks the ones the attention /
    confidence filter would have rejected, so filtered-vs-unfiltered stats
    can be compared afterwards.

    Args:
        pipeline: Hierarchical pipeline instance (models already loaded)
        df_5m: 5-minute OHLCV data
        df_15m: 15-minute OHLCV data
        symbol: Trading symbol
        risk_reward: Risk/reward ratio for TP
        attention_threshold: Minimum attention to take trade
        horizon_bars: Bars to look forward for TP/SL
        step_bars: Step size between predictions

    Returns:
        List of TradeResult (one per signal, filtered or not)
    """
    trades = []
    min_lookback = 100  # Minimum bars for features (longest rolling window is 50)

    # Ensure data is sorted chronologically before any slicing
    df_5m = df_5m.sort_index()
    df_15m = df_15m.sort_index()

    # Add features (same generator used at training time)
    df_5m_feat = generate_features(df_5m)
    df_15m_feat = generate_features(df_15m)

    # Get common valid range: skip the rolling-feature warm-up on both frames
    valid_start_5m = df_5m_feat.index[min_lookback * 3]
    valid_start_15m = df_15m_feat.index[min_lookback]
    common_start = max(valid_start_5m, valid_start_15m)

    # Filter to common range leaving room for horizon at the tail
    df_15m_test = df_15m_feat[df_15m_feat.index >= common_start].iloc[:-horizon_bars]

    logger.info(f"Backtesting {len(df_15m_test)} bars...")

    for i in range(0, len(df_15m_test), step_bars):
        current_time = df_15m_test.index[i]

        # Get historical data up to current time (inclusive) — the model only
        # ever sees bars at or before the decision bar
        df_5m_slice = df_5m_feat[df_5m_feat.index <= current_time].tail(min_lookback * 3)
        df_15m_slice = df_15m_feat[df_15m_feat.index <= current_time].tail(min_lookback)

        if len(df_5m_slice) < min_lookback or len(df_15m_slice) < 50:
            continue

        try:
            # Get prediction from the 3-level pipeline
            result = pipeline.predict(df_5m_slice, df_15m_slice, symbol)

            # Entry at the close of the decision bar
            entry_price = float(df_15m_slice['close'].iloc[-1])

            # Determine direction from predicted high/low deltas; the 1.1
            # factor demands a 10% dominance margin before trusting one side
            delta_high = result.delta_high_final
            delta_low = result.delta_low_final

            if delta_high > delta_low * 1.1:
                direction = 'long'
            elif delta_low > delta_high * 1.1:
                direction = 'short'
            else:
                # Deltas ambiguous: fall back to 5-bar price momentum
                momentum = (df_15m_slice['close'].iloc[-1] / df_15m_slice['close'].iloc[-5]) - 1
                direction = 'long' if momentum > 0 else 'short'

            # Calculate SL at the predicted adverse excursion, TP at risk * R:R
            if direction == 'long':
                stop_loss = entry_price - delta_low
                risk = entry_price - stop_loss
                take_profit = entry_price + (risk * risk_reward)
            else:
                stop_loss = entry_price + delta_high
                risk = stop_loss - entry_price
                take_profit = entry_price - (risk * risk_reward)

            # Future window used to score the trade. NOTE(review): the slice
            # starts AT the decision bar, so that bar's own high/low can
            # trigger TP/SL even though entry is at its close — confirm
            # whether the window should start one bar later.
            future_start_idx = df_15m_feat.index.get_loc(current_time)
            future_end_idx = min(future_start_idx + horizon_bars, len(df_15m_feat))
            future_data = df_15m_feat.iloc[future_start_idx:future_end_idx]

            if len(future_data) < 2:
                continue

            actual_high = future_data['high'].max()
            actual_low = future_data['low'].min()

            # Determine outcome
            if direction == 'long':
                hit_tp = actual_high >= take_profit
                hit_sl = actual_low <= stop_loss

                if hit_tp and hit_sl:
                    # Both hit - determine which first (simplified heuristic:
                    # assume the larger excursion happened first; true intrabar
                    # ordering is unknowable at this timeframe)
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = high_dist >= low_dist
                    hit_sl = not hit_tp

                if hit_tp:
                    profit_r = risk_reward
                elif hit_sl:
                    profit_r = -1.0
                else:
                    # Neither hit - mark to market at the window's last close
                    actual_pnl = future_data['close'].iloc[-1] - entry_price
                    profit_r = actual_pnl / risk if risk > 0 else 0
            else:
                hit_tp = actual_low <= take_profit
                hit_sl = actual_high >= stop_loss

                if hit_tp and hit_sl:
                    # Same simplified tie-break, mirrored for shorts
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = low_dist >= high_dist
                    hit_sl = not hit_tp

                if hit_tp:
                    profit_r = risk_reward
                elif hit_sl:
                    profit_r = -1.0
                else:
                    actual_pnl = entry_price - future_data['close'].iloc[-1]
                    profit_r = actual_pnl / risk if risk > 0 else 0

            # Average the two timeframes' attention; a signal is "filtered"
            # when attention is too low or the pipeline's confidence flag is
            # off (presumably a boolean gate — verify on PredictionResult)
            avg_attention = (result.attention_score_5m + result.attention_score_15m) / 2
            was_filtered = avg_attention < attention_threshold or not result.confidence

            trade = TradeResult(
                timestamp=current_time,
                symbol=symbol,
                direction=direction,
                entry_price=entry_price,
                stop_loss=stop_loss,
                take_profit=take_profit,
                risk=risk,
                reward=risk * risk_reward,
                actual_high=actual_high,
                actual_low=actual_low,
                hit_tp=hit_tp,
                hit_sl=hit_sl,
                profit_r=profit_r,
                attention_score=avg_attention,
                confidence_proba=result.confidence_proba,
                trade_quality=result.trade_quality,
                was_filtered=was_filtered
            )
            trades.append(trade)

        except Exception as e:
            # Best-effort loop: one failed prediction should not kill the run
            logger.debug(f"Prediction failed at {current_time}: {e}")
            continue

        if (i + 1) % 500 == 0:
            logger.info(f" Processed {i + 1}/{len(df_15m_test)} bars...")

    return trades
|
|
|
|
|
|
def calculate_metrics(
    trades: List[TradeResult],
    symbol: str,
    risk_reward: float,
    attention_threshold: float
) -> Optional[BacktestMetrics]:
    """Calculate comprehensive backtest metrics.

    Args:
        trades: Every simulated trade (both filtered and executed).
        symbol: Trading symbol the trades belong to.
        risk_reward: R:R ratio the backtest was run with (recorded as-is).
        attention_threshold: Attention cutoff used as the low/medium bucket
            boundary in the attention analysis; should match the execution
            filter threshold passed to run_backtest.

    Returns:
        Populated BacktestMetrics, or None when `trades` is empty.
    """
    if not trades:
        return None

    # All signals, including the ones the attention filter would reject
    all_trades = trades
    total_trades = len(all_trades)

    # Executed = signals that survive the attention/confidence filter
    executed_trades = [t for t in trades if not t.was_filtered]
    filtered_count = total_trades - len(executed_trades)

    # Win/Loss for executed trades (profit_r == 0 counts as a loss)
    wins = [t for t in executed_trades if t.profit_r > 0]
    losses = [t for t in executed_trades if t.profit_r <= 0]

    win_rate = len(wins) / len(executed_trades) if executed_trades else 0

    # Profitability (in R multiples)
    total_profit_r = sum(t.profit_r for t in executed_trades)
    avg_profit_r = total_profit_r / len(executed_trades) if executed_trades else 0

    # Expectancy = (WinRate * AvgWin) - (LossRate * AvgLoss)
    avg_win = sum(t.profit_r for t in wins) / len(wins) if wins else 0
    avg_loss = abs(sum(t.profit_r for t in losses) / len(losses)) if losses else 0
    expectancy = (win_rate * avg_win) - ((1 - win_rate) * avg_loss)

    # Profit factor = gross profit / gross loss
    gross_profit = sum(t.profit_r for t in wins)
    gross_loss = abs(sum(t.profit_r for t in losses))
    profit_factor = gross_profit / gross_loss if gross_loss > 0 else float('inf')

    # Risk metrics: losing streaks and the cumulative R equity curve
    consecutive_losses = 0
    max_consecutive_losses = 0
    equity_curve = []
    cumulative = 0

    for t in executed_trades:
        cumulative += t.profit_r
        equity_curve.append(cumulative)
        if t.profit_r <= 0:
            consecutive_losses += 1
            max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        else:
            consecutive_losses = 0

    # Max drawdown: worst peak-to-trough drop of the equity curve
    peak = 0
    max_dd = 0
    for eq in equity_curve:
        if eq > peak:
            peak = eq
        dd = peak - eq
        if dd > max_dd:
            max_dd = dd

    # Attention analysis: do winners carry higher attention than losers?
    winners_attention = [t.attention_score for t in wins]
    losers_attention = [t.attention_score for t in losses]

    avg_attention_winners = np.mean(winners_attention) if winners_attention else 0
    avg_attention_losers = np.mean(losers_attention) if losers_attention else 0

    # Win rate by attention bucket. The low/medium boundary now uses the
    # caller-supplied attention_threshold (it was previously hard-coded to
    # 0.8, silently ignoring the parameter); >= 2.0 marks high attention.
    high_attention = [t for t in executed_trades if t.attention_score >= 2.0]
    medium_attention = [t for t in executed_trades
                        if attention_threshold <= t.attention_score < 2.0]
    low_attention = [t for t in executed_trades if t.attention_score < attention_threshold]

    high_attention_wr = sum(1 for t in high_attention if t.profit_r > 0) / len(high_attention) if high_attention else 0
    medium_attention_wr = sum(1 for t in medium_attention if t.profit_r > 0) / len(medium_attention) if medium_attention else 0
    low_attention_wr = sum(1 for t in low_attention if t.profit_r > 0) / len(low_attention) if low_attention else 0

    # Unfiltered baseline: every signal taken, no attention filter
    unfiltered_wins = [t for t in all_trades if t.profit_r > 0]
    unfiltered_win_rate = len(unfiltered_wins) / len(all_trades) if all_trades else 0
    unfiltered_profit = sum(t.profit_r for t in all_trades)
    unfiltered_expectancy = unfiltered_profit / len(all_trades) if all_trades else 0

    # Relative expectancy improvement of filtering vs taking everything
    improvement_pct = ((expectancy - unfiltered_expectancy) / abs(unfiltered_expectancy) * 100) if unfiltered_expectancy != 0 else 0

    # Covered period: first to last signal timestamp
    start_date = min(t.timestamp for t in trades)
    end_date = max(t.timestamp for t in trades)
    period = f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"

    return BacktestMetrics(
        symbol=symbol,
        timeframe='15m',  # run_backtest decides on 15m bars
        period=period,
        risk_reward=risk_reward,
        total_bars=len(trades),  # NOTE(review): signal count, not raw bar count
        total_trades=total_trades,
        filtered_trades=filtered_count,
        executed_trades=len(executed_trades),
        wins=len(wins),
        losses=len(losses),
        win_rate=round(win_rate, 4),
        total_profit_r=round(total_profit_r, 2),
        avg_profit_r=round(avg_profit_r, 4),
        expectancy=round(expectancy, 4),
        profit_factor=round(profit_factor, 2),
        max_consecutive_losses=max_consecutive_losses,
        max_drawdown_r=round(max_dd, 2),
        avg_attention_winners=round(avg_attention_winners, 3),
        avg_attention_losers=round(avg_attention_losers, 3),
        high_attention_win_rate=round(high_attention_wr, 4),
        medium_attention_win_rate=round(medium_attention_wr, 4),
        low_attention_win_rate=round(low_attention_wr, 4),
        unfiltered_total_trades=total_trades,
        unfiltered_win_rate=round(unfiltered_win_rate, 4),
        unfiltered_expectancy=round(unfiltered_expectancy, 4),
        improvement_pct=round(improvement_pct, 1)
    )
|
|
|
|
|
|
def print_metrics(metrics: BacktestMetrics, target_wr: float = 0.40, target_exp: float = 0.10):
    """Print a formatted metrics summary with PASS/FAIL tags vs the targets."""
    divider = '=' * 60
    filtered_pct = metrics.filtered_trades / metrics.total_trades * 100
    # Headline metrics compared against the configurable targets
    wr_status = "PASS" if metrics.win_rate >= target_wr else "FAIL"
    exp_status = "PASS" if metrics.expectancy >= target_exp else "FAIL"

    report_lines = [
        f"\n{divider}",
        f"BACKTEST RESULTS: {metrics.symbol}",
        f"{divider}",
        f"Period: {metrics.period}",
        f"Timeframe: {metrics.timeframe}",
        f"Risk:Reward: 1:{metrics.risk_reward}",
        "\n--- Trade Statistics ---",
        f"Total Signals: {metrics.total_trades}",
        f"Filtered Out: {metrics.filtered_trades} ({filtered_pct:.1f}%)",
        f"Executed Trades: {metrics.executed_trades}",
        f"Wins: {metrics.wins}",
        f"Losses: {metrics.losses}",
        "\n--- Key Metrics ---",
        f"Win Rate: {metrics.win_rate * 100:.1f}% (target: {target_wr * 100}%) [{wr_status}]",
        f"Expectancy: {metrics.expectancy:.4f} (target: {target_exp}) [{exp_status}]",
        f"Profit Factor: {metrics.profit_factor:.2f}",
        f"Total Profit (R): {metrics.total_profit_r:.2f}",
        f"Avg Profit/Trade (R): {metrics.avg_profit_r:.4f}",
        "\n--- Risk Metrics ---",
        f"Max Consecutive Losses: {metrics.max_consecutive_losses}",
        f"Max Drawdown (R): {metrics.max_drawdown_r:.2f}",
        "\n--- Attention Analysis ---",
        f"Avg Attention (Winners): {metrics.avg_attention_winners:.3f}",
        f"Avg Attention (Losers): {metrics.avg_attention_losers:.3f}",
        f"High Attention (>=2.0) Win Rate: {metrics.high_attention_win_rate * 100:.1f}%",
        f"Medium Attention (0.8-2.0) Win Rate: {metrics.medium_attention_win_rate * 100:.1f}%",
        f"Low Attention (<0.8) Win Rate: {metrics.low_attention_win_rate * 100:.1f}%",
        "\n--- Comparison: Filtered vs Unfiltered ---",
        f"Unfiltered Win Rate: {metrics.unfiltered_win_rate * 100:.1f}%",
        f"Unfiltered Expectancy: {metrics.unfiltered_expectancy:.4f}",
        f"Improvement: {metrics.improvement_pct:+.1f}%",
        f"\n{divider}",
    ]
    # Single write keeps output identical to line-by-line print() calls
    print('\n'.join(report_lines))
|
|
|
|
|
|
def generate_report(all_metrics: List[BacktestMetrics], output_path: Path):
    """Render all backtest metrics as a markdown report and write it to disk."""
    lines = [
        "# Hierarchical Pipeline Backtest Report",
        f"\n**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "\n## Summary\n",
        "| Symbol | Period | Win Rate | Expectancy | Profit (R) | Improvement |",
        "|--------|--------|----------|------------|------------|-------------|",
    ]

    # One summary row per symbol, tagging the headline metrics vs the targets
    for m in all_metrics:
        wr_status = "PASS" if m.win_rate >= 0.40 else "FAIL"
        exp_status = "PASS" if m.expectancy >= 0.10 else "FAIL"
        lines.append(
            f"| {m.symbol} | {m.period} | {m.win_rate * 100:.1f}% ({wr_status}) | "
            f"{m.expectancy:.4f} ({exp_status}) | {m.total_profit_r:.1f} | {m.improvement_pct:+.1f}% |"
        )

    # One detailed section per symbol
    for m in all_metrics:
        lines.extend([
            f"\n## {m.symbol} Details\n",
            f"- **Total Signals:** {m.total_trades}",
            f"- **Filtered Out:** {m.filtered_trades} ({m.filtered_trades / m.total_trades * 100:.1f}%)",
            f"- **Executed Trades:** {m.executed_trades}",
            f"- **Win Rate:** {m.win_rate * 100:.1f}%",
            f"- **Expectancy:** {m.expectancy:.4f}",
            f"- **Profit Factor:** {m.profit_factor:.2f}",
            "\n### Attention Analysis\n",
            "| Attention Level | Win Rate |",
            "|-----------------|----------|",
            f"| High (>=2.0) | {m.high_attention_win_rate * 100:.1f}% |",
            f"| Medium (0.8-2.0) | {m.medium_attention_win_rate * 100:.1f}% |",
            f"| Low (<0.8) | {m.low_attention_win_rate * 100:.1f}% |",
        ])

    # Write the assembled report, creating the directory if needed
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text('\n'.join(lines))

    logger.info(f"Report saved to: {output_path}")
|
|
|
|
|
|
def main():
    """CLI entry point: parse args, backtest each symbol, write reports.

    Flow: configure logging -> build PipelineConfig from CLI paths/thresholds
    -> per symbol: load models, load & resample OHLCV from MySQL, simulate
    trades, compute metrics, dump a per-symbol trades CSV -> finally write a
    combined markdown report plus JSON metrics into --output-dir.
    """
    parser = argparse.ArgumentParser(description='Hierarchical Pipeline Backtest')
    parser.add_argument('--symbols', nargs='+', default=['XAUUSD', 'EURUSD'],
                        help='Symbols to backtest')
    parser.add_argument('--start-date', type=str, default='2024-06-01',
                        help='Start date (YYYY-MM-DD)')
    parser.add_argument('--end-date', type=str, default='2025-12-31',
                        help='End date (YYYY-MM-DD)')
    parser.add_argument('--rr', type=float, default=2.0,
                        help='Risk:Reward ratio')
    parser.add_argument('--attention-threshold', type=float, default=0.8,
                        help='Minimum attention score to trade')
    parser.add_argument('--horizon', type=int, default=3,
                        help='Bars to look forward for TP/SL')
    parser.add_argument('--step', type=int, default=1,
                        help='Step size between predictions')
    parser.add_argument('--models-dir', type=str, default='models',
                        help='Directory containing trained models')
    parser.add_argument('--output-dir', type=str, default='models/backtest_results',
                        help='Output directory for reports')

    args = parser.parse_args()

    # Setup: the report directory also hosts the log files
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    log_file = setup_logging(output_dir / 'logs', 'hierarchical_backtest')

    logger.info("=" * 60)
    logger.info("HIERARCHICAL PIPELINE BACKTEST")
    logger.info("=" * 60)
    logger.info(f"Symbols: {args.symbols}")
    logger.info(f"Period: {args.start_date} to {args.end_date}")
    logger.info(f"R:R: 1:{args.rr}")
    logger.info(f"Attention Threshold: {args.attention_threshold}")

    # Initialize pipeline; model sub-directories hang off --models-dir
    config = PipelineConfig(
        attention_model_path=f'{args.models_dir}/attention',
        base_model_path=f'{args.models_dir}/symbol_timeframe_models',
        metamodel_path=f'{args.models_dir}/metamodels',
        attention_threshold_low=args.attention_threshold,
        attention_threshold_high=2.0,
        confidence_threshold=0.5
    )
    pipeline = HierarchicalPipeline(config)

    all_metrics = []

    for symbol in args.symbols:
        logger.info(f"\n{'=' * 40}")
        logger.info(f"Processing {symbol}...")
        logger.info(f"{'=' * 40}")

        # Load models; skip symbols with an incomplete model set rather than abort
        if not pipeline.load_models(symbol):
            logger.warning(f"Could not load all models for {symbol}, skipping...")
            continue

        # Load data for both timeframes the pipeline consumes (5m + 15m)
        try:
            df_5m = load_ohlcv_from_mysql(symbol, '5m', args.start_date, args.end_date)
            df_15m = load_ohlcv_from_mysql(symbol, '15m', args.start_date, args.end_date)

            if df_5m.empty or df_15m.empty:
                logger.warning(f"No data for {symbol}, skipping...")
                continue

        except Exception as e:
            logger.error(f"Data loading failed for {symbol}: {e}")
            continue

        # Run backtest
        trades = run_backtest(
            pipeline=pipeline,
            df_5m=df_5m,
            df_15m=df_15m,
            symbol=symbol,
            risk_reward=args.rr,
            attention_threshold=args.attention_threshold,
            horizon_bars=args.horizon,
            step_bars=args.step
        )

        if not trades:
            logger.warning(f"No trades generated for {symbol}")
            continue

        # Aggregate the trade list into summary metrics
        metrics = calculate_metrics(
            trades=trades,
            symbol=symbol,
            risk_reward=args.rr,
            attention_threshold=args.attention_threshold
        )

        if metrics:
            all_metrics.append(metrics)
            print_metrics(metrics)

            # Persist the raw per-trade log for later offline analysis
            trades_df = pd.DataFrame([asdict(t) for t in trades])
            trades_file = output_dir / f'{symbol}_trades_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
            trades_df.to_csv(trades_file, index=False)
            logger.info(f"Trades saved to: {trades_file}")

    # Generate final report across all successfully processed symbols
    if all_metrics:
        report_file = output_dir / f'backtest_report_{datetime.now().strftime("%Y%m%d_%H%M%S")}.md'
        generate_report(all_metrics, report_file)

        # Save metrics as JSON alongside the markdown report
        metrics_json = output_dir / f'backtest_metrics_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json'
        with open(metrics_json, 'w') as f:
            json.dump([asdict(m) for m in all_metrics], f, indent=2, default=str)
        logger.info(f"Metrics saved to: {metrics_json}")

    logger.info("\n" + "=" * 60)
    logger.info("BACKTEST COMPLETE")
    logger.info("=" * 60)
|
|
|
|
|
|
# Run the backtest CLI only when executed directly (not on import).
if __name__ == "__main__":
    main()
|