trading-platform-ml-engine-v2/scripts/evaluate_hierarchical_v2.py
rckrdmrd 75c4d07690 feat: Initial commit - ML Engine codebase
Hierarchical ML Pipeline for trading predictions:
- Level 0: Attention Models (volatility/flow classification)
- Level 1: Base Models (XGBoost per symbol/timeframe)
- Level 2: Metamodels (XGBoost Stacking + Neural Gating)

Key components:
- src/pipelines/hierarchical_pipeline.py - Main prediction pipeline
- src/models/ - All ML model classes
- src/training/ - Training utilities
- src/api/ - FastAPI endpoints
- scripts/ - Training and evaluation scripts
- config/ - YAML configurations

Note: Trained models (*.joblib, *.pt) are gitignored.
      Regenerate with training scripts.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 04:27:40 -06:00

880 lines
30 KiB
Python

#!/usr/bin/env python3
"""
Hierarchical Pipeline Backtesting V2
====================================
Enhanced backtesting with multiple filtering strategies based on findings:
- Inverted attention filter (filter HIGH attention, keep MEDIUM)
- Confidence-based filtering using metamodel probability
- Dynamic R:R based on predicted delta_high/delta_low ratio
Key findings from v1:
- Medium attention (0.8-2.0) has 44.6% win rate
- High attention (>=2.0) has 39.8% win rate
- This suggests we should INVERT the attention filtering logic
Usage:
python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD EURUSD --strategy medium_attention
python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD --strategy dynamic_rr
python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD --strategy all
Author: ML Pipeline
Version: 2.0.0
Created: 2026-01-07
"""
import argparse
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass, asdict
import json
import numpy as np
import pandas as pd
from loguru import logger
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
# Import hierarchical pipeline directly by file path. This bypasses the
# package import system so the script runs without installing the project
# (the repo's src/ layout is resolved relative to this file).
import importlib.util
pipeline_path = Path(__file__).parent.parent / 'src' / 'pipelines' / 'hierarchical_pipeline.py'
spec = importlib.util.spec_from_file_location("hierarchical_pipeline", pipeline_path)
hierarchical_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(hierarchical_module)
# Re-export the three pipeline classes used throughout this script.
HierarchicalPipeline = hierarchical_module.HierarchicalPipeline
PipelineConfig = hierarchical_module.PipelineConfig
PredictionResult = hierarchical_module.PredictionResult
@dataclass
class FilterStrategy:
    """Trading filter strategy configuration.

    A strategy decides (a) which prediction signals are executed as trades,
    via the attention and confidence filters, and (b) what risk:reward
    ratio to use — the fixed ``base_rr`` or a dynamic ratio derived from
    the predicted deltas (see ``calculate_rr``).
    """
    name: str
    description: str
    # Attention filters (applied to the mean of 5m/15m attention scores)
    attention_min: float = 0.0  # Minimum attention to trade
    attention_max: float = 999.0  # Maximum attention to trade
    # Confidence filters
    confidence_min: float = 0.0  # Minimum confidence probability
    require_confidence: bool = False  # Require confidence=True from metamodel
    # Dynamic R:R
    use_dynamic_rr: bool = False  # Use predicted deltas for R:R
    base_rr: float = 2.0  # Base R:R when not dynamic
    min_rr: float = 1.5  # Minimum R:R for dynamic
    max_rr: float = 4.0  # Maximum R:R for dynamic
# Pre-defined strategies based on findings from the v1 backtest:
# medium attention (0.8-2.0) out-performed high attention (>=2.0), so most
# strategies here keep the medium band and layer confidence / dynamic R:R
# filters on top. Selected on the CLI via --strategy (or 'all').
STRATEGIES = {
    'baseline': FilterStrategy(
        name='baseline',
        description='No filtering - all trades',
        attention_min=0.0,
        attention_max=999.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'medium_attention': FilterStrategy(
        name='medium_attention',
        description='Only medium attention (0.8-2.0) - best win rate from v1',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'medium_with_confidence': FilterStrategy(
        name='medium_with_confidence',
        description='Medium attention + confidence filter',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.5,
        require_confidence=True,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'high_confidence': FilterStrategy(
        name='high_confidence',
        description='Only high confidence trades',
        attention_min=0.0,
        attention_max=999.0,
        confidence_min=0.7,
        require_confidence=True,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'dynamic_rr': FilterStrategy(
        name='dynamic_rr',
        description='Medium attention + dynamic R:R from predictions',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=1.5,
        max_rr=4.0
    ),
    'aggressive_filter': FilterStrategy(
        name='aggressive_filter',
        description='Medium attention + high confidence + dynamic R:R',
        attention_min=0.8,
        attention_max=1.8,  # Tighter range
        confidence_min=0.6,
        require_confidence=True,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=1.5,
        max_rr=3.5
    ),
    'conservative': FilterStrategy(
        name='conservative',
        description='Very selective - only best setups',
        attention_min=1.0,
        attention_max=1.6,
        confidence_min=0.65,
        require_confidence=True,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=2.0,
        max_rr=3.0
    )
}
@dataclass
class TradeResult:
    """Result of a single trade.

    Every signal produced by ``run_backtest`` is recorded here, whether or
    not it passed the strategy filter (see ``passed_filter``), so metrics
    can compare filtered vs. unfiltered performance.
    """
    timestamp: datetime          # signal bar timestamp (15m frame)
    symbol: str
    direction: str               # 'long' or 'short'
    entry_price: float           # close of the signal bar
    stop_loss: float
    take_profit: float
    risk: float                  # |entry - stop| in price units
    reward: float                # risk * risk_reward
    risk_reward: float           # R:R ratio used for this trade
    actual_high: float           # realized high over the outcome window
    actual_low: float            # realized low over the outcome window
    hit_tp: bool                 # take-profit level reached in the window
    hit_sl: bool                 # stop-loss level reached in the window
    profit_r: float              # outcome in R units: +rr, -1.0, or mark-to-market
    attention_score: float       # mean of the 5m and 15m attention scores
    attention_class_5m: int
    attention_class_15m: int
    confidence: bool             # metamodel confidence flag
    confidence_proba: float      # metamodel confidence probability
    delta_high_pred: float       # predicted upward delta
    delta_low_pred: float        # predicted downward delta
    strategy: str                # name of the strategy that produced this record
    passed_filter: bool          # True if the signal passed the strategy filters
@dataclass
class StrategyMetrics:
    """Metrics for a trading strategy.

    Aggregated by ``calculate_metrics`` from a list of TradeResult. Win/loss
    and profit statistics cover only executed (filter-passing) trades;
    ``total_signals``/``filter_rate`` describe the full signal set.
    """
    strategy_name: str
    strategy_description: str
    symbol: str
    period: str                    # "YYYY-MM-DD to YYYY-MM-DD" span of the signals
    total_signals: int             # all signals, filtered or not
    filtered_out: int
    executed_trades: int
    filter_rate: float             # filtered_out / total_signals
    wins: int
    losses: int
    win_rate: float
    total_profit_r: float          # sum of profit_r over executed trades
    avg_profit_r: float
    expectancy: float              # win_rate*avg_win - (1-win_rate)*avg_loss
    profit_factor: float           # gross profit / gross loss (inf if no losses)
    max_consecutive_losses: int
    max_drawdown_r: float          # peak-to-trough equity drop, in R units
    avg_attention_winners: float
    avg_attention_losers: float
    avg_confidence_winners: float
    avg_confidence_losers: float
    avg_rr_used: float             # mean R:R across executed trades
def setup_logging(log_dir: Path, experiment_name: str) -> Path:
    """Configure loguru sinks: INFO to stderr, DEBUG to a timestamped file.

    Returns the path of the log file that was created.
    """
    log_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_path = log_dir / f"{experiment_name}_{stamp}.log"
    # Drop loguru's default handler so sinks are not duplicated.
    logger.remove()
    logger.add(sys.stderr, level="INFO", format="{time:HH:mm:ss} | {level} | {message}")
    logger.add(log_path, level="DEBUG", rotation="10 MB")
    return log_path
def load_ohlcv_from_mysql(symbol: str, timeframe: str, start_date: str, end_date: str) -> pd.DataFrame:
    """Load OHLCV bars for ``symbol`` from MySQL and resample to ``timeframe``.

    Args:
        symbol: Plain symbol name (e.g. 'XAUUSD'); mapped to a provider ticker.
        timeframe: '5m' or '15m' (any other value returns the raw bars).
        start_date: Inclusive start, parseable by ``pd.Timestamp``.
        end_date: Inclusive end, parseable by ``pd.Timestamp``.

    Returns:
        DataFrame indexed by timestamp with open/high/low/close/volume
        columns (empty if no rows matched).

    Raises:
        ValueError: if ``symbol`` or either date fails validation.
        Exception: database/query errors are logged and re-raised.
    """
    from data.database import MySQLConnection
    ticker_map = {
        'XAUUSD': 'C:XAUUSD',
        'EURUSD': 'C:EURUSD',
        'GBPUSD': 'C:GBPUSD',
        'USDJPY': 'C:USDJPY',
        'BTCUSD': 'X:BTCUSD'
    }
    # Validate inputs BEFORE they are interpolated into SQL. Symbol and the
    # dates arrive from the CLI, so never trust them raw (SQL injection
    # hardening): unmapped symbols must be alphanumeric, and dates must
    # parse, after which we re-emit them in a canonical format.
    if symbol not in ticker_map and not symbol.isalnum():
        raise ValueError(f"Invalid symbol: {symbol!r}")
    start_ts = pd.Timestamp(start_date)  # raises ValueError on malformed input
    end_ts = pd.Timestamp(end_date)
    ticker = ticker_map.get(symbol, f'C:{symbol}')
    logger.info(f"Loading {symbol} {timeframe} data from {start_date} to {end_date}...")
    try:
        db = MySQLConnection()
        query = f"""
            SELECT date_agg as timestamp, open, high, low, close, volume
            FROM tickers_agg_data
            WHERE ticker = '{ticker}'
              AND date_agg >= '{start_ts:%Y-%m-%d %H:%M:%S}'
              AND date_agg <= '{end_ts:%Y-%m-%d %H:%M:%S}'
            ORDER BY date_agg ASC
        """
        df = pd.read_sql(query, db.engine)
        if df.empty:
            logger.warning(f"No data found for {symbol}")
            return df
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df.set_index('timestamp', inplace=True)
        df.sort_index(inplace=True)
        logger.info(f" Loaded {len(df)} raw bars")
        # Resample raw bars up to the requested timeframe; dropna removes
        # empty buckets (weekends / market closures).
        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
        if timeframe == '5m':
            df = df.resample('5min').agg(agg_dict).dropna()
        elif timeframe == '15m':
            df = df.resample('15min').agg(agg_dict).dropna()
        logger.info(f" Resampled to {timeframe}: {len(df)} bars")
        return df
    except Exception as e:
        logger.error(f"Failed to load data: {e}")
        raise
def generate_features(df: pd.DataFrame) -> pd.DataFrame:
    """Generate comprehensive feature set.

    Builds technical features — returns, volatility, range/ATR, moving
    averages, RSI, Bollinger Bands, MACD, momentum, stochastic,
    Williams %R, volume, candle anatomy, and session/time encodings —
    from an OHLCV frame.

    Args:
        df: OHLCV DataFrame. Time features are added only when the index
            exposes ``hour`` (i.e. a DatetimeIndex).

    Returns:
        DataFrame with the original OHLCV columns followed by the feature
        columns. Infinities are converted to NaN; rolling-window warm-up
        rows contain NaN.
    """
    if len(df) == 0:
        return df
    df = df.copy()
    features = pd.DataFrame(index=df.index)
    close = df['close']
    high = df['high']
    low = df['low']
    open_price = df['open']
    # Fall back to a constant volume of 1 when the column is missing.
    volume = df.get('volume', pd.Series(1, index=df.index))
    # Returns over multiple horizons
    for period in [1, 3, 5, 10, 20]:
        features[f'returns_{period}'] = close.pct_change(period)
    # Volatility: rolling std of 1-bar returns
    for period in [5, 10, 20]:
        features[f'volatility_{period}'] = close.pct_change().rolling(period).std()
    # Range (bar high-low span, absolute and relative)
    candle_range = high - low
    features['range'] = candle_range
    features['range_pct'] = candle_range / close
    for period in [5, 10, 20]:
        features[f'range_ma_{period}'] = candle_range.rolling(period).mean()
    features['range_ratio_5'] = candle_range / features['range_ma_5']
    features['range_ratio_20'] = candle_range / features['range_ma_20']
    # ATR — true range also covers gaps vs. the previous close
    tr1 = high - low
    tr2 = abs(high - close.shift(1))
    tr3 = abs(low - close.shift(1))
    true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    features['atr_5'] = true_range.rolling(5).mean()
    features['atr_14'] = true_range.rolling(14).mean()
    features['atr_20'] = true_range.rolling(20).mean()
    features['atr_ratio'] = true_range / features['atr_14']
    # Moving Averages — distances are normalized by ATR to be scale-free
    sma_5 = close.rolling(5).mean()
    sma_10 = close.rolling(10).mean()
    sma_20 = close.rolling(20).mean()
    sma_50 = close.rolling(50).mean()
    ema_5 = close.ewm(span=5, adjust=False).mean()
    ema_20 = close.ewm(span=20, adjust=False).mean()
    features['price_vs_sma5'] = (close - sma_5) / features['atr_14']
    features['price_vs_sma10'] = (close - sma_10) / features['atr_14']
    features['price_vs_sma20'] = (close - sma_20) / features['atr_14']
    features['price_vs_sma50'] = (close - sma_50) / features['atr_14']
    features['sma5_vs_sma20'] = (sma_5 - sma_20) / features['atr_14']
    features['ema5_vs_ema20'] = (ema_5 - ema_20) / features['atr_14']
    # RSI (simple-moving-average variant; +1e-10 avoids divide-by-zero)
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / (loss + 1e-10)
    features['rsi_14'] = 100 - (100 / (1 + rs))
    features['rsi_oversold'] = (features['rsi_14'] < 30).astype(float)
    features['rsi_overbought'] = (features['rsi_14'] > 70).astype(float)
    # Bollinger Bands (20-period, 2 std)
    bb_middle = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_middle + 2 * bb_std
    bb_lower = bb_middle - 2 * bb_std
    features['bb_width'] = (bb_upper - bb_lower) / bb_middle
    features['bb_position'] = (close - bb_lower) / (bb_upper - bb_lower + 1e-10)
    # MACD (12/26/9), ATR-normalized
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd = ema_12 - ema_26
    macd_signal = macd.ewm(span=9, adjust=False).mean()
    features['macd'] = macd / features['atr_14']
    features['macd_signal'] = macd_signal / features['atr_14']
    features['macd_hist'] = (macd - macd_signal) / features['atr_14']
    # Momentum, ATR-normalized
    for period in [5, 10, 20]:
        features[f'momentum_{period}'] = (close - close.shift(period)) / features['atr_14']
    # Stochastic oscillator (14-period %K, 3-period %D)
    low_14 = low.rolling(14).min()
    high_14 = high.rolling(14).max()
    features['stoch_k'] = 100 * (close - low_14) / (high_14 - low_14 + 1e-10)
    features['stoch_d'] = features['stoch_k'].rolling(3).mean()
    # Williams %R
    features['williams_r'] = -100 * (high_14 - close) / (high_14 - low_14 + 1e-10)
    # Volume — constant fallbacks when the feed carries no volume at all
    if volume.sum() > 0:
        vol_ma_20 = volume.rolling(20).mean()
        vol_ma_5 = volume.rolling(5).mean()
        features['volume_ratio'] = volume / (vol_ma_20 + 1)
        features['volume_trend'] = (vol_ma_5 - vol_ma_20) / (vol_ma_20 + 1)
    else:
        features['volume_ratio'] = 1.0
        features['volume_trend'] = 0.0
    # Candle patterns: body and shadows as fractions of the bar range
    body = close - open_price
    features['body_pct'] = body / (candle_range + 1e-10)
    features['upper_shadow'] = (high - np.maximum(close, open_price)) / (candle_range + 1e-10)
    features['lower_shadow'] = (np.minimum(close, open_price) - low) / (candle_range + 1e-10)
    # Price position within the bar and recent ranges
    features['close_position'] = (close - low) / (candle_range + 1e-10)
    high_5 = high.rolling(5).max()
    low_5 = low.rolling(5).min()
    features['price_position_5'] = (close - low_5) / (high_5 - low_5 + 1e-10)
    high_20 = high.rolling(20).max()
    low_20 = low.rolling(20).min()
    features['price_position_20'] = (close - low_20) / (high_20 - low_20 + 1e-10)
    # Time features: cyclical hour/day encodings plus session flags
    # (hours appear to be interpreted as UTC — TODO confirm with the feed)
    if hasattr(df.index, 'hour'):
        hour = df.index.hour
        day_of_week = df.index.dayofweek
        features['hour_sin'] = np.sin(2 * np.pi * hour / 24)
        features['hour_cos'] = np.cos(2 * np.pi * hour / 24)
        features['dow_sin'] = np.sin(2 * np.pi * day_of_week / 7)
        features['dow_cos'] = np.cos(2 * np.pi * day_of_week / 7)
        features['is_london'] = ((hour >= 8) & (hour < 16)).astype(float)
        features['is_newyork'] = ((hour >= 13) & (hour < 21)).astype(float)
        features['is_overlap'] = ((hour >= 13) & (hour < 16)).astype(float)
    features = features.replace([np.inf, -np.inf], np.nan)
    result = pd.concat([df[['open', 'high', 'low', 'close', 'volume']], features], axis=1)
    return result
def should_trade(result: PredictionResult, strategy: FilterStrategy) -> bool:
    """Return True when a prediction passes the strategy's filters.

    Checks the mean 5m/15m attention against the strategy's band, the
    metamodel confidence flag (when required), and the confidence
    probability threshold.
    """
    mean_attention = (result.attention_score_5m + result.attention_score_15m) / 2
    within_attention = strategy.attention_min <= mean_attention <= strategy.attention_max
    confidence_ok = result.confidence or not strategy.require_confidence
    proba_ok = result.confidence_proba >= strategy.confidence_min
    return within_attention and confidence_ok and proba_ok
def calculate_rr(result: PredictionResult, strategy: FilterStrategy, direction: str) -> float:
    """Compute the risk:reward ratio to use for a trade.

    Static strategies always return ``base_rr``. Dynamic strategies derive
    the ratio from the predicted deltas — reward leg divided by risk leg
    for the given direction — and clamp it to [min_rr, max_rr]. When the
    risk-leg delta is zero the base ratio is used instead.
    """
    if not strategy.use_dynamic_rr:
        return strategy.base_rr
    up_move = abs(result.delta_high_final)
    down_move = abs(result.delta_low_final)
    if direction == 'long':
        # Long: TP driven by the predicted high, SL by the predicted low.
        ratio = up_move / down_move if down_move > 0 else strategy.base_rr
    else:
        # Short: TP driven by the predicted low, SL by the predicted high.
        ratio = down_move / up_move if up_move > 0 else strategy.base_rr
    return max(strategy.min_rr, min(strategy.max_rr, ratio))
def run_backtest(
    pipeline: HierarchicalPipeline,
    df_5m: pd.DataFrame,
    df_15m: pd.DataFrame,
    symbol: str,
    strategy: FilterStrategy,
    horizon_bars: int = 3,
    step_bars: int = 1
) -> List[TradeResult]:
    """Run backtest with specific strategy.

    Walks the 15m feature frame every ``step_bars`` bars, asks the pipeline
    for a prediction at each step, simulates a trade with SL/TP derived
    from the predicted deltas, and scores it against the realized
    highs/lows over the next ``horizon_bars`` bars.

    Every signal is recorded as a TradeResult (with ``passed_filter`` set),
    so metrics can compare filtered vs. unfiltered performance.
    """
    trades = []
    min_lookback = 100  # bars of history required before the first prediction
    df_5m = df_5m.sort_index()
    df_15m = df_15m.sort_index()
    df_5m_feat = generate_features(df_5m)
    df_15m_feat = generate_features(df_15m)
    # Skip the warm-up region where rolling features are NaN on both frames.
    valid_start_5m = df_5m_feat.index[min_lookback * 3]
    valid_start_15m = df_15m_feat.index[min_lookback]
    common_start = max(valid_start_5m, valid_start_15m)
    # Drop the last horizon_bars so every signal has a full outcome window.
    df_15m_test = df_15m_feat[df_15m_feat.index >= common_start].iloc[:-horizon_bars]
    logger.info(f"Backtesting {len(df_15m_test)} bars with strategy '{strategy.name}'...")
    for i in range(0, len(df_15m_test), step_bars):
        current_time = df_15m_test.index[i]
        # Only data up to and including the signal bar is visible to the model.
        df_5m_slice = df_5m_feat[df_5m_feat.index <= current_time].tail(min_lookback * 3)
        df_15m_slice = df_15m_feat[df_15m_feat.index <= current_time].tail(min_lookback)
        if len(df_5m_slice) < min_lookback or len(df_15m_slice) < 50:
            continue
        try:
            result = pipeline.predict(df_5m_slice, df_15m_slice, symbol)
            # Entry is the close of the signal bar.
            entry_price = float(df_15m_slice['close'].iloc[-1])
            # Determine direction from the larger predicted move (10% margin);
            # fall back to 4-bar momentum when the deltas are roughly symmetric.
            delta_high = result.delta_high_final
            delta_low = result.delta_low_final
            if delta_high > delta_low * 1.1:
                direction = 'long'
            elif delta_low > delta_high * 1.1:
                direction = 'short'
            else:
                momentum = (df_15m_slice['close'].iloc[-1] / df_15m_slice['close'].iloc[-5]) - 1
                direction = 'long' if momentum > 0 else 'short'
            # Check if trade passes filters (the result is recorded either way).
            passed_filter = should_trade(result, strategy)
            # Calculate R:R (static base_rr or dynamic from predicted deltas).
            rr = calculate_rr(result, strategy, direction)
            # SL at the predicted adverse delta; TP at risk * R:R from entry.
            if direction == 'long':
                stop_loss = entry_price - delta_low
                risk = entry_price - stop_loss
                take_profit = entry_price + (risk * rr)
            else:
                stop_loss = entry_price + delta_high
                risk = stop_loss - entry_price
                take_profit = entry_price - (risk * rr)
            # Outcome window over the full feature frame.
            # NOTE(review): the window starts AT the signal bar, so that bar's
            # own high/low — realized before the close-of-bar entry — can
            # trigger TP/SL. Slight lookahead bias; confirm this is intended.
            future_start_idx = df_15m_feat.index.get_loc(current_time)
            future_end_idx = min(future_start_idx + horizon_bars, len(df_15m_feat))
            future_data = df_15m_feat.iloc[future_start_idx:future_end_idx]
            if len(future_data) < 2:
                continue
            actual_high = future_data['high'].max()
            actual_low = future_data['low'].min()
            # Determine outcome. When both TP and SL levels fall inside the
            # window, resolve by whichever excursion from entry was larger
            # (intra-window ordering is unknown at this bar resolution).
            if direction == 'long':
                hit_tp = actual_high >= take_profit
                hit_sl = actual_low <= stop_loss
                if hit_tp and hit_sl:
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = high_dist >= low_dist
                    hit_sl = not hit_tp
                if hit_tp:
                    profit_r = rr
                elif hit_sl:
                    profit_r = -1.0
                else:
                    # Neither level hit: mark to market at the window close.
                    actual_pnl = future_data['close'].iloc[-1] - entry_price
                    profit_r = actual_pnl / risk if risk > 0 else 0
            else:
                hit_tp = actual_low <= take_profit
                hit_sl = actual_high >= stop_loss
                if hit_tp and hit_sl:
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = low_dist >= high_dist
                    hit_sl = not hit_tp
                if hit_tp:
                    profit_r = rr
                elif hit_sl:
                    profit_r = -1.0
                else:
                    actual_pnl = entry_price - future_data['close'].iloc[-1]
                    profit_r = actual_pnl / risk if risk > 0 else 0
            avg_attention = (result.attention_score_5m + result.attention_score_15m) / 2
            trade = TradeResult(
                timestamp=current_time,
                symbol=symbol,
                direction=direction,
                entry_price=entry_price,
                stop_loss=stop_loss,
                take_profit=take_profit,
                risk=risk,
                reward=risk * rr,
                risk_reward=rr,
                actual_high=actual_high,
                actual_low=actual_low,
                hit_tp=hit_tp,
                hit_sl=hit_sl,
                profit_r=profit_r,
                attention_score=avg_attention,
                attention_class_5m=result.attention_class_5m,
                attention_class_15m=result.attention_class_15m,
                confidence=result.confidence,
                confidence_proba=result.confidence_proba,
                delta_high_pred=delta_high,
                delta_low_pred=delta_low,
                strategy=strategy.name,
                passed_filter=passed_filter
            )
            trades.append(trade)
        except Exception as e:
            # Best-effort: skip bars where the pipeline cannot predict.
            logger.debug(f"Prediction failed at {current_time}: {e}")
            continue
        # NOTE(review): with step_bars > 1, i + 1 may never be a multiple of
        # 1000, so this progress log can be skipped entirely.
        if (i + 1) % 1000 == 0:
            logger.info(f" Processed {i + 1}/{len(df_15m_test)} bars...")
    return trades
def calculate_metrics(trades: List[TradeResult], strategy: FilterStrategy, symbol: str) -> Optional[StrategyMetrics]:
    """Aggregate trade results into StrategyMetrics.

    Only trades that passed the strategy filter count toward win/loss and
    profit statistics; filtered-out signals contribute to the filter rate.

    Args:
        trades: All signals recorded by ``run_backtest``.
        strategy: The strategy that produced the trades.
        symbol: Symbol label copied into the metrics.

    Returns:
        StrategyMetrics, or None when ``trades`` is empty. (Fix: the return
        annotation is Optional — the original declared a bare StrategyMetrics
        while returning None here.)
    """
    if not trades:
        return None
    total_signals = len(trades)
    executed = [t for t in trades if t.passed_filter]
    filtered_out = total_signals - len(executed)
    filter_rate = filtered_out / total_signals
    # Period string is shared by both return paths (previously duplicated).
    start_date = min(t.timestamp for t in trades)
    end_date = max(t.timestamp for t in trades)
    period = f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"
    if not executed:
        # Nothing survived the filter: return zeroed metrics so callers can
        # still report the filter rate.
        return StrategyMetrics(
            strategy_name=strategy.name,
            strategy_description=strategy.description,
            symbol=symbol,
            period=period,
            total_signals=total_signals,
            filtered_out=filtered_out,
            executed_trades=0,
            filter_rate=filter_rate,
            wins=0, losses=0, win_rate=0,
            total_profit_r=0, avg_profit_r=0, expectancy=0, profit_factor=0,
            max_consecutive_losses=0, max_drawdown_r=0,
            avg_attention_winners=0, avg_attention_losers=0,
            avg_confidence_winners=0, avg_confidence_losers=0,
            avg_rr_used=strategy.base_rr
        )
    wins = [t for t in executed if t.profit_r > 0]
    losses = [t for t in executed if t.profit_r <= 0]
    win_rate = len(wins) / len(executed)
    total_profit_r = sum(t.profit_r for t in executed)
    avg_profit_r = total_profit_r / len(executed)
    avg_win = sum(t.profit_r for t in wins) / len(wins) if wins else 0
    avg_loss = abs(sum(t.profit_r for t in losses) / len(losses)) if losses else 0
    expectancy = (win_rate * avg_win) - ((1 - win_rate) * avg_loss)
    gross_profit = sum(t.profit_r for t in wins)
    gross_loss = abs(sum(t.profit_r for t in losses))
    # NOTE: float('inf') serializes as non-standard JSON 'Infinity' downstream.
    profit_factor = gross_profit / gross_loss if gross_loss > 0 else float('inf')
    # Risk metrics: loss streaks and running-equity drawdown in one pass.
    consecutive_losses = 0
    max_consecutive_losses = 0
    cumulative = 0.0
    peak = 0.0
    max_dd = 0.0
    for t in executed:
        cumulative += t.profit_r
        peak = max(peak, cumulative)
        max_dd = max(max_dd, peak - cumulative)
        if t.profit_r <= 0:
            consecutive_losses += 1
            max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        else:
            consecutive_losses = 0
    # Winner/loser breakdowns used to sanity-check the filter hypothesis.
    avg_attention_winners = np.mean([t.attention_score for t in wins]) if wins else 0
    avg_attention_losers = np.mean([t.attention_score for t in losses]) if losses else 0
    avg_confidence_winners = np.mean([t.confidence_proba for t in wins]) if wins else 0
    avg_confidence_losers = np.mean([t.confidence_proba for t in losses]) if losses else 0
    avg_rr_used = np.mean([t.risk_reward for t in executed])
    return StrategyMetrics(
        strategy_name=strategy.name,
        strategy_description=strategy.description,
        symbol=symbol,
        period=period,
        total_signals=total_signals,
        filtered_out=filtered_out,
        executed_trades=len(executed),
        filter_rate=round(filter_rate, 4),
        wins=len(wins),
        losses=len(losses),
        win_rate=round(win_rate, 4),
        total_profit_r=round(total_profit_r, 2),
        avg_profit_r=round(avg_profit_r, 4),
        expectancy=round(expectancy, 4),
        profit_factor=round(profit_factor, 2),
        max_consecutive_losses=max_consecutive_losses,
        max_drawdown_r=round(max_dd, 2),
        avg_attention_winners=round(avg_attention_winners, 3),
        avg_attention_losers=round(avg_attention_losers, 3),
        avg_confidence_winners=round(avg_confidence_winners, 3),
        avg_confidence_losers=round(avg_confidence_losers, 3),
        avg_rr_used=round(avg_rr_used, 2)
    )
def print_metrics(metrics: StrategyMetrics):
    """Pretty-print a single strategy's backtest report to stdout."""
    rule = '=' * 70
    # PASS/FAIL tags against the project targets (40% win rate, 0.10 R).
    wr_status = "PASS" if metrics.win_rate >= 0.40 else "FAIL"
    if metrics.expectancy >= 0.10:
        exp_status = "PASS"
    elif metrics.expectancy > -0.04:
        exp_status = "IMPROVED"
    else:
        exp_status = "FAIL"
    report = [
        f"\n{rule}",
        f"STRATEGY: {metrics.strategy_name}",
        f"Description: {metrics.strategy_description}",
        f"{rule}",
        f"Symbol: {metrics.symbol} | Period: {metrics.period}",
        "\n--- Trade Statistics ---",
        f"Total Signals: {metrics.total_signals}",
        f"Filtered Out: {metrics.filtered_out} ({metrics.filter_rate * 100:.1f}%)",
        f"Executed Trades: {metrics.executed_trades}",
        f"Wins: {metrics.wins} | Losses: {metrics.losses}",
        "\n--- Key Metrics ---",
        f"Win Rate: {metrics.win_rate * 100:.1f}% (target: 40%) [{wr_status}]",
        f"Expectancy: {metrics.expectancy:.4f} (target: 0.10) [{exp_status}]",
        f"Profit Factor: {metrics.profit_factor:.2f}",
        f"Total Profit (R): {metrics.total_profit_r:.2f}",
        f"Avg R:R Used: {metrics.avg_rr_used:.2f}",
        "\n--- Risk ---",
        f"Max Consecutive Losses: {metrics.max_consecutive_losses}",
        f"Max Drawdown (R): {metrics.max_drawdown_r:.2f}",
        "\n--- Analysis ---",
        f"Avg Attention (Winners): {metrics.avg_attention_winners:.3f}",
        f"Avg Attention (Losers): {metrics.avg_attention_losers:.3f}",
        f"Avg Confidence (Winners): {metrics.avg_confidence_winners:.3f}",
        f"Avg Confidence (Losers): {metrics.avg_confidence_losers:.3f}",
    ]
    print("\n".join(report))
def print_comparison(all_metrics: List[StrategyMetrics]):
    """Print a side-by-side strategy comparison table, ranked by expectancy."""
    bar = '=' * 90
    print(f"\n{bar}")
    print("STRATEGY COMPARISON")
    print(f"{bar}")
    print(f"{'Strategy':<25} {'Trades':>8} {'Filter%':>8} {'WinRate':>8} {'Expect':>10} {'PF':>6} {'Profit(R)':>10}")
    print("-" * 90)
    # Stable descending sort: ranked[0] matches max() on ties.
    ranked = sorted(all_metrics, key=lambda m: m.expectancy, reverse=True)
    for m in ranked:
        wr_str = f"{m.win_rate * 100:.1f}%"
        print(f"{m.strategy_name:<25} {m.executed_trades:>8} {m.filter_rate * 100:>7.1f}% {wr_str:>8} {m.expectancy:>10.4f} {m.profit_factor:>6.2f} {m.total_profit_r:>10.2f}")
    print(f"{bar}")
    best = ranked[0]
    print(f"\nBest Strategy by Expectancy: {best.strategy_name}")
    print(f" Expectancy: {best.expectancy:.4f}")
    print(f" Win Rate: {best.win_rate * 100:.1f}%")
    print(f" Profit Factor: {best.profit_factor:.2f}")
def main():
    """CLI entry point: backtest one or more filter strategies per symbol.

    For each symbol: loads models, pulls 5m/15m OHLCV from MySQL once,
    runs every requested strategy through ``run_backtest``, prints per-
    strategy metrics plus a comparison table, and saves all metrics as JSON.
    """
    parser = argparse.ArgumentParser(description='Enhanced Hierarchical Pipeline Backtest')
    parser.add_argument('--symbols', nargs='+', default=['XAUUSD'],
                        help='Symbols to backtest')
    parser.add_argument('--start-date', type=str, default='2024-09-01')
    parser.add_argument('--end-date', type=str, default='2024-12-31')
    parser.add_argument('--strategy', type=str, default='all',
                        choices=['all'] + list(STRATEGIES.keys()),
                        help='Strategy to test')
    parser.add_argument('--step', type=int, default=3)  # bars between evaluated signals
    parser.add_argument('--models-dir', type=str, default='models')
    parser.add_argument('--output-dir', type=str, default='models/backtest_results_v2')
    args = parser.parse_args()
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    setup_logging(output_dir / 'logs', 'hierarchical_backtest_v2')
    logger.info("=" * 70)
    logger.info("HIERARCHICAL PIPELINE BACKTEST V2 - STRATEGY COMPARISON")
    logger.info("=" * 70)
    # Initialize pipeline with the three model directories.
    config = PipelineConfig(
        attention_model_path=f'{args.models_dir}/attention',
        base_model_path=f'{args.models_dir}/symbol_timeframe_models',
        metamodel_path=f'{args.models_dir}/metamodels'
    )
    pipeline = HierarchicalPipeline(config)
    # Determine strategies to test ('all' runs every pre-defined strategy).
    if args.strategy == 'all':
        strategies_to_test = list(STRATEGIES.values())
    else:
        strategies_to_test = [STRATEGIES[args.strategy]]
    all_results = []
    for symbol in args.symbols:
        logger.info(f"\nProcessing {symbol}...")
        if not pipeline.load_models(symbol):
            logger.warning(f"Could not load models for {symbol}")
            continue
        # Load data once per symbol and reuse it across all strategies.
        try:
            df_5m = load_ohlcv_from_mysql(symbol, '5m', args.start_date, args.end_date)
            df_15m = load_ohlcv_from_mysql(symbol, '15m', args.start_date, args.end_date)
            if df_5m.empty or df_15m.empty:
                continue
        except Exception as e:
            logger.error(f"Data loading failed: {e}")
            continue
        symbol_metrics = []
        for strategy in strategies_to_test:
            logger.info(f"\nTesting strategy: {strategy.name}")
            trades = run_backtest(
                pipeline=pipeline,
                df_5m=df_5m,
                df_15m=df_15m,
                symbol=symbol,
                strategy=strategy,
                step_bars=args.step
            )
            if trades:
                metrics = calculate_metrics(trades, strategy, symbol)
                if metrics:
                    symbol_metrics.append(metrics)
                    print_metrics(metrics)
        if symbol_metrics:
            print_comparison(symbol_metrics)
            all_results.extend(symbol_metrics)
    # Save results as JSON (default=str stringifies datetime timestamps).
    if all_results:
        results_file = output_dir / f'strategy_comparison_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json'
        with open(results_file, 'w') as f:
            json.dump([asdict(m) for m in all_results], f, indent=2, default=str)
        logger.info(f"\nResults saved to: {results_file}")
    logger.info("\nBACKTEST V2 COMPLETE")
# Script entry point.
if __name__ == "__main__":
    main()