#!/usr/bin/env python3
"""
Hierarchical Pipeline Backtesting
=================================

Evaluates the 3-level hierarchical ML architecture with R:R 2:1 backtesting.

Key metrics:
- Win Rate with R:R 2:1 (target: >40%)
- Expectancy (target: >0.10)
- Trade filtering effectiveness
- Comparison: filtered vs unfiltered

Usage:
    python scripts/evaluate_hierarchical.py --symbols XAUUSD EURUSD
    python scripts/evaluate_hierarchical.py --symbols XAUUSD --rr 2.0 --attention-threshold 0.8

Author: ML Pipeline
Version: 1.0.0
Created: 2026-01-07
"""

import argparse
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass, asdict
import json

import numpy as np
import pandas as pd
from loguru import logger
import joblib

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))

# Import hierarchical pipeline directly to avoid __init__.py issues.
# Fail fast with explicit errors: spec_from_file_location() returns None for a
# missing/unloadable file, which would otherwise surface later as a cryptic
# AttributeError on `spec.loader`.
import importlib.util

pipeline_path = Path(__file__).parent.parent / 'src' / 'pipelines' / 'hierarchical_pipeline.py'
if not pipeline_path.is_file():
    raise FileNotFoundError(f"Hierarchical pipeline module not found: {pipeline_path}")
spec = importlib.util.spec_from_file_location("hierarchical_pipeline", pipeline_path)
if spec is None or spec.loader is None:
    raise ImportError(f"Could not create an import spec for {pipeline_path}")
hierarchical_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(hierarchical_module)

HierarchicalPipeline = hierarchical_module.HierarchicalPipeline
PipelineConfig = hierarchical_module.PipelineConfig
PredictionResult = hierarchical_module.PredictionResult


@dataclass
class TradeResult:
    """Result of a single simulated trade."""
    timestamp: datetime
    symbol: str
    direction: str  # 'long' or 'short'
    entry_price: float
    stop_loss: float
    take_profit: float
    risk: float            # distance entry -> stop, in price units
    reward: float          # distance entry -> target, in price units
    actual_high: float     # highest high observed in the outcome window
    actual_low: float      # lowest low observed in the outcome window
    hit_tp: bool
    hit_sl: bool
    profit_r: float        # Profit in R multiples (+RR on TP, -1 on SL)
    attention_score: float
    confidence_proba: float
    trade_quality: str
    was_filtered: bool     # Would this trade be filtered by attention?
@dataclass
class BacktestMetrics:
    """Comprehensive backtest metrics for one symbol."""
    symbol: str
    timeframe: str
    period: str          # "YYYY-MM-DD to YYYY-MM-DD"
    risk_reward: float
    # Trade counts
    total_bars: int      # NOTE: populated with the signal count (len(trades))
    total_trades: int
    filtered_trades: int  # signals rejected by the attention filter
    executed_trades: int
    # Win/Loss (executed trades only)
    wins: int
    losses: int
    win_rate: float
    # Profitability (R multiples)
    total_profit_r: float
    avg_profit_r: float
    expectancy: float
    profit_factor: float
    # Risk metrics
    max_consecutive_losses: int
    max_drawdown_r: float
    # Attention analysis
    avg_attention_winners: float
    avg_attention_losers: float
    high_attention_win_rate: float
    medium_attention_win_rate: float
    low_attention_win_rate: float
    # Comparison: unfiltered (every signal taken)
    unfiltered_total_trades: int
    unfiltered_win_rate: float
    unfiltered_expectancy: float
    improvement_pct: float


def setup_logging(log_dir: Path, experiment_name: str) -> Path:
    """Configure logging to file and console.

    Console gets INFO-level lines; the timestamped log file gets DEBUG with
    10 MB rotation. Returns the path of the log file created.
    """
    log_dir.mkdir(parents=True, exist_ok=True)
    log_file = log_dir / f"{experiment_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
    logger.remove()
    logger.add(sys.stderr, level="INFO", format="{time:HH:mm:ss} | {level} | {message}")
    logger.add(log_file, level="DEBUG", rotation="10 MB")
    return log_file


def load_ohlcv_from_mysql(
    symbol: str,
    timeframe: str,
    start_date: str,
    end_date: str
) -> pd.DataFrame:
    """Load OHLCV data from MySQL and resample to the requested timeframe.

    Args:
        symbol: Trading symbol (e.g. 'XAUUSD'); mapped to a provider ticker.
        timeframe: '5m', '15m', '1h' or '4h'; any other value keeps raw bars.
        start_date: Inclusive start date, 'YYYY-MM-DD'.
        end_date: Inclusive end date, 'YYYY-MM-DD'.

    Returns:
        DataFrame indexed by timestamp with open/high/low/close/volume columns
        (empty DataFrame when no rows match).

    Raises:
        ValueError: If the resolved ticker or either date is malformed.
    """
    from data.database import MySQLConnection
    import re

    ticker_map = {
        'XAUUSD': 'C:XAUUSD',
        'EURUSD': 'C:EURUSD',
        'GBPUSD': 'C:GBPUSD',
        'USDJPY': 'C:USDJPY',
        'BTCUSD': 'X:BTCUSD'
    }
    ticker = ticker_map.get(symbol, f'C:{symbol}')

    # The symbol and dates come straight from the CLI and are interpolated
    # into SQL below, so reject anything that is not a plain ticker token or
    # a valid ISO date (guards against SQL injection / malformed queries).
    if not re.fullmatch(r'[A-Za-z0-9:._-]+', ticker):
        raise ValueError(f"Invalid ticker: {ticker!r}")
    for value in (start_date, end_date):
        datetime.strptime(value, '%Y-%m-%d')  # raises ValueError if malformed

    logger.info(f"Loading {symbol} {timeframe} data from {start_date} to {end_date}...")

    try:
        db = MySQLConnection()

        # Load raw OHLCV data (base frequency)
        query = f"""
            SELECT date_agg as timestamp, open, high, low, close, volume
            FROM tickers_agg_data
            WHERE ticker = '{ticker}'
              AND date_agg >= '{start_date}'
              AND date_agg <= '{end_date}'
            ORDER BY date_agg ASC
        """
        df = pd.read_sql(query, db.engine)

        if df.empty:
            logger.warning(f"No data found for {symbol}")
            return df

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df.set_index('timestamp', inplace=True)
        df.sort_index(inplace=True)
        logger.info(f" Loaded {len(df)} raw bars")

        # Resample to requested timeframe; unknown timeframes keep raw bars.
        agg_dict = {
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }
        rule = {'5m': '5min', '15m': '15min', '1h': '1h', '4h': '4h'}.get(timeframe)
        if rule is not None:
            df = df.resample(rule).agg(agg_dict).dropna()

        logger.info(f" Resampled to {timeframe}: {len(df)} bars")
        return df

    except Exception as e:
        logger.error(f"Failed to load data from MySQL: {e}")
        raise


def generate_features(df: pd.DataFrame) -> pd.DataFrame:
    """Generate comprehensive feature set matching training.

    Returns the OHLCV columns plus engineered features (returns, volatility,
    range, ATR, MA distances, RSI, Bollinger, MACD, momentum, stochastic,
    volume, candle shape, price position, session/time features). Inf values
    are replaced with NaN; leading rows are NaN until rolling windows fill.
    """
    if len(df) == 0:
        return df

    df = df.copy()
    features = pd.DataFrame(index=df.index)

    close = df['close']
    high = df['high']
    low = df['low']
    open_price = df['open']
    volume = df.get('volume', pd.Series(1, index=df.index))
    # The final concat selects df['volume']; materialize the fallback column
    # so a volume-less input frame does not raise KeyError at the end.
    if 'volume' not in df.columns:
        df['volume'] = volume

    # Returns
    features['returns_1'] = close.pct_change(1)
    features['returns_3'] = close.pct_change(3)
    features['returns_5'] = close.pct_change(5)
    features['returns_10'] = close.pct_change(10)
    features['returns_20'] = close.pct_change(20)

    # Volatility
    features['volatility_5'] = close.pct_change().rolling(5).std()
    features['volatility_10'] = close.pct_change().rolling(10).std()
    features['volatility_20'] = close.pct_change().rolling(20).std()

    # Range
    candle_range = high - low
    features['range'] = candle_range
    features['range_pct'] = candle_range / close
    features['range_ma_5'] = candle_range.rolling(5).mean()
    features['range_ma_10'] = candle_range.rolling(10).mean()
    features['range_ma_20'] = candle_range.rolling(20).mean()
    features['range_ratio_5'] = candle_range / features['range_ma_5']
    features['range_ratio_20'] = candle_range / features['range_ma_20']

    # ATR (true range = max of bar range and gaps vs previous close)
    tr1 = high - low
    tr2 = abs(high - close.shift(1))
    tr3 = abs(low - close.shift(1))
    true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    features['atr_5'] = true_range.rolling(5).mean()
    features['atr_14'] = true_range.rolling(14).mean()
    features['atr_20'] = true_range.rolling(20).mean()
    features['atr_ratio'] = true_range / features['atr_14']

    # Moving Averages (distances are ATR-normalized)
    sma_5 = close.rolling(5).mean()
    sma_10 = close.rolling(10).mean()
    sma_20 = close.rolling(20).mean()
    sma_50 = close.rolling(50).mean()
    ema_5 = close.ewm(span=5, adjust=False).mean()
    ema_20 = close.ewm(span=20, adjust=False).mean()
    features['price_vs_sma5'] = (close - sma_5) / features['atr_14']
    features['price_vs_sma10'] = (close - sma_10) / features['atr_14']
    features['price_vs_sma20'] = (close - sma_20) / features['atr_14']
    features['price_vs_sma50'] = (close - sma_50) / features['atr_14']
    features['sma5_vs_sma20'] = (sma_5 - sma_20) / features['atr_14']
    features['ema5_vs_ema20'] = (ema_5 - ema_20) / features['atr_14']

    # RSI
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / (loss + 1e-10)
    features['rsi_14'] = 100 - (100 / (1 + rs))
    features['rsi_oversold'] = (features['rsi_14'] < 30).astype(float)
    features['rsi_overbought'] = (features['rsi_14'] > 70).astype(float)

    # Bollinger Bands
    bb_middle = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_middle + 2 * bb_std
    bb_lower = bb_middle - 2 * bb_std
    features['bb_width'] = (bb_upper - bb_lower) / bb_middle
    features['bb_position'] = (close - bb_lower) / (bb_upper - bb_lower + 1e-10)

    # MACD (ATR-normalized)
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd = ema_12 - ema_26
    macd_signal = macd.ewm(span=9, adjust=False).mean()
    features['macd'] = macd / features['atr_14']
    features['macd_signal'] = macd_signal / features['atr_14']
    features['macd_hist'] = (macd - macd_signal) / features['atr_14']

    # Momentum (ATR-normalized)
    features['momentum_5'] = (close - close.shift(5)) / features['atr_14']
    features['momentum_10'] = (close - close.shift(10)) / features['atr_14']
    features['momentum_20'] = (close - close.shift(20)) / features['atr_14']

    # Stochastic
    low_14 = low.rolling(14).min()
    high_14 = high.rolling(14).max()
    features['stoch_k'] = 100 * (close - low_14) / (high_14 - low_14 + 1e-10)
    features['stoch_d'] = features['stoch_k'].rolling(3).mean()

    # Williams %R
    features['williams_r'] = -100 * (high_14 - close) / (high_14 - low_14 + 1e-10)

    # Volume (neutral constants when no volume information exists)
    if volume.sum() > 0:
        vol_ma_5 = volume.rolling(5).mean()
        vol_ma_20 = volume.rolling(20).mean()
        features['volume_ratio'] = volume / (vol_ma_20 + 1)
        features['volume_trend'] = (vol_ma_5 - vol_ma_20) / (vol_ma_20 + 1)
    else:
        features['volume_ratio'] = 1.0
        features['volume_trend'] = 0.0

    # Candle patterns
    body = close - open_price
    features['body_pct'] = body / (candle_range + 1e-10)
    features['upper_shadow'] = (high - np.maximum(close, open_price)) / (candle_range + 1e-10)
    features['lower_shadow'] = (np.minimum(close, open_price) - low) / (candle_range + 1e-10)

    # Price position within recent ranges
    features['close_position'] = (close - low) / (candle_range + 1e-10)
    high_5 = high.rolling(5).max()
    low_5 = low.rolling(5).min()
    features['price_position_5'] = (close - low_5) / (high_5 - low_5 + 1e-10)
    high_20 = high.rolling(20).max()
    low_20 = low.rolling(20).min()
    features['price_position_20'] = (close - low_20) / (high_20 - low_20 + 1e-10)

    # Time features (only for a DatetimeIndex)
    if hasattr(df.index, 'hour'):
        hour = df.index.hour
        day_of_week = df.index.dayofweek
        features['hour_sin'] = np.sin(2 * np.pi * hour / 24)
        features['hour_cos'] = np.cos(2 * np.pi * hour / 24)
        features['dow_sin'] = np.sin(2 * np.pi * day_of_week / 7)
        features['dow_cos'] = np.cos(2 * np.pi * day_of_week / 7)
        # Session flags presumably refer to UTC hours — TODO confirm upstream.
        features['is_london'] = ((hour >= 8) & (hour < 16)).astype(float)
        features['is_newyork'] = ((hour >= 13) & (hour < 21)).astype(float)
        features['is_overlap'] = ((hour >= 13) & (hour < 16)).astype(float)

    # Clean
    features = features.replace([np.inf, -np.inf], np.nan)

    # Combine OHLCV with the engineered columns
    result = pd.concat([df[['open', 'high', 'low', 'close', 'volume']], features], axis=1)
    return result


def run_backtest(
    pipeline: "HierarchicalPipeline",
    df_5m: pd.DataFrame,
    df_15m: pd.DataFrame,
    symbol: str,
    risk_reward: float = 2.0,
    attention_threshold: float = 0.8,
    horizon_bars: int = 3,
    step_bars: int = 1
) -> List["TradeResult"]:
    """
    Run backtest simulation.

    Args:
        pipeline: Hierarchical pipeline instance
        df_5m: 5-minute OHLCV data
        df_15m: 15-minute OHLCV data
        symbol: Trading symbol
        risk_reward: Risk/reward ratio for TP
        attention_threshold: Minimum attention to take trade
        horizon_bars: Bars to look forward for TP/SL
        step_bars: Step size between predictions

    Returns:
        List of TradeResult (possibly empty when history is insufficient)
    """
    trades = []
    min_lookback = 100  # Minimum bars for features

    # Ensure data is sorted
    df_5m = df_5m.sort_index()
    df_15m = df_15m.sort_index()

    # Add features
    df_5m_feat = generate_features(df_5m)
    df_15m_feat = generate_features(df_15m)

    # Guard: positional indexing below requires this much history.
    if len(df_5m_feat) <= min_lookback * 3 or len(df_15m_feat) <= min_lookback + horizon_bars:
        logger.warning(f"Insufficient history for {symbol}; skipping backtest")
        return trades

    # Get common valid range
    valid_start_5m = df_5m_feat.index[min_lookback * 3]
    valid_start_15m = df_15m_feat.index[min_lookback]
    common_start = max(valid_start_5m, valid_start_15m)

    # Filter to common range leaving room for horizon
    df_15m_test = df_15m_feat[df_15m_feat.index >= common_start].iloc[:-horizon_bars]
    logger.info(f"Backtesting {len(df_15m_test)} bars...")

    for i in range(0, len(df_15m_test), step_bars):
        current_time = df_15m_test.index[i]

        # Get historical data up to current time
        df_5m_slice = df_5m_feat[df_5m_feat.index <= current_time].tail(min_lookback * 3)
        df_15m_slice = df_15m_feat[df_15m_feat.index <= current_time].tail(min_lookback)

        if len(df_5m_slice) < min_lookback or len(df_15m_slice) < 50:
            continue

        try:
            # Get prediction
            result = pipeline.predict(df_5m_slice, df_15m_slice, symbol)

            # Entry at the close of the current bar
            entry_price = float(df_15m_slice['close'].iloc[-1])

            # Determine direction from predicted deltas; fall back to recent
            # momentum when the deltas are within 10% of each other.
            delta_high = result.delta_high_final
            delta_low = result.delta_low_final
            if delta_high > delta_low * 1.1:
                direction = 'long'
            elif delta_low > delta_high * 1.1:
                direction = 'short'
            else:
                momentum = (df_15m_slice['close'].iloc[-1] / df_15m_slice['close'].iloc[-5]) - 1
                direction = 'long' if momentum > 0 else 'short'

            # SL at the predicted adverse delta; TP at risk * risk_reward.
            if direction == 'long':
                stop_loss = entry_price - delta_low
                risk = entry_price - stop_loss
                take_profit = entry_price + (risk * risk_reward)
            else:
                stop_loss = entry_price + delta_high
                risk = stop_loss - entry_price
                take_profit = entry_price - (risk * risk_reward)

            # Outcome window: the horizon_bars AFTER the entry bar. Starting
            # at the entry bar itself would leak its own high/low (which
            # occurred before the entry decision) into the outcome.
            entry_idx = df_15m_feat.index.get_loc(current_time)
            future_start_idx = entry_idx + 1
            future_end_idx = min(future_start_idx + horizon_bars, len(df_15m_feat))
            future_data = df_15m_feat.iloc[future_start_idx:future_end_idx]

            if len(future_data) < 1:
                continue

            actual_high = future_data['high'].max()
            actual_low = future_data['low'].min()

            # Determine outcome
            if direction == 'long':
                hit_tp = actual_high >= take_profit
                hit_sl = actual_low <= stop_loss
                if hit_tp and hit_sl:
                    # Both hit - determine which first (simplified: assume TP first if diff is larger)
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = high_dist >= low_dist
                    hit_sl = not hit_tp
                if hit_tp:
                    profit_r = risk_reward
                elif hit_sl:
                    profit_r = -1.0
                else:
                    # Neither hit - use actual PnL at window close
                    actual_pnl = future_data['close'].iloc[-1] - entry_price
                    profit_r = actual_pnl / risk if risk > 0 else 0
            else:
                hit_tp = actual_low <= take_profit
                hit_sl = actual_high >= stop_loss
                if hit_tp and hit_sl:
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = low_dist >= high_dist
                    hit_sl = not hit_tp
                if hit_tp:
                    profit_r = risk_reward
                elif hit_sl:
                    profit_r = -1.0
                else:
                    actual_pnl = entry_price - future_data['close'].iloc[-1]
                    profit_r = actual_pnl / risk if risk > 0 else 0

            # Average the two timeframe attention scores for filtering
            avg_attention = (result.attention_score_5m + result.attention_score_15m) / 2
            was_filtered = avg_attention < attention_threshold or not result.confidence

            trade = TradeResult(
                timestamp=current_time,
                symbol=symbol,
                direction=direction,
                entry_price=entry_price,
                stop_loss=stop_loss,
                take_profit=take_profit,
                risk=risk,
                reward=risk * risk_reward,
                actual_high=actual_high,
                actual_low=actual_low,
                hit_tp=hit_tp,
                hit_sl=hit_sl,
                profit_r=profit_r,
                attention_score=avg_attention,
                confidence_proba=result.confidence_proba,
                trade_quality=result.trade_quality,
                was_filtered=was_filtered
            )
            trades.append(trade)

        except Exception as e:
            logger.debug(f"Prediction failed at {current_time}: {e}")
            continue

        # Progress roughly every 500 bars; the modulo-window form still fires
        # when step_bars > 1 (an exact `% 500 == 0` check could never match).
        if (i + 1) % 500 < step_bars:
            logger.info(f" Processed {i + 1}/{len(df_15m_test)} bars...")

    return trades


def calculate_metrics(
    trades: List["TradeResult"],
    symbol: str,
    risk_reward: float,
    attention_threshold: float
) -> Optional[BacktestMetrics]:
    """Calculate comprehensive backtest metrics.

    Returns None when no trades were produced. 'Executed' metrics cover only
    trades that passed the attention filter; 'unfiltered' metrics cover all
    signals, so the two can be compared.
    """
    if not trades:
        return None

    # All trades
    all_trades = trades
    total_trades = len(all_trades)

    # Filtered trades (executed)
    executed_trades = [t for t in trades if not t.was_filtered]
    filtered_count = total_trades - len(executed_trades)

    # Win/Loss for executed trades (zero-R trades count as losses)
    wins = [t for t in executed_trades if t.profit_r > 0]
    losses = [t for t in executed_trades if t.profit_r <= 0]
    win_rate = len(wins) / len(executed_trades) if executed_trades else 0

    # Profitability
    total_profit_r = sum(t.profit_r for t in executed_trades)
    avg_profit_r = total_profit_r / len(executed_trades) if executed_trades else 0

    # Expectancy = (WinRate * AvgWin) - (LossRate * AvgLoss)
    avg_win = sum(t.profit_r for t in wins) / len(wins) if wins else 0
    avg_loss = abs(sum(t.profit_r for t in losses) / len(losses)) if losses else 0
    expectancy = (win_rate * avg_win) - ((1 - win_rate) * avg_loss)

    # Profit factor
    gross_profit = sum(t.profit_r for t in wins)
    gross_loss = abs(sum(t.profit_r for t in losses))
    profit_factor = gross_profit / gross_loss if gross_loss > 0 else float('inf')

    # Risk metrics: loss streaks and equity curve in R
    consecutive_losses = 0
    max_consecutive_losses = 0
    equity_curve = []
    cumulative = 0
    for t in executed_trades:
        cumulative += t.profit_r
        equity_curve.append(cumulative)
        if t.profit_r <= 0:
            consecutive_losses += 1
            max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        else:
            consecutive_losses = 0

    # Max drawdown (peak-to-trough of the R equity curve)
    peak = 0
    max_dd = 0
    for eq in equity_curve:
        if eq > peak:
            peak = eq
        dd = peak - eq
        if dd > max_dd:
            max_dd = dd

    # Attention analysis
    winners_attention = [t.attention_score for t in wins]
    losers_attention = [t.attention_score for t in losses]
    avg_attention_winners = np.mean(winners_attention) if winners_attention else 0
    avg_attention_losers = np.mean(losers_attention) if losers_attention else 0

    # Win rate by attention level
    high_attention = [t for t in executed_trades if t.attention_score >= 2.0]
    medium_attention = [t for t in executed_trades if 0.8 <= t.attention_score < 2.0]
    low_attention = [t for t in executed_trades if t.attention_score < 0.8]

    high_attention_wr = sum(1 for t in high_attention if t.profit_r > 0) / len(high_attention) if high_attention else 0
    medium_attention_wr = sum(1 for t in medium_attention if t.profit_r > 0) / len(medium_attention) if medium_attention else 0
    low_attention_wr = sum(1 for t in low_attention if t.profit_r > 0) / len(low_attention) if low_attention else 0

    # Unfiltered comparison (all trades)
    unfiltered_wins = [t for t in all_trades if t.profit_r > 0]
    unfiltered_win_rate = len(unfiltered_wins) / len(all_trades) if all_trades else 0
    unfiltered_profit = sum(t.profit_r for t in all_trades)
    unfiltered_expectancy = unfiltered_profit / len(all_trades) if all_trades else 0

    # Improvement of filtered expectancy over unfiltered, in percent
    improvement_pct = ((expectancy - unfiltered_expectancy) / abs(unfiltered_expectancy) * 100) if unfiltered_expectancy != 0 else 0

    # Get period covered by the signals
    start_date = min(t.timestamp for t in trades)
    end_date = max(t.timestamp for t in trades)
    period = f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"

    return BacktestMetrics(
        symbol=symbol,
        timeframe='15m',
        period=period,
        risk_reward=risk_reward,
        total_bars=len(trades),
        total_trades=total_trades,
        filtered_trades=filtered_count,
        executed_trades=len(executed_trades),
        wins=len(wins),
        losses=len(losses),
        win_rate=round(win_rate, 4),
        total_profit_r=round(total_profit_r, 2),
        avg_profit_r=round(avg_profit_r, 4),
        expectancy=round(expectancy, 4),
        profit_factor=round(profit_factor, 2),
        max_consecutive_losses=max_consecutive_losses,
        max_drawdown_r=round(max_dd, 2),
        avg_attention_winners=round(avg_attention_winners, 3),
        avg_attention_losers=round(avg_attention_losers, 3),
        high_attention_win_rate=round(high_attention_wr, 4),
        medium_attention_win_rate=round(medium_attention_wr, 4),
        low_attention_win_rate=round(low_attention_wr, 4),
        unfiltered_total_trades=total_trades,
        unfiltered_win_rate=round(unfiltered_win_rate, 4),
        unfiltered_expectancy=round(unfiltered_expectancy, 4),
        improvement_pct=round(improvement_pct, 1)
    )


def print_metrics(metrics: BacktestMetrics, target_wr: float = 0.40, target_exp: float = 0.10):
    """Print metrics to stdout with PASS/FAIL indicators against targets."""
    print(f"\n{'=' * 60}")
    print(f"BACKTEST RESULTS: {metrics.symbol}")
    print(f"{'=' * 60}")
    print(f"Period: {metrics.period}")
    print(f"Timeframe: {metrics.timeframe}")
    print(f"Risk:Reward: 1:{metrics.risk_reward}")

    print(f"\n--- Trade Statistics ---")
    print(f"Total Signals: {metrics.total_trades}")
    print(f"Filtered Out: {metrics.filtered_trades} ({metrics.filtered_trades / metrics.total_trades * 100:.1f}%)")
    print(f"Executed Trades: {metrics.executed_trades}")
    print(f"Wins: {metrics.wins}")
    print(f"Losses: {metrics.losses}")

    # Win Rate with target comparison
    wr_status = "PASS" if metrics.win_rate >= target_wr else "FAIL"
    print(f"\n--- Key Metrics ---")
    print(f"Win Rate: {metrics.win_rate * 100:.1f}% (target: {target_wr * 100}%) [{wr_status}]")

    # Expectancy with target comparison
    exp_status = "PASS" if metrics.expectancy >= target_exp else "FAIL"
    print(f"Expectancy: {metrics.expectancy:.4f} (target: {target_exp}) [{exp_status}]")

    print(f"Profit Factor: {metrics.profit_factor:.2f}")
    print(f"Total Profit (R): {metrics.total_profit_r:.2f}")
    print(f"Avg Profit/Trade (R): {metrics.avg_profit_r:.4f}")

    print(f"\n--- Risk Metrics ---")
    print(f"Max Consecutive Losses: {metrics.max_consecutive_losses}")
    print(f"Max Drawdown (R): {metrics.max_drawdown_r:.2f}")

    print(f"\n--- Attention Analysis ---")
    print(f"Avg Attention (Winners): {metrics.avg_attention_winners:.3f}")
    print(f"Avg Attention (Losers): {metrics.avg_attention_losers:.3f}")
    print(f"High Attention (>=2.0) Win Rate: {metrics.high_attention_win_rate * 100:.1f}%")
    print(f"Medium Attention (0.8-2.0) Win Rate: {metrics.medium_attention_win_rate * 100:.1f}%")
    print(f"Low Attention (<0.8) Win Rate: {metrics.low_attention_win_rate * 100:.1f}%")

    print(f"\n--- Comparison: Filtered vs Unfiltered ---")
    print(f"Unfiltered Win Rate: {metrics.unfiltered_win_rate * 100:.1f}%")
    print(f"Unfiltered Expectancy: {metrics.unfiltered_expectancy:.4f}")
    print(f"Improvement: {metrics.improvement_pct:+.1f}%")
    print(f"\n{'=' * 60}")


def generate_report(all_metrics: List[BacktestMetrics], output_path: Path):
    """Generate a markdown report (summary table plus per-symbol details)."""
    report = []
    report.append("# Hierarchical Pipeline Backtest Report")
    report.append(f"\n**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Summary table
    report.append("\n## Summary\n")
    report.append("| Symbol | Period | Win Rate | Expectancy | Profit (R) | Improvement |")
    report.append("|--------|--------|----------|------------|------------|-------------|")
    for m in all_metrics:
        wr_status = "PASS" if m.win_rate >= 0.40 else "FAIL"
        exp_status = "PASS" if m.expectancy >= 0.10 else "FAIL"
        report.append(
            f"| {m.symbol} | {m.period} | {m.win_rate * 100:.1f}% ({wr_status}) | "
            f"{m.expectancy:.4f} ({exp_status}) | {m.total_profit_r:.1f} | {m.improvement_pct:+.1f}% |"
        )

    # Detailed sections
    for m in all_metrics:
        report.append(f"\n## {m.symbol} Details\n")
        report.append(f"- **Total Signals:** {m.total_trades}")
        report.append(f"- **Filtered Out:** {m.filtered_trades} ({m.filtered_trades / m.total_trades * 100:.1f}%)")
        report.append(f"- **Executed Trades:** {m.executed_trades}")
        report.append(f"- **Win Rate:** {m.win_rate * 100:.1f}%")
        report.append(f"- **Expectancy:** {m.expectancy:.4f}")
        report.append(f"- **Profit Factor:** {m.profit_factor:.2f}")
        report.append("\n### Attention Analysis\n")
        report.append("| Attention Level | Win Rate |")
        report.append("|-----------------|----------|")
        report.append(f"| High (>=2.0) | {m.high_attention_win_rate * 100:.1f}% |")
        report.append(f"| Medium (0.8-2.0) | {m.medium_attention_win_rate * 100:.1f}% |")
        report.append(f"| Low (<0.8) | {m.low_attention_win_rate * 100:.1f}% |")

    # Write report
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w') as f:
        f.write('\n'.join(report))

    logger.info(f"Report saved to: {output_path}")


def main():
    """CLI entry point: load models and data, backtest each symbol, report."""
    parser = argparse.ArgumentParser(description='Hierarchical Pipeline Backtest')
    parser.add_argument('--symbols', nargs='+', default=['XAUUSD', 'EURUSD'],
                        help='Symbols to backtest')
    parser.add_argument('--start-date', type=str, default='2024-06-01',
                        help='Start date (YYYY-MM-DD)')
    parser.add_argument('--end-date', type=str, default='2025-12-31',
                        help='End date (YYYY-MM-DD)')
    parser.add_argument('--rr', type=float, default=2.0,
                        help='Risk:Reward ratio')
    parser.add_argument('--attention-threshold', type=float, default=0.8,
                        help='Minimum attention score to trade')
    parser.add_argument('--horizon', type=int, default=3,
                        help='Bars to look forward for TP/SL')
    parser.add_argument('--step', type=int, default=1,
                        help='Step size between predictions')
    parser.add_argument('--models-dir', type=str, default='models',
                        help='Directory containing trained models')
    parser.add_argument('--output-dir', type=str, default='models/backtest_results',
                        help='Output directory for reports')
    args = parser.parse_args()

    # Setup
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    setup_logging(output_dir / 'logs', 'hierarchical_backtest')

    logger.info("=" * 60)
    logger.info("HIERARCHICAL PIPELINE BACKTEST")
    logger.info("=" * 60)
    logger.info(f"Symbols: {args.symbols}")
    logger.info(f"Period: {args.start_date} to {args.end_date}")
    logger.info(f"R:R: 1:{args.rr}")
    logger.info(f"Attention Threshold: {args.attention_threshold}")

    # Initialize pipeline
    config = PipelineConfig(
        attention_model_path=f'{args.models_dir}/attention',
        base_model_path=f'{args.models_dir}/symbol_timeframe_models',
        metamodel_path=f'{args.models_dir}/metamodels',
        attention_threshold_low=args.attention_threshold,
        attention_threshold_high=2.0,
        confidence_threshold=0.5
    )
    pipeline = HierarchicalPipeline(config)

    all_metrics = []

    for symbol in args.symbols:
        logger.info(f"\n{'=' * 40}")
        logger.info(f"Processing {symbol}...")
        logger.info(f"{'=' * 40}")

        # Load models; skip the symbol when any model is missing
        if not pipeline.load_models(symbol):
            logger.warning(f"Could not load all models for {symbol}, skipping...")
            continue

        # Load data
        try:
            df_5m = load_ohlcv_from_mysql(symbol, '5m', args.start_date, args.end_date)
            df_15m = load_ohlcv_from_mysql(symbol, '15m', args.start_date, args.end_date)
            if df_5m.empty or df_15m.empty:
                logger.warning(f"No data for {symbol}, skipping...")
                continue
        except Exception as e:
            logger.error(f"Data loading failed for {symbol}: {e}")
            continue

        # Run backtest
        trades = run_backtest(
            pipeline=pipeline,
            df_5m=df_5m,
            df_15m=df_15m,
            symbol=symbol,
            risk_reward=args.rr,
            attention_threshold=args.attention_threshold,
            horizon_bars=args.horizon,
            step_bars=args.step
        )

        if not trades:
            logger.warning(f"No trades generated for {symbol}")
            continue

        # Calculate metrics
        metrics = calculate_metrics(
            trades=trades,
            symbol=symbol,
            risk_reward=args.rr,
            attention_threshold=args.attention_threshold
        )
        if metrics:
            all_metrics.append(metrics)
            print_metrics(metrics)

        # Save per-symbol trade log as CSV
        trades_df = pd.DataFrame([asdict(t) for t in trades])
        trades_file = output_dir / f'{symbol}_trades_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
        trades_df.to_csv(trades_file, index=False)
        logger.info(f"Trades saved to: {trades_file}")

    # Generate final report
    if all_metrics:
        report_file = output_dir / f'backtest_report_{datetime.now().strftime("%Y%m%d_%H%M%S")}.md'
        generate_report(all_metrics, report_file)

        # Save metrics as JSON (default=str covers datetimes and similar)
        metrics_json = output_dir / f'backtest_metrics_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json'
        with open(metrics_json, 'w') as f:
            json.dump([asdict(m) for m in all_metrics], f, indent=2, default=str)
        logger.info(f"Metrics saved to: {metrics_json}")

    logger.info("\n" + "=" * 60)
    logger.info("BACKTEST COMPLETE")
    logger.info("=" * 60)


if __name__ == "__main__":
    main()