#!/usr/bin/env python3
"""
Hierarchical Pipeline Backtesting V2
====================================

Enhanced backtesting with multiple filtering strategies based on findings:
- Inverted attention filter (filter HIGH attention, keep MEDIUM)
- Confidence-based filtering using metamodel probability
- Dynamic R:R based on predicted delta_high/delta_low ratio

Key findings from v1:
- Medium attention (0.8-2.0) has 44.6% win rate
- High attention (>=2.0) has 39.8% win rate
- This suggests we should INVERT the attention filtering logic

Usage:
    python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD EURUSD --strategy medium_attention
    python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD --strategy dynamic_rr
    python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD --strategy all

Author: ML Pipeline
Version: 2.0.0
Created: 2026-01-07
"""

import argparse
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass, asdict
import json

import numpy as np
import pandas as pd
from loguru import logger

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))

# Import hierarchical pipeline directly from its file path so the script works
# without the package being installed.
import importlib.util

pipeline_path = Path(__file__).parent.parent / 'src' / 'pipelines' / 'hierarchical_pipeline.py'
spec = importlib.util.spec_from_file_location("hierarchical_pipeline", pipeline_path)
hierarchical_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(hierarchical_module)

HierarchicalPipeline = hierarchical_module.HierarchicalPipeline
PipelineConfig = hierarchical_module.PipelineConfig
PredictionResult = hierarchical_module.PredictionResult


@dataclass
class FilterStrategy:
    """Trading filter strategy configuration."""
    name: str
    description: str
    # Attention filters
    attention_min: float = 0.0        # Minimum attention to trade
    attention_max: float = 999.0      # Maximum attention to trade
    # Confidence filters
    confidence_min: float = 0.0       # Minimum confidence probability
    require_confidence: bool = False  # Require confidence=True from metamodel
    # Dynamic R:R
    use_dynamic_rr: bool = False      # Use predicted deltas for R:R
    base_rr: float = 2.0              # Base R:R when not dynamic
    min_rr: float = 1.5               # Minimum R:R for dynamic
    max_rr: float = 4.0               # Maximum R:R for dynamic


# Pre-defined strategies based on findings
STRATEGIES = {
    'baseline': FilterStrategy(
        name='baseline',
        description='No filtering - all trades',
        attention_min=0.0,
        attention_max=999.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'medium_attention': FilterStrategy(
        name='medium_attention',
        description='Only medium attention (0.8-2.0) - best win rate from v1',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'medium_with_confidence': FilterStrategy(
        name='medium_with_confidence',
        description='Medium attention + confidence filter',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.5,
        require_confidence=True,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'high_confidence': FilterStrategy(
        name='high_confidence',
        description='Only high confidence trades',
        attention_min=0.0,
        attention_max=999.0,
        confidence_min=0.7,
        require_confidence=True,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'dynamic_rr': FilterStrategy(
        name='dynamic_rr',
        description='Medium attention + dynamic R:R from predictions',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=1.5,
        max_rr=4.0
    ),
    'aggressive_filter': FilterStrategy(
        name='aggressive_filter',
        description='Medium attention + high confidence + dynamic R:R',
        attention_min=0.8,
        attention_max=1.8,  # Tighter range
        confidence_min=0.6,
        require_confidence=True,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=1.5,
        max_rr=3.5
    ),
    'conservative': FilterStrategy(
        name='conservative',
        description='Very selective - only best setups',
        attention_min=1.0,
        attention_max=1.6,
        confidence_min=0.65,
        require_confidence=True,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=2.0,
        max_rr=3.0
    )
}


@dataclass
class TradeResult:
    """Result of a single trade."""
    timestamp: datetime
    symbol: str
    direction: str
    entry_price: float
    stop_loss: float
    take_profit: float
    risk: float
    reward: float
    risk_reward: float
    actual_high: float
    actual_low: float
    hit_tp: bool
    hit_sl: bool
    profit_r: float
    attention_score: float
    attention_class_5m: int
    attention_class_15m: int
    confidence: bool
    confidence_proba: float
    delta_high_pred: float
    delta_low_pred: float
    strategy: str
    passed_filter: bool


@dataclass
class StrategyMetrics:
    """Metrics for a trading strategy."""
    strategy_name: str
    strategy_description: str
    symbol: str
    period: str
    total_signals: int
    filtered_out: int
    executed_trades: int
    filter_rate: float
    wins: int
    losses: int
    win_rate: float
    total_profit_r: float
    avg_profit_r: float
    expectancy: float
    profit_factor: float
    max_consecutive_losses: int
    max_drawdown_r: float
    avg_attention_winners: float
    avg_attention_losers: float
    avg_confidence_winners: float
    avg_confidence_losers: float
    avg_rr_used: float


def setup_logging(log_dir: Path, experiment_name: str) -> Path:
    """Configure logging.

    Resets loguru sinks, then logs INFO to stderr and DEBUG to a timestamped
    rotating file under log_dir. Returns the log file path.
    """
    log_dir.mkdir(parents=True, exist_ok=True)
    log_file = log_dir / f"{experiment_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
    logger.remove()
    logger.add(sys.stderr, level="INFO", format="{time:HH:mm:ss} | {level} | {message}")
    logger.add(log_file, level="DEBUG", rotation="10 MB")
    return log_file


def load_ohlcv_from_mysql(symbol: str, timeframe: str, start_date: str, end_date: str) -> pd.DataFrame:
    """Load OHLCV data from MySQL and resample to the requested timeframe.

    Args:
        symbol: Plain symbol name (e.g. 'XAUUSD'); mapped to a provider ticker.
        timeframe: '5m' or '15m' (anything else returns raw bars).
        start_date, end_date: Inclusive date bounds as strings.

    Returns:
        DataFrame indexed by timestamp with open/high/low/close/volume columns;
        empty DataFrame if no rows were found.
    """
    from data.database import MySQLConnection

    ticker_map = {
        'XAUUSD': 'C:XAUUSD',
        'EURUSD': 'C:EURUSD',
        'GBPUSD': 'C:GBPUSD',
        'USDJPY': 'C:USDJPY',
        'BTCUSD': 'X:BTCUSD'
    }
    ticker = ticker_map.get(symbol, f'C:{symbol}')

    logger.info(f"Loading {symbol} {timeframe} data from {start_date} to {end_date}...")

    try:
        db = MySQLConnection()
        # NOTE(review): SQL is built by string interpolation. Inputs come from a
        # fixed ticker map and CLI date args, but this should be parameterized
        # if the query values ever come from an untrusted source.
        query = f"""
            SELECT date_agg as timestamp, open, high, low, close, volume
            FROM tickers_agg_data
            WHERE ticker = '{ticker}'
              AND date_agg >= '{start_date}'
              AND date_agg <= '{end_date}'
            ORDER BY date_agg ASC
        """
        df = pd.read_sql(query, db.engine)

        if df.empty:
            logger.warning(f"No data found for {symbol}")
            return df

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df.set_index('timestamp', inplace=True)
        df.sort_index(inplace=True)
        logger.info(f" Loaded {len(df)} raw bars")

        # Resample raw bars up to the target timeframe
        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
        if timeframe == '5m':
            df = df.resample('5min').agg(agg_dict).dropna()
        elif timeframe == '15m':
            df = df.resample('15min').agg(agg_dict).dropna()

        logger.info(f" Resampled to {timeframe}: {len(df)} bars")
        return df

    except Exception as e:
        logger.error(f"Failed to load data: {e}")
        raise


def generate_features(df: pd.DataFrame) -> pd.DataFrame:
    """Generate comprehensive feature set.

    Builds returns, volatility, range/ATR, moving-average distances, RSI,
    Bollinger, MACD, momentum, stochastic, volume, candle-shape, price-position
    and session/time features, then concatenates them onto the OHLCV columns.
    Infinities are converted to NaN; rows with insufficient history keep NaNs.
    """
    if len(df) == 0:
        return df

    df = df.copy()
    features = pd.DataFrame(index=df.index)

    close = df['close']
    high = df['high']
    low = df['low']
    open_price = df['open']
    volume = df.get('volume', pd.Series(1, index=df.index))

    # Returns
    for period in [1, 3, 5, 10, 20]:
        features[f'returns_{period}'] = close.pct_change(period)

    # Volatility
    for period in [5, 10, 20]:
        features[f'volatility_{period}'] = close.pct_change().rolling(period).std()

    # Range
    candle_range = high - low
    features['range'] = candle_range
    features['range_pct'] = candle_range / close
    for period in [5, 10, 20]:
        features[f'range_ma_{period}'] = candle_range.rolling(period).mean()
    features['range_ratio_5'] = candle_range / features['range_ma_5']
    features['range_ratio_20'] = candle_range / features['range_ma_20']

    # ATR (true range accounts for gaps vs previous close)
    tr1 = high - low
    tr2 = abs(high - close.shift(1))
    tr3 = abs(low - close.shift(1))
    true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    features['atr_5'] = true_range.rolling(5).mean()
    features['atr_14'] = true_range.rolling(14).mean()
    features['atr_20'] = true_range.rolling(20).mean()
    features['atr_ratio'] = true_range / features['atr_14']

    # Moving Averages (distances normalized by ATR-14)
    sma_5 = close.rolling(5).mean()
    sma_10 = close.rolling(10).mean()
    sma_20 = close.rolling(20).mean()
    sma_50 = close.rolling(50).mean()
    ema_5 = close.ewm(span=5, adjust=False).mean()
    ema_20 = close.ewm(span=20, adjust=False).mean()
    features['price_vs_sma5'] = (close - sma_5) / features['atr_14']
    features['price_vs_sma10'] = (close - sma_10) / features['atr_14']
    features['price_vs_sma20'] = (close - sma_20) / features['atr_14']
    features['price_vs_sma50'] = (close - sma_50) / features['atr_14']
    features['sma5_vs_sma20'] = (sma_5 - sma_20) / features['atr_14']
    features['ema5_vs_ema20'] = (ema_5 - ema_20) / features['atr_14']

    # RSI
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / (loss + 1e-10)
    features['rsi_14'] = 100 - (100 / (1 + rs))
    features['rsi_oversold'] = (features['rsi_14'] < 30).astype(float)
    features['rsi_overbought'] = (features['rsi_14'] > 70).astype(float)

    # Bollinger Bands
    bb_middle = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_middle + 2 * bb_std
    bb_lower = bb_middle - 2 * bb_std
    features['bb_width'] = (bb_upper - bb_lower) / bb_middle
    features['bb_position'] = (close - bb_lower) / (bb_upper - bb_lower + 1e-10)

    # MACD (normalized by ATR-14)
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd = ema_12 - ema_26
    macd_signal = macd.ewm(span=9, adjust=False).mean()
    features['macd'] = macd / features['atr_14']
    features['macd_signal'] = macd_signal / features['atr_14']
    features['macd_hist'] = (macd - macd_signal) / features['atr_14']

    # Momentum
    for period in [5, 10, 20]:
        features[f'momentum_{period}'] = (close - close.shift(period)) / features['atr_14']

    # Stochastic
    low_14 = low.rolling(14).min()
    high_14 = high.rolling(14).max()
    features['stoch_k'] = 100 * (close - low_14) / (high_14 - low_14 + 1e-10)
    features['stoch_d'] = features['stoch_k'].rolling(3).mean()

    # Williams %R
    features['williams_r'] = -100 * (high_14 - close) / (high_14 - low_14 + 1e-10)

    # Volume
    if volume.sum() > 0:
        vol_ma_20 = volume.rolling(20).mean()
        vol_ma_5 = volume.rolling(5).mean()
        features['volume_ratio'] = volume / (vol_ma_20 + 1)
        features['volume_trend'] = (vol_ma_5 - vol_ma_20) / (vol_ma_20 + 1)
    else:
        features['volume_ratio'] = 1.0
        features['volume_trend'] = 0.0

    # Candle patterns
    body = close - open_price
    features['body_pct'] = body / (candle_range + 1e-10)
    features['upper_shadow'] = (high - np.maximum(close, open_price)) / (candle_range + 1e-10)
    features['lower_shadow'] = (np.minimum(close, open_price) - low) / (candle_range + 1e-10)

    # Price position
    features['close_position'] = (close - low) / (candle_range + 1e-10)
    high_5 = high.rolling(5).max()
    low_5 = low.rolling(5).min()
    features['price_position_5'] = (close - low_5) / (high_5 - low_5 + 1e-10)
    high_20 = high.rolling(20).max()
    low_20 = low.rolling(20).min()
    features['price_position_20'] = (close - low_20) / (high_20 - low_20 + 1e-10)

    # Time features (only when the index is datetime-like)
    if hasattr(df.index, 'hour'):
        hour = df.index.hour
        day_of_week = df.index.dayofweek
        features['hour_sin'] = np.sin(2 * np.pi * hour / 24)
        features['hour_cos'] = np.cos(2 * np.pi * hour / 24)
        features['dow_sin'] = np.sin(2 * np.pi * day_of_week / 7)
        features['dow_cos'] = np.cos(2 * np.pi * day_of_week / 7)
        features['is_london'] = ((hour >= 8) & (hour < 16)).astype(float)
        features['is_newyork'] = ((hour >= 13) & (hour < 21)).astype(float)
        features['is_overlap'] = ((hour >= 13) & (hour < 16)).astype(float)

    features = features.replace([np.inf, -np.inf], np.nan)
    result = pd.concat([df[['open', 'high', 'low', 'close', 'volume']], features], axis=1)
    return result


def should_trade(result: PredictionResult, strategy: FilterStrategy) -> bool:
    """Check if trade passes strategy filters."""
    avg_attention = (result.attention_score_5m + result.attention_score_15m) / 2

    # Attention filter
    if avg_attention < strategy.attention_min or avg_attention > strategy.attention_max:
        return False

    # Confidence filter
    if strategy.require_confidence and not result.confidence:
        return False
    if result.confidence_proba < strategy.confidence_min:
        return False

    return True


def calculate_rr(result: PredictionResult, strategy: FilterStrategy, direction: str) -> float:
    """Calculate risk:reward ratio based on strategy.

    Fixed base R:R unless the strategy uses dynamic R:R, in which case the
    ratio of predicted deltas (TP-side over SL-side) is clamped to
    [min_rr, max_rr].
    """
    if not strategy.use_dynamic_rr:
        return strategy.base_rr

    # Dynamic R:R based on predicted deltas
    delta_high = abs(result.delta_high_final)
    delta_low = abs(result.delta_low_final)

    if direction == 'long':
        # For long: TP based on high, SL based on low
        if delta_low > 0:
            dynamic_rr = delta_high / delta_low
        else:
            dynamic_rr = strategy.base_rr
    else:
        # For short: TP based on low, SL based on high
        if delta_high > 0:
            dynamic_rr = delta_low / delta_high
        else:
            dynamic_rr = strategy.base_rr

    # Clamp to range
    return max(strategy.min_rr, min(strategy.max_rr, dynamic_rr))


def run_backtest(
    pipeline: HierarchicalPipeline,
    df_5m: pd.DataFrame,
    df_15m: pd.DataFrame,
    symbol: str,
    strategy: FilterStrategy,
    horizon_bars: int = 3,
    step_bars: int = 1
) -> List[TradeResult]:
    """Run backtest with specific strategy.

    Walks the 15m series every `step_bars` bars, predicts with the pipeline on
    trailing 5m/15m windows, sizes SL from the predicted adverse delta and TP
    from the strategy R:R, then scores the outcome over the next
    `horizon_bars` bars. All signals are recorded; `passed_filter` marks which
    ones the strategy would actually execute.
    """
    trades = []
    min_lookback = 100

    df_5m = df_5m.sort_index()
    df_15m = df_15m.sort_index()
    df_5m_feat = generate_features(df_5m)
    df_15m_feat = generate_features(df_15m)

    # First bar where both timeframes have enough history for a full window
    valid_start_5m = df_5m_feat.index[min_lookback * 3]
    valid_start_15m = df_15m_feat.index[min_lookback]
    common_start = max(valid_start_5m, valid_start_15m)
    df_15m_test = df_15m_feat[df_15m_feat.index >= common_start].iloc[:-horizon_bars]

    logger.info(f"Backtesting {len(df_15m_test)} bars with strategy '{strategy.name}'...")

    for i in range(0, len(df_15m_test), step_bars):
        # Progress roughly every 1000 bars. The step-aware modulo fires once per
        # 1000-bar window even when step_bars > 1 (a plain `% 1000 == 0` check
        # almost never triggers for i stepping by 3), and it is hoisted above
        # the skip/continue paths so it cannot be silently bypassed.
        if i > 0 and (i + 1) % 1000 < step_bars:
            logger.info(f" Processed {i + 1}/{len(df_15m_test)} bars...")

        current_time = df_15m_test.index[i]
        df_5m_slice = df_5m_feat[df_5m_feat.index <= current_time].tail(min_lookback * 3)
        df_15m_slice = df_15m_feat[df_15m_feat.index <= current_time].tail(min_lookback)

        if len(df_5m_slice) < min_lookback or len(df_15m_slice) < 50:
            continue

        try:
            result = pipeline.predict(df_5m_slice, df_15m_slice, symbol)
            entry_price = float(df_15m_slice['close'].iloc[-1])

            # Determine direction: favor the larger predicted delta; fall back
            # to short-term momentum when the deltas are within 10% of each other.
            delta_high = result.delta_high_final
            delta_low = result.delta_low_final
            if delta_high > delta_low * 1.1:
                direction = 'long'
            elif delta_low > delta_high * 1.1:
                direction = 'short'
            else:
                momentum = (df_15m_slice['close'].iloc[-1] / df_15m_slice['close'].iloc[-5]) - 1
                direction = 'long' if momentum > 0 else 'short'

            # Check if trade passes filters
            passed_filter = should_trade(result, strategy)

            # Calculate R:R
            rr = calculate_rr(result, strategy, direction)

            # Calculate SL and TP: SL at the predicted adverse excursion,
            # TP at risk * R:R on the favorable side.
            if direction == 'long':
                stop_loss = entry_price - delta_low
                risk = entry_price - stop_loss
                take_profit = entry_price + (risk * rr)
            else:
                stop_loss = entry_price + delta_high
                risk = stop_loss - entry_price
                take_profit = entry_price - (risk * rr)

            # Get future data over the evaluation horizon
            future_start_idx = df_15m_feat.index.get_loc(current_time)
            future_end_idx = min(future_start_idx + horizon_bars, len(df_15m_feat))
            future_data = df_15m_feat.iloc[future_start_idx:future_end_idx]
            if len(future_data) < 2:
                continue

            actual_high = future_data['high'].max()
            actual_low = future_data['low'].min()

            # Determine outcome. If both TP and SL levels were touched within
            # the horizon (bar order unknown), break the tie toward whichever
            # excursion from entry was larger.
            if direction == 'long':
                hit_tp = actual_high >= take_profit
                hit_sl = actual_low <= stop_loss
                if hit_tp and hit_sl:
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = high_dist >= low_dist
                    hit_sl = not hit_tp
                if hit_tp:
                    profit_r = rr
                elif hit_sl:
                    profit_r = -1.0
                else:
                    actual_pnl = future_data['close'].iloc[-1] - entry_price
                    profit_r = actual_pnl / risk if risk > 0 else 0
            else:
                hit_tp = actual_low <= take_profit
                hit_sl = actual_high >= stop_loss
                if hit_tp and hit_sl:
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = low_dist >= high_dist
                    hit_sl = not hit_tp
                if hit_tp:
                    profit_r = rr
                elif hit_sl:
                    profit_r = -1.0
                else:
                    actual_pnl = entry_price - future_data['close'].iloc[-1]
                    profit_r = actual_pnl / risk if risk > 0 else 0

            avg_attention = (result.attention_score_5m + result.attention_score_15m) / 2
            trade = TradeResult(
                timestamp=current_time,
                symbol=symbol,
                direction=direction,
                entry_price=entry_price,
                stop_loss=stop_loss,
                take_profit=take_profit,
                risk=risk,
                reward=risk * rr,
                risk_reward=rr,
                actual_high=actual_high,
                actual_low=actual_low,
                hit_tp=hit_tp,
                hit_sl=hit_sl,
                profit_r=profit_r,
                attention_score=avg_attention,
                attention_class_5m=result.attention_class_5m,
                attention_class_15m=result.attention_class_15m,
                confidence=result.confidence,
                confidence_proba=result.confidence_proba,
                delta_high_pred=delta_high,
                delta_low_pred=delta_low,
                strategy=strategy.name,
                passed_filter=passed_filter
            )
            trades.append(trade)

        except Exception as e:
            # Best-effort backtest: a failed prediction skips the bar.
            logger.debug(f"Prediction failed at {current_time}: {e}")
            continue

    return trades


def calculate_metrics(trades: List[TradeResult], strategy: FilterStrategy, symbol: str) -> Optional[StrategyMetrics]:
    """Calculate strategy metrics.

    Only trades with passed_filter=True count toward performance; the rest are
    reported as filtered out. Returns None when `trades` is empty, and a
    zeroed metrics object when every signal was filtered out.
    """
    if not trades:
        return None

    total_signals = len(trades)
    executed = [t for t in trades if t.passed_filter]
    filtered_out = total_signals - len(executed)
    filter_rate = filtered_out / total_signals if total_signals > 0 else 0

    if not executed:
        return StrategyMetrics(
            strategy_name=strategy.name,
            strategy_description=strategy.description,
            symbol=symbol,
            period=f"{min(t.timestamp for t in trades).strftime('%Y-%m-%d')} to {max(t.timestamp for t in trades).strftime('%Y-%m-%d')}",
            total_signals=total_signals,
            filtered_out=filtered_out,
            executed_trades=0,
            filter_rate=filter_rate,
            wins=0, losses=0, win_rate=0,
            total_profit_r=0, avg_profit_r=0, expectancy=0, profit_factor=0,
            max_consecutive_losses=0, max_drawdown_r=0,
            avg_attention_winners=0, avg_attention_losers=0,
            avg_confidence_winners=0, avg_confidence_losers=0,
            avg_rr_used=strategy.base_rr
        )

    wins = [t for t in executed if t.profit_r > 0]
    losses = [t for t in executed if t.profit_r <= 0]
    win_rate = len(wins) / len(executed) if executed else 0
    total_profit_r = sum(t.profit_r for t in executed)
    avg_profit_r = total_profit_r / len(executed) if executed else 0

    avg_win = sum(t.profit_r for t in wins) / len(wins) if wins else 0
    avg_loss = abs(sum(t.profit_r for t in losses) / len(losses)) if losses else 0
    expectancy = (win_rate * avg_win) - ((1 - win_rate) * avg_loss)

    gross_profit = sum(t.profit_r for t in wins)
    gross_loss = abs(sum(t.profit_r for t in losses))
    # NOTE: inf when there are no losing trades (serialized as "Infinity" in JSON)
    profit_factor = gross_profit / gross_loss if gross_loss > 0 else float('inf')

    # Risk metrics: longest losing streak and max equity drawdown in R
    consecutive_losses = 0
    max_consecutive_losses = 0
    equity_curve = []
    cumulative = 0
    for t in executed:
        cumulative += t.profit_r
        equity_curve.append(cumulative)
        if t.profit_r <= 0:
            consecutive_losses += 1
            max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        else:
            consecutive_losses = 0

    peak = 0
    max_dd = 0
    for eq in equity_curve:
        if eq > peak:
            peak = eq
        dd = peak - eq
        if dd > max_dd:
            max_dd = dd

    # Analysis: average attention/confidence split by outcome
    avg_attention_winners = np.mean([t.attention_score for t in wins]) if wins else 0
    avg_attention_losers = np.mean([t.attention_score for t in losses]) if losses else 0
    avg_confidence_winners = np.mean([t.confidence_proba for t in wins]) if wins else 0
    avg_confidence_losers = np.mean([t.confidence_proba for t in losses]) if losses else 0
    avg_rr_used = np.mean([t.risk_reward for t in executed]) if executed else strategy.base_rr

    start_date = min(t.timestamp for t in trades)
    end_date = max(t.timestamp for t in trades)
    period = f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"

    return StrategyMetrics(
        strategy_name=strategy.name,
        strategy_description=strategy.description,
        symbol=symbol,
        period=period,
        total_signals=total_signals,
        filtered_out=filtered_out,
        executed_trades=len(executed),
        filter_rate=round(filter_rate, 4),
        wins=len(wins),
        losses=len(losses),
        win_rate=round(win_rate, 4),
        total_profit_r=round(total_profit_r, 2),
        avg_profit_r=round(avg_profit_r, 4),
        expectancy=round(expectancy, 4),
        profit_factor=round(profit_factor, 2),
        max_consecutive_losses=max_consecutive_losses,
        max_drawdown_r=round(max_dd, 2),
        avg_attention_winners=round(avg_attention_winners, 3),
        avg_attention_losers=round(avg_attention_losers, 3),
        avg_confidence_winners=round(avg_confidence_winners, 3),
        avg_confidence_losers=round(avg_confidence_losers, 3),
        avg_rr_used=round(avg_rr_used, 2)
    )


def print_metrics(metrics: StrategyMetrics) -> None:
    """Print strategy metrics."""
    print(f"\n{'=' * 70}")
    print(f"STRATEGY: {metrics.strategy_name}")
    print(f"Description: {metrics.strategy_description}")
    print(f"{'=' * 70}")
    print(f"Symbol: {metrics.symbol} | Period: {metrics.period}")
    print(f"\n--- Trade Statistics ---")
    print(f"Total Signals: {metrics.total_signals}")
    print(f"Filtered Out: {metrics.filtered_out} ({metrics.filter_rate * 100:.1f}%)")
    print(f"Executed Trades: {metrics.executed_trades}")
    print(f"Wins: {metrics.wins} | Losses: {metrics.losses}")

    # Win Rate
    wr_status = "PASS" if metrics.win_rate >= 0.40 else "FAIL"
    print(f"\n--- Key Metrics ---")
    print(f"Win Rate: {metrics.win_rate * 100:.1f}% (target: 40%) [{wr_status}]")

    # Expectancy
    exp_status = "PASS" if metrics.expectancy >= 0.10 else ("IMPROVED" if metrics.expectancy > -0.04 else "FAIL")
    print(f"Expectancy: {metrics.expectancy:.4f} (target: 0.10) [{exp_status}]")
    print(f"Profit Factor: {metrics.profit_factor:.2f}")
    print(f"Total Profit (R): {metrics.total_profit_r:.2f}")
    print(f"Avg R:R Used: {metrics.avg_rr_used:.2f}")

    print(f"\n--- Risk ---")
    print(f"Max Consecutive Losses: {metrics.max_consecutive_losses}")
    print(f"Max Drawdown (R): {metrics.max_drawdown_r:.2f}")

    print(f"\n--- Analysis ---")
    print(f"Avg Attention (Winners): {metrics.avg_attention_winners:.3f}")
    print(f"Avg Attention (Losers): {metrics.avg_attention_losers:.3f}")
    print(f"Avg Confidence (Winners): {metrics.avg_confidence_winners:.3f}")
    print(f"Avg Confidence (Losers): {metrics.avg_confidence_losers:.3f}")


def print_comparison(all_metrics: List[StrategyMetrics]) -> None:
    """Print comparison table ranked by expectancy."""
    print(f"\n{'=' * 90}")
    print("STRATEGY COMPARISON")
    print(f"{'=' * 90}")
    print(f"{'Strategy':<25} {'Trades':>8} {'Filter%':>8} {'WinRate':>8} {'Expect':>10} {'PF':>6} {'Profit(R)':>10}")
    print("-" * 90)
    for m in sorted(all_metrics, key=lambda x: x.expectancy, reverse=True):
        wr_str = f"{m.win_rate * 100:.1f}%"
        print(f"{m.strategy_name:<25} {m.executed_trades:>8} {m.filter_rate * 100:>7.1f}% {wr_str:>8} {m.expectancy:>10.4f} {m.profit_factor:>6.2f} {m.total_profit_r:>10.2f}")
    print(f"{'=' * 90}")

    # Find best strategy
    best = max(all_metrics, key=lambda x: x.expectancy)
    print(f"\nBest Strategy by Expectancy: {best.strategy_name}")
    print(f" Expectancy: {best.expectancy:.4f}")
    print(f" Win Rate: {best.win_rate * 100:.1f}%")
    print(f" Profit Factor: {best.profit_factor:.2f}")


def main():
    """CLI entry point: run the selected strategies over each symbol and save results."""
    parser = argparse.ArgumentParser(description='Enhanced Hierarchical Pipeline Backtest')
    parser.add_argument('--symbols', nargs='+', default=['XAUUSD'], help='Symbols to backtest')
    parser.add_argument('--start-date', type=str, default='2024-09-01')
    parser.add_argument('--end-date', type=str, default='2024-12-31')
    parser.add_argument('--strategy', type=str, default='all',
                        choices=['all'] + list(STRATEGIES.keys()),
                        help='Strategy to test')
    parser.add_argument('--step', type=int, default=3)
    parser.add_argument('--models-dir', type=str, default='models')
    parser.add_argument('--output-dir', type=str, default='models/backtest_results_v2')
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    setup_logging(output_dir / 'logs', 'hierarchical_backtest_v2')

    logger.info("=" * 70)
    logger.info("HIERARCHICAL PIPELINE BACKTEST V2 - STRATEGY COMPARISON")
    logger.info("=" * 70)

    # Initialize pipeline
    config = PipelineConfig(
        attention_model_path=f'{args.models_dir}/attention',
        base_model_path=f'{args.models_dir}/symbol_timeframe_models',
        metamodel_path=f'{args.models_dir}/metamodels'
    )
    pipeline = HierarchicalPipeline(config)

    # Determine strategies to test
    if args.strategy == 'all':
        strategies_to_test = list(STRATEGIES.values())
    else:
        strategies_to_test = [STRATEGIES[args.strategy]]

    all_results = []

    for symbol in args.symbols:
        logger.info(f"\nProcessing {symbol}...")

        if not pipeline.load_models(symbol):
            logger.warning(f"Could not load models for {symbol}")
            continue

        # Load data once per symbol; strategies reuse it
        try:
            df_5m = load_ohlcv_from_mysql(symbol, '5m', args.start_date, args.end_date)
            df_15m = load_ohlcv_from_mysql(symbol, '15m', args.start_date, args.end_date)
            if df_5m.empty or df_15m.empty:
                continue
        except Exception as e:
            logger.error(f"Data loading failed: {e}")
            continue

        symbol_metrics = []
        for strategy in strategies_to_test:
            logger.info(f"\nTesting strategy: {strategy.name}")
            trades = run_backtest(
                pipeline=pipeline,
                df_5m=df_5m,
                df_15m=df_15m,
                symbol=symbol,
                strategy=strategy,
                step_bars=args.step
            )
            if trades:
                metrics = calculate_metrics(trades, strategy, symbol)
                if metrics:
                    symbol_metrics.append(metrics)
                    print_metrics(metrics)

        if symbol_metrics:
            print_comparison(symbol_metrics)
            all_results.extend(symbol_metrics)

    # Save results
    if all_results:
        results_file = output_dir / f'strategy_comparison_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json'
        with open(results_file, 'w') as f:
            json.dump([asdict(m) for m in all_results], f, indent=2, default=str)
        logger.info(f"\nResults saved to: {results_file}")

    logger.info("\nBACKTEST V2 COMPLETE")


if __name__ == "__main__":
    main()