trading-platform-ml-engine-v2/scripts/evaluate_hierarchical_v2.py
rckrdmrd 75c4d07690 feat: Initial commit - ML Engine codebase
Hierarchical ML Pipeline for trading predictions:
- Level 0: Attention Models (volatility/flow classification)
- Level 1: Base Models (XGBoost per symbol/timeframe)
- Level 2: Metamodels (XGBoost Stacking + Neural Gating)

Key components:
- src/pipelines/hierarchical_pipeline.py - Main prediction pipeline
- src/models/ - All ML model classes
- src/training/ - Training utilities
- src/api/ - FastAPI endpoints
- scripts/ - Training and evaluation scripts
- config/ - YAML configurations

Note: Trained models (*.joblib, *.pt) are gitignored.
      Regenerate with training scripts.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 04:27:40 -06:00

880 lines
30 KiB
Python

#!/usr/bin/env python3
"""
Hierarchical Pipeline Backtesting V2
====================================
Enhanced backtesting with multiple filtering strategies based on findings:
- Inverted attention filter (filter HIGH attention, keep MEDIUM)
- Confidence-based filtering using metamodel probability
- Dynamic R:R based on predicted delta_high/delta_low ratio
Key findings from v1:
- Medium attention (0.8-2.0) has 44.6% win rate
- High attention (>=2.0) has 39.8% win rate
- This suggests we should INVERT the attention filtering logic
Usage:
python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD EURUSD --strategy medium_attention
python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD --strategy dynamic_rr
python scripts/evaluate_hierarchical_v2.py --symbols XAUUSD --strategy all
Author: ML Pipeline
Version: 2.0.0
Created: 2026-01-07
"""
import argparse
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass, asdict
import json
import numpy as np
import pandas as pd
from loguru import logger
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
# Import hierarchical pipeline directly by file path. This bypasses the
# package import system so the script runs without installing the project
# (the repo's src/ layout is resolved relative to this file).
import importlib.util
pipeline_path = Path(__file__).parent.parent / 'src' / 'pipelines' / 'hierarchical_pipeline.py'
spec = importlib.util.spec_from_file_location("hierarchical_pipeline", pipeline_path)
hierarchical_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(hierarchical_module)
# Re-export the three pipeline classes used throughout this script.
HierarchicalPipeline = hierarchical_module.HierarchicalPipeline
PipelineConfig = hierarchical_module.PipelineConfig
PredictionResult = hierarchical_module.PredictionResult
@dataclass
class FilterStrategy:
    """Trading filter strategy configuration.

    A strategy decides (a) which prediction signals are executed as trades,
    via the attention and confidence filters, and (b) what risk:reward
    ratio to use — the fixed ``base_rr`` or a dynamic ratio derived from
    the predicted deltas (see ``calculate_rr``).
    """
    name: str
    description: str
    # Attention filters (applied to the mean of 5m/15m attention scores)
    attention_min: float = 0.0  # Minimum attention to trade
    attention_max: float = 999.0  # Maximum attention to trade
    # Confidence filters
    confidence_min: float = 0.0  # Minimum confidence probability
    require_confidence: bool = False  # Require confidence=True from metamodel
    # Dynamic R:R
    use_dynamic_rr: bool = False  # Use predicted deltas for R:R
    base_rr: float = 2.0  # Base R:R when not dynamic
    min_rr: float = 1.5  # Minimum R:R for dynamic
    max_rr: float = 4.0  # Maximum R:R for dynamic
# Pre-defined strategies based on findings from the v1 backtest:
# medium attention (0.8-2.0) out-performed high attention (>=2.0), so most
# strategies here keep the medium band and layer confidence / dynamic R:R
# filters on top. Selected on the CLI via --strategy (or 'all').
STRATEGIES = {
    'baseline': FilterStrategy(
        name='baseline',
        description='No filtering - all trades',
        attention_min=0.0,
        attention_max=999.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'medium_attention': FilterStrategy(
        name='medium_attention',
        description='Only medium attention (0.8-2.0) - best win rate from v1',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'medium_with_confidence': FilterStrategy(
        name='medium_with_confidence',
        description='Medium attention + confidence filter',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.5,
        require_confidence=True,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'high_confidence': FilterStrategy(
        name='high_confidence',
        description='Only high confidence trades',
        attention_min=0.0,
        attention_max=999.0,
        confidence_min=0.7,
        require_confidence=True,
        use_dynamic_rr=False,
        base_rr=2.0
    ),
    'dynamic_rr': FilterStrategy(
        name='dynamic_rr',
        description='Medium attention + dynamic R:R from predictions',
        attention_min=0.8,
        attention_max=2.0,
        confidence_min=0.0,
        require_confidence=False,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=1.5,
        max_rr=4.0
    ),
    'aggressive_filter': FilterStrategy(
        name='aggressive_filter',
        description='Medium attention + high confidence + dynamic R:R',
        attention_min=0.8,
        attention_max=1.8,  # Tighter range
        confidence_min=0.6,
        require_confidence=True,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=1.5,
        max_rr=3.5
    ),
    'conservative': FilterStrategy(
        name='conservative',
        description='Very selective - only best setups',
        attention_min=1.0,
        attention_max=1.6,
        confidence_min=0.65,
        require_confidence=True,
        use_dynamic_rr=True,
        base_rr=2.0,
        min_rr=2.0,
        max_rr=3.0
    )
}
@dataclass
class TradeResult:
    """Result of a single trade.

    Every signal produced by ``run_backtest`` is recorded here, whether or
    not it passed the strategy filter (see ``passed_filter``), so metrics
    can compare filtered vs. unfiltered performance.
    """
    timestamp: datetime          # signal bar timestamp (15m frame)
    symbol: str
    direction: str               # 'long' or 'short'
    entry_price: float           # close of the signal bar
    stop_loss: float
    take_profit: float
    risk: float                  # |entry - stop| in price units
    reward: float                # risk * risk_reward
    risk_reward: float           # R:R ratio used for this trade
    actual_high: float           # realized high over the outcome window
    actual_low: float            # realized low over the outcome window
    hit_tp: bool                 # take-profit level reached in the window
    hit_sl: bool                 # stop-loss level reached in the window
    profit_r: float              # outcome in R units: +rr, -1.0, or mark-to-market
    attention_score: float       # mean of the 5m and 15m attention scores
    attention_class_5m: int
    attention_class_15m: int
    confidence: bool             # metamodel confidence flag
    confidence_proba: float      # metamodel confidence probability
    delta_high_pred: float       # predicted upward delta
    delta_low_pred: float        # predicted downward delta
    strategy: str                # name of the strategy that produced this record
    passed_filter: bool          # True if the signal passed the strategy filters
@dataclass
class StrategyMetrics:
    """Metrics for a trading strategy.

    Aggregated by ``calculate_metrics`` from a list of TradeResult. Win/loss
    and profit statistics cover only executed (filter-passing) trades;
    ``total_signals``/``filter_rate`` describe the full signal set.
    """
    strategy_name: str
    strategy_description: str
    symbol: str
    period: str                    # "YYYY-MM-DD to YYYY-MM-DD" span of the signals
    total_signals: int             # all signals, filtered or not
    filtered_out: int
    executed_trades: int
    filter_rate: float             # filtered_out / total_signals
    wins: int
    losses: int
    win_rate: float
    total_profit_r: float          # sum of profit_r over executed trades
    avg_profit_r: float
    expectancy: float              # win_rate*avg_win - (1-win_rate)*avg_loss
    profit_factor: float           # gross profit / gross loss (inf if no losses)
    max_consecutive_losses: int
    max_drawdown_r: float          # peak-to-trough equity drop, in R units
    avg_attention_winners: float
    avg_attention_losers: float
    avg_confidence_winners: float
    avg_confidence_losers: float
    avg_rr_used: float             # mean R:R across executed trades
def setup_logging(log_dir: Path, experiment_name: str) -> Path:
    """Configure loguru sinks: INFO to stderr, DEBUG to a timestamped file.

    Returns the path of the log file that was created.
    """
    log_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_path = log_dir / f"{experiment_name}_{stamp}.log"
    # Drop loguru's default handler so sinks are not duplicated.
    logger.remove()
    logger.add(sys.stderr, level="INFO", format="{time:HH:mm:ss} | {level} | {message}")
    logger.add(log_path, level="DEBUG", rotation="10 MB")
    return log_path
def load_ohlcv_from_mysql(symbol: str, timeframe: str, start_date: str, end_date: str) -> pd.DataFrame:
    """Load OHLCV bars for ``symbol`` from MySQL and resample to ``timeframe``.

    Args:
        symbol: Plain symbol name (e.g. 'XAUUSD'); mapped to a provider ticker.
        timeframe: '5m' or '15m' (any other value returns the raw bars).
        start_date: Inclusive start, parseable by ``pd.Timestamp``.
        end_date: Inclusive end, parseable by ``pd.Timestamp``.

    Returns:
        DataFrame indexed by timestamp with open/high/low/close/volume
        columns (empty if no rows matched).

    Raises:
        ValueError: if ``symbol`` or either date fails validation.
        Exception: database/query errors are logged and re-raised.
    """
    from data.database import MySQLConnection
    ticker_map = {
        'XAUUSD': 'C:XAUUSD',
        'EURUSD': 'C:EURUSD',
        'GBPUSD': 'C:GBPUSD',
        'USDJPY': 'C:USDJPY',
        'BTCUSD': 'X:BTCUSD'
    }
    # Validate inputs BEFORE they are interpolated into SQL. Symbol and the
    # dates arrive from the CLI, so never trust them raw (SQL injection
    # hardening): unmapped symbols must be alphanumeric, and dates must
    # parse, after which we re-emit them in a canonical format.
    if symbol not in ticker_map and not symbol.isalnum():
        raise ValueError(f"Invalid symbol: {symbol!r}")
    start_ts = pd.Timestamp(start_date)  # raises ValueError on malformed input
    end_ts = pd.Timestamp(end_date)
    ticker = ticker_map.get(symbol, f'C:{symbol}')
    logger.info(f"Loading {symbol} {timeframe} data from {start_date} to {end_date}...")
    try:
        db = MySQLConnection()
        query = f"""
            SELECT date_agg as timestamp, open, high, low, close, volume
            FROM tickers_agg_data
            WHERE ticker = '{ticker}'
              AND date_agg >= '{start_ts:%Y-%m-%d %H:%M:%S}'
              AND date_agg <= '{end_ts:%Y-%m-%d %H:%M:%S}'
            ORDER BY date_agg ASC
        """
        df = pd.read_sql(query, db.engine)
        if df.empty:
            logger.warning(f"No data found for {symbol}")
            return df
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df.set_index('timestamp', inplace=True)
        df.sort_index(inplace=True)
        logger.info(f" Loaded {len(df)} raw bars")
        # Resample raw bars up to the requested timeframe; dropna removes
        # empty buckets (weekends / market closures).
        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
        if timeframe == '5m':
            df = df.resample('5min').agg(agg_dict).dropna()
        elif timeframe == '15m':
            df = df.resample('15min').agg(agg_dict).dropna()
        logger.info(f" Resampled to {timeframe}: {len(df)} bars")
        return df
    except Exception as e:
        logger.error(f"Failed to load data: {e}")
        raise
def generate_features(df: pd.DataFrame) -> pd.DataFrame:
    """Generate comprehensive feature set.

    Builds technical features — returns, volatility, range/ATR, moving
    averages, RSI, Bollinger Bands, MACD, momentum, stochastic,
    Williams %R, volume, candle anatomy, and session/time encodings —
    from an OHLCV frame.

    Args:
        df: OHLCV DataFrame. Time features are added only when the index
            exposes ``hour`` (i.e. a DatetimeIndex).

    Returns:
        DataFrame with the original OHLCV columns followed by the feature
        columns. Infinities are converted to NaN; rolling-window warm-up
        rows contain NaN.
    """
    if len(df) == 0:
        return df
    df = df.copy()
    features = pd.DataFrame(index=df.index)
    close = df['close']
    high = df['high']
    low = df['low']
    open_price = df['open']
    # Fall back to a constant volume of 1 when the column is missing.
    volume = df.get('volume', pd.Series(1, index=df.index))
    # Returns over multiple horizons
    for period in [1, 3, 5, 10, 20]:
        features[f'returns_{period}'] = close.pct_change(period)
    # Volatility: rolling std of 1-bar returns
    for period in [5, 10, 20]:
        features[f'volatility_{period}'] = close.pct_change().rolling(period).std()
    # Range (bar high-low span, absolute and relative)
    candle_range = high - low
    features['range'] = candle_range
    features['range_pct'] = candle_range / close
    for period in [5, 10, 20]:
        features[f'range_ma_{period}'] = candle_range.rolling(period).mean()
    features['range_ratio_5'] = candle_range / features['range_ma_5']
    features['range_ratio_20'] = candle_range / features['range_ma_20']
    # ATR — true range also covers gaps vs. the previous close
    tr1 = high - low
    tr2 = abs(high - close.shift(1))
    tr3 = abs(low - close.shift(1))
    true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    features['atr_5'] = true_range.rolling(5).mean()
    features['atr_14'] = true_range.rolling(14).mean()
    features['atr_20'] = true_range.rolling(20).mean()
    features['atr_ratio'] = true_range / features['atr_14']
    # Moving Averages — distances are normalized by ATR to be scale-free
    sma_5 = close.rolling(5).mean()
    sma_10 = close.rolling(10).mean()
    sma_20 = close.rolling(20).mean()
    sma_50 = close.rolling(50).mean()
    ema_5 = close.ewm(span=5, adjust=False).mean()
    ema_20 = close.ewm(span=20, adjust=False).mean()
    features['price_vs_sma5'] = (close - sma_5) / features['atr_14']
    features['price_vs_sma10'] = (close - sma_10) / features['atr_14']
    features['price_vs_sma20'] = (close - sma_20) / features['atr_14']
    features['price_vs_sma50'] = (close - sma_50) / features['atr_14']
    features['sma5_vs_sma20'] = (sma_5 - sma_20) / features['atr_14']
    features['ema5_vs_ema20'] = (ema_5 - ema_20) / features['atr_14']
    # RSI (simple-moving-average variant; +1e-10 avoids divide-by-zero)
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / (loss + 1e-10)
    features['rsi_14'] = 100 - (100 / (1 + rs))
    features['rsi_oversold'] = (features['rsi_14'] < 30).astype(float)
    features['rsi_overbought'] = (features['rsi_14'] > 70).astype(float)
    # Bollinger Bands (20-period, 2 std)
    bb_middle = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_middle + 2 * bb_std
    bb_lower = bb_middle - 2 * bb_std
    features['bb_width'] = (bb_upper - bb_lower) / bb_middle
    features['bb_position'] = (close - bb_lower) / (bb_upper - bb_lower + 1e-10)
    # MACD (12/26/9), ATR-normalized
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd = ema_12 - ema_26
    macd_signal = macd.ewm(span=9, adjust=False).mean()
    features['macd'] = macd / features['atr_14']
    features['macd_signal'] = macd_signal / features['atr_14']
    features['macd_hist'] = (macd - macd_signal) / features['atr_14']
    # Momentum, ATR-normalized
    for period in [5, 10, 20]:
        features[f'momentum_{period}'] = (close - close.shift(period)) / features['atr_14']
    # Stochastic oscillator (14-period %K, 3-period %D)
    low_14 = low.rolling(14).min()
    high_14 = high.rolling(14).max()
    features['stoch_k'] = 100 * (close - low_14) / (high_14 - low_14 + 1e-10)
    features['stoch_d'] = features['stoch_k'].rolling(3).mean()
    # Williams %R
    features['williams_r'] = -100 * (high_14 - close) / (high_14 - low_14 + 1e-10)
    # Volume — constant fallbacks when the feed carries no volume at all
    if volume.sum() > 0:
        vol_ma_20 = volume.rolling(20).mean()
        vol_ma_5 = volume.rolling(5).mean()
        features['volume_ratio'] = volume / (vol_ma_20 + 1)
        features['volume_trend'] = (vol_ma_5 - vol_ma_20) / (vol_ma_20 + 1)
    else:
        features['volume_ratio'] = 1.0
        features['volume_trend'] = 0.0
    # Candle patterns: body and shadows as fractions of the bar range
    body = close - open_price
    features['body_pct'] = body / (candle_range + 1e-10)
    features['upper_shadow'] = (high - np.maximum(close, open_price)) / (candle_range + 1e-10)
    features['lower_shadow'] = (np.minimum(close, open_price) - low) / (candle_range + 1e-10)
    # Price position within the bar and recent ranges
    features['close_position'] = (close - low) / (candle_range + 1e-10)
    high_5 = high.rolling(5).max()
    low_5 = low.rolling(5).min()
    features['price_position_5'] = (close - low_5) / (high_5 - low_5 + 1e-10)
    high_20 = high.rolling(20).max()
    low_20 = low.rolling(20).min()
    features['price_position_20'] = (close - low_20) / (high_20 - low_20 + 1e-10)
    # Time features: cyclical hour/day encodings plus session flags
    # (hours appear to be interpreted as UTC — TODO confirm with the feed)
    if hasattr(df.index, 'hour'):
        hour = df.index.hour
        day_of_week = df.index.dayofweek
        features['hour_sin'] = np.sin(2 * np.pi * hour / 24)
        features['hour_cos'] = np.cos(2 * np.pi * hour / 24)
        features['dow_sin'] = np.sin(2 * np.pi * day_of_week / 7)
        features['dow_cos'] = np.cos(2 * np.pi * day_of_week / 7)
        features['is_london'] = ((hour >= 8) & (hour < 16)).astype(float)
        features['is_newyork'] = ((hour >= 13) & (hour < 21)).astype(float)
        features['is_overlap'] = ((hour >= 13) & (hour < 16)).astype(float)
    features = features.replace([np.inf, -np.inf], np.nan)
    result = pd.concat([df[['open', 'high', 'low', 'close', 'volume']], features], axis=1)
    return result
def should_trade(result: PredictionResult, strategy: FilterStrategy) -> bool:
    """Return True when a prediction passes the strategy's filters.

    Checks the mean 5m/15m attention against the strategy's band, the
    metamodel confidence flag (when required), and the confidence
    probability threshold.
    """
    mean_attention = (result.attention_score_5m + result.attention_score_15m) / 2
    within_attention = strategy.attention_min <= mean_attention <= strategy.attention_max
    confidence_ok = result.confidence or not strategy.require_confidence
    proba_ok = result.confidence_proba >= strategy.confidence_min
    return within_attention and confidence_ok and proba_ok
def calculate_rr(result: PredictionResult, strategy: FilterStrategy, direction: str) -> float:
    """Compute the risk:reward ratio to use for a trade.

    Static strategies always return ``base_rr``. Dynamic strategies derive
    the ratio from the predicted deltas — reward leg divided by risk leg
    for the given direction — and clamp it to [min_rr, max_rr]. When the
    risk-leg delta is zero the base ratio is used instead.
    """
    if not strategy.use_dynamic_rr:
        return strategy.base_rr
    up_move = abs(result.delta_high_final)
    down_move = abs(result.delta_low_final)
    if direction == 'long':
        # Long: TP driven by the predicted high, SL by the predicted low.
        ratio = up_move / down_move if down_move > 0 else strategy.base_rr
    else:
        # Short: TP driven by the predicted low, SL by the predicted high.
        ratio = down_move / up_move if up_move > 0 else strategy.base_rr
    return max(strategy.min_rr, min(strategy.max_rr, ratio))
def run_backtest(
    pipeline: HierarchicalPipeline,
    df_5m: pd.DataFrame,
    df_15m: pd.DataFrame,
    symbol: str,
    strategy: FilterStrategy,
    horizon_bars: int = 3,
    step_bars: int = 1
) -> List[TradeResult]:
    """Run backtest with specific strategy.

    Walks the 15m feature frame every ``step_bars`` bars, asks the pipeline
    for a prediction at each step, simulates a trade with SL/TP derived
    from the predicted deltas, and scores it against the realized
    highs/lows over the next ``horizon_bars`` bars.

    Every signal is recorded as a TradeResult (with ``passed_filter`` set),
    so metrics can compare filtered vs. unfiltered performance.
    """
    trades = []
    min_lookback = 100  # bars of history required before the first prediction
    df_5m = df_5m.sort_index()
    df_15m = df_15m.sort_index()
    df_5m_feat = generate_features(df_5m)
    df_15m_feat = generate_features(df_15m)
    # Skip the warm-up region where rolling features are NaN on both frames.
    valid_start_5m = df_5m_feat.index[min_lookback * 3]
    valid_start_15m = df_15m_feat.index[min_lookback]
    common_start = max(valid_start_5m, valid_start_15m)
    # Drop the last horizon_bars so every signal has a full outcome window.
    df_15m_test = df_15m_feat[df_15m_feat.index >= common_start].iloc[:-horizon_bars]
    logger.info(f"Backtesting {len(df_15m_test)} bars with strategy '{strategy.name}'...")
    for i in range(0, len(df_15m_test), step_bars):
        current_time = df_15m_test.index[i]
        # Only data up to and including the signal bar is visible to the model.
        df_5m_slice = df_5m_feat[df_5m_feat.index <= current_time].tail(min_lookback * 3)
        df_15m_slice = df_15m_feat[df_15m_feat.index <= current_time].tail(min_lookback)
        if len(df_5m_slice) < min_lookback or len(df_15m_slice) < 50:
            continue
        try:
            result = pipeline.predict(df_5m_slice, df_15m_slice, symbol)
            # Entry is the close of the signal bar.
            entry_price = float(df_15m_slice['close'].iloc[-1])
            # Determine direction from the larger predicted move (10% margin);
            # fall back to 4-bar momentum when the deltas are roughly symmetric.
            delta_high = result.delta_high_final
            delta_low = result.delta_low_final
            if delta_high > delta_low * 1.1:
                direction = 'long'
            elif delta_low > delta_high * 1.1:
                direction = 'short'
            else:
                momentum = (df_15m_slice['close'].iloc[-1] / df_15m_slice['close'].iloc[-5]) - 1
                direction = 'long' if momentum > 0 else 'short'
            # Check if trade passes filters (the result is recorded either way).
            passed_filter = should_trade(result, strategy)
            # Calculate R:R (static base_rr or dynamic from predicted deltas).
            rr = calculate_rr(result, strategy, direction)
            # SL at the predicted adverse delta; TP at risk * R:R from entry.
            if direction == 'long':
                stop_loss = entry_price - delta_low
                risk = entry_price - stop_loss
                take_profit = entry_price + (risk * rr)
            else:
                stop_loss = entry_price + delta_high
                risk = stop_loss - entry_price
                take_profit = entry_price - (risk * rr)
            # Outcome window over the full feature frame.
            # NOTE(review): the window starts AT the signal bar, so that bar's
            # own high/low — realized before the close-of-bar entry — can
            # trigger TP/SL. Slight lookahead bias; confirm this is intended.
            future_start_idx = df_15m_feat.index.get_loc(current_time)
            future_end_idx = min(future_start_idx + horizon_bars, len(df_15m_feat))
            future_data = df_15m_feat.iloc[future_start_idx:future_end_idx]
            if len(future_data) < 2:
                continue
            actual_high = future_data['high'].max()
            actual_low = future_data['low'].min()
            # Determine outcome. When both TP and SL levels fall inside the
            # window, resolve by whichever excursion from entry was larger
            # (intra-window ordering is unknown at this bar resolution).
            if direction == 'long':
                hit_tp = actual_high >= take_profit
                hit_sl = actual_low <= stop_loss
                if hit_tp and hit_sl:
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = high_dist >= low_dist
                    hit_sl = not hit_tp
                if hit_tp:
                    profit_r = rr
                elif hit_sl:
                    profit_r = -1.0
                else:
                    # Neither level hit: mark to market at the window close.
                    actual_pnl = future_data['close'].iloc[-1] - entry_price
                    profit_r = actual_pnl / risk if risk > 0 else 0
            else:
                hit_tp = actual_low <= take_profit
                hit_sl = actual_high >= stop_loss
                if hit_tp and hit_sl:
                    high_dist = actual_high - entry_price
                    low_dist = entry_price - actual_low
                    hit_tp = low_dist >= high_dist
                    hit_sl = not hit_tp
                if hit_tp:
                    profit_r = rr
                elif hit_sl:
                    profit_r = -1.0
                else:
                    actual_pnl = entry_price - future_data['close'].iloc[-1]
                    profit_r = actual_pnl / risk if risk > 0 else 0
            avg_attention = (result.attention_score_5m + result.attention_score_15m) / 2
            trade = TradeResult(
                timestamp=current_time,
                symbol=symbol,
                direction=direction,
                entry_price=entry_price,
                stop_loss=stop_loss,
                take_profit=take_profit,
                risk=risk,
                reward=risk * rr,
                risk_reward=rr,
                actual_high=actual_high,
                actual_low=actual_low,
                hit_tp=hit_tp,
                hit_sl=hit_sl,
                profit_r=profit_r,
                attention_score=avg_attention,
                attention_class_5m=result.attention_class_5m,
                attention_class_15m=result.attention_class_15m,
                confidence=result.confidence,
                confidence_proba=result.confidence_proba,
                delta_high_pred=delta_high,
                delta_low_pred=delta_low,
                strategy=strategy.name,
                passed_filter=passed_filter
            )
            trades.append(trade)
        except Exception as e:
            # Best-effort: skip bars where the pipeline cannot predict.
            logger.debug(f"Prediction failed at {current_time}: {e}")
            continue
        # NOTE(review): with step_bars > 1, i + 1 may never be a multiple of
        # 1000, so this progress log can be skipped entirely.
        if (i + 1) % 1000 == 0:
            logger.info(f" Processed {i + 1}/{len(df_15m_test)} bars...")
    return trades
def calculate_metrics(trades: List[TradeResult], strategy: FilterStrategy, symbol: str) -> Optional[StrategyMetrics]:
    """Aggregate trade results into StrategyMetrics.

    Only trades that passed the strategy filter count toward win/loss and
    profit statistics; filtered-out signals contribute to the filter rate.

    Args:
        trades: All signals recorded by ``run_backtest``.
        strategy: The strategy that produced the trades.
        symbol: Symbol label copied into the metrics.

    Returns:
        StrategyMetrics, or None when ``trades`` is empty. (Fix: the return
        annotation is Optional — the original declared a bare StrategyMetrics
        while returning None here.)
    """
    if not trades:
        return None
    total_signals = len(trades)
    executed = [t for t in trades if t.passed_filter]
    filtered_out = total_signals - len(executed)
    filter_rate = filtered_out / total_signals
    # Period string is shared by both return paths (previously duplicated).
    start_date = min(t.timestamp for t in trades)
    end_date = max(t.timestamp for t in trades)
    period = f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"
    if not executed:
        # Nothing survived the filter: return zeroed metrics so callers can
        # still report the filter rate.
        return StrategyMetrics(
            strategy_name=strategy.name,
            strategy_description=strategy.description,
            symbol=symbol,
            period=period,
            total_signals=total_signals,
            filtered_out=filtered_out,
            executed_trades=0,
            filter_rate=filter_rate,
            wins=0, losses=0, win_rate=0,
            total_profit_r=0, avg_profit_r=0, expectancy=0, profit_factor=0,
            max_consecutive_losses=0, max_drawdown_r=0,
            avg_attention_winners=0, avg_attention_losers=0,
            avg_confidence_winners=0, avg_confidence_losers=0,
            avg_rr_used=strategy.base_rr
        )
    wins = [t for t in executed if t.profit_r > 0]
    losses = [t for t in executed if t.profit_r <= 0]
    win_rate = len(wins) / len(executed)
    total_profit_r = sum(t.profit_r for t in executed)
    avg_profit_r = total_profit_r / len(executed)
    avg_win = sum(t.profit_r for t in wins) / len(wins) if wins else 0
    avg_loss = abs(sum(t.profit_r for t in losses) / len(losses)) if losses else 0
    expectancy = (win_rate * avg_win) - ((1 - win_rate) * avg_loss)
    gross_profit = sum(t.profit_r for t in wins)
    gross_loss = abs(sum(t.profit_r for t in losses))
    # NOTE: float('inf') serializes as non-standard JSON 'Infinity' downstream.
    profit_factor = gross_profit / gross_loss if gross_loss > 0 else float('inf')
    # Risk metrics: loss streaks and running-equity drawdown in one pass.
    consecutive_losses = 0
    max_consecutive_losses = 0
    cumulative = 0.0
    peak = 0.0
    max_dd = 0.0
    for t in executed:
        cumulative += t.profit_r
        peak = max(peak, cumulative)
        max_dd = max(max_dd, peak - cumulative)
        if t.profit_r <= 0:
            consecutive_losses += 1
            max_consecutive_losses = max(max_consecutive_losses, consecutive_losses)
        else:
            consecutive_losses = 0
    # Winner/loser breakdowns used to sanity-check the filter hypothesis.
    avg_attention_winners = np.mean([t.attention_score for t in wins]) if wins else 0
    avg_attention_losers = np.mean([t.attention_score for t in losses]) if losses else 0
    avg_confidence_winners = np.mean([t.confidence_proba for t in wins]) if wins else 0
    avg_confidence_losers = np.mean([t.confidence_proba for t in losses]) if losses else 0
    avg_rr_used = np.mean([t.risk_reward for t in executed])
    return StrategyMetrics(
        strategy_name=strategy.name,
        strategy_description=strategy.description,
        symbol=symbol,
        period=period,
        total_signals=total_signals,
        filtered_out=filtered_out,
        executed_trades=len(executed),
        filter_rate=round(filter_rate, 4),
        wins=len(wins),
        losses=len(losses),
        win_rate=round(win_rate, 4),
        total_profit_r=round(total_profit_r, 2),
        avg_profit_r=round(avg_profit_r, 4),
        expectancy=round(expectancy, 4),
        profit_factor=round(profit_factor, 2),
        max_consecutive_losses=max_consecutive_losses,
        max_drawdown_r=round(max_dd, 2),
        avg_attention_winners=round(avg_attention_winners, 3),
        avg_attention_losers=round(avg_attention_losers, 3),
        avg_confidence_winners=round(avg_confidence_winners, 3),
        avg_confidence_losers=round(avg_confidence_losers, 3),
        avg_rr_used=round(avg_rr_used, 2)
    )
def print_metrics(metrics: StrategyMetrics):
    """Pretty-print a single strategy's backtest report to stdout."""
    rule = '=' * 70
    # PASS/FAIL tags against the project targets (40% win rate, 0.10 R).
    wr_status = "PASS" if metrics.win_rate >= 0.40 else "FAIL"
    if metrics.expectancy >= 0.10:
        exp_status = "PASS"
    elif metrics.expectancy > -0.04:
        exp_status = "IMPROVED"
    else:
        exp_status = "FAIL"
    report = [
        f"\n{rule}",
        f"STRATEGY: {metrics.strategy_name}",
        f"Description: {metrics.strategy_description}",
        f"{rule}",
        f"Symbol: {metrics.symbol} | Period: {metrics.period}",
        "\n--- Trade Statistics ---",
        f"Total Signals: {metrics.total_signals}",
        f"Filtered Out: {metrics.filtered_out} ({metrics.filter_rate * 100:.1f}%)",
        f"Executed Trades: {metrics.executed_trades}",
        f"Wins: {metrics.wins} | Losses: {metrics.losses}",
        "\n--- Key Metrics ---",
        f"Win Rate: {metrics.win_rate * 100:.1f}% (target: 40%) [{wr_status}]",
        f"Expectancy: {metrics.expectancy:.4f} (target: 0.10) [{exp_status}]",
        f"Profit Factor: {metrics.profit_factor:.2f}",
        f"Total Profit (R): {metrics.total_profit_r:.2f}",
        f"Avg R:R Used: {metrics.avg_rr_used:.2f}",
        "\n--- Risk ---",
        f"Max Consecutive Losses: {metrics.max_consecutive_losses}",
        f"Max Drawdown (R): {metrics.max_drawdown_r:.2f}",
        "\n--- Analysis ---",
        f"Avg Attention (Winners): {metrics.avg_attention_winners:.3f}",
        f"Avg Attention (Losers): {metrics.avg_attention_losers:.3f}",
        f"Avg Confidence (Winners): {metrics.avg_confidence_winners:.3f}",
        f"Avg Confidence (Losers): {metrics.avg_confidence_losers:.3f}",
    ]
    print("\n".join(report))
def print_comparison(all_metrics: List[StrategyMetrics]):
    """Print a side-by-side strategy comparison table, ranked by expectancy."""
    bar = '=' * 90
    print(f"\n{bar}")
    print("STRATEGY COMPARISON")
    print(f"{bar}")
    print(f"{'Strategy':<25} {'Trades':>8} {'Filter%':>8} {'WinRate':>8} {'Expect':>10} {'PF':>6} {'Profit(R)':>10}")
    print("-" * 90)
    # Stable descending sort: ranked[0] matches max() on ties.
    ranked = sorted(all_metrics, key=lambda m: m.expectancy, reverse=True)
    for m in ranked:
        wr_str = f"{m.win_rate * 100:.1f}%"
        print(f"{m.strategy_name:<25} {m.executed_trades:>8} {m.filter_rate * 100:>7.1f}% {wr_str:>8} {m.expectancy:>10.4f} {m.profit_factor:>6.2f} {m.total_profit_r:>10.2f}")
    print(f"{bar}")
    best = ranked[0]
    print(f"\nBest Strategy by Expectancy: {best.strategy_name}")
    print(f" Expectancy: {best.expectancy:.4f}")
    print(f" Win Rate: {best.win_rate * 100:.1f}%")
    print(f" Profit Factor: {best.profit_factor:.2f}")
def main():
    """CLI entry point: backtest one or more filter strategies per symbol.

    For each symbol: loads models, pulls 5m/15m OHLCV from MySQL once,
    runs every requested strategy through ``run_backtest``, prints per-
    strategy metrics plus a comparison table, and saves all metrics as JSON.
    """
    parser = argparse.ArgumentParser(description='Enhanced Hierarchical Pipeline Backtest')
    parser.add_argument('--symbols', nargs='+', default=['XAUUSD'],
                        help='Symbols to backtest')
    parser.add_argument('--start-date', type=str, default='2024-09-01')
    parser.add_argument('--end-date', type=str, default='2024-12-31')
    parser.add_argument('--strategy', type=str, default='all',
                        choices=['all'] + list(STRATEGIES.keys()),
                        help='Strategy to test')
    parser.add_argument('--step', type=int, default=3)  # bars between evaluated signals
    parser.add_argument('--models-dir', type=str, default='models')
    parser.add_argument('--output-dir', type=str, default='models/backtest_results_v2')
    args = parser.parse_args()
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    setup_logging(output_dir / 'logs', 'hierarchical_backtest_v2')
    logger.info("=" * 70)
    logger.info("HIERARCHICAL PIPELINE BACKTEST V2 - STRATEGY COMPARISON")
    logger.info("=" * 70)
    # Initialize pipeline with the three model directories.
    config = PipelineConfig(
        attention_model_path=f'{args.models_dir}/attention',
        base_model_path=f'{args.models_dir}/symbol_timeframe_models',
        metamodel_path=f'{args.models_dir}/metamodels'
    )
    pipeline = HierarchicalPipeline(config)
    # Determine strategies to test ('all' runs every pre-defined strategy).
    if args.strategy == 'all':
        strategies_to_test = list(STRATEGIES.values())
    else:
        strategies_to_test = [STRATEGIES[args.strategy]]
    all_results = []
    for symbol in args.symbols:
        logger.info(f"\nProcessing {symbol}...")
        if not pipeline.load_models(symbol):
            logger.warning(f"Could not load models for {symbol}")
            continue
        # Load data once per symbol and reuse it across all strategies.
        try:
            df_5m = load_ohlcv_from_mysql(symbol, '5m', args.start_date, args.end_date)
            df_15m = load_ohlcv_from_mysql(symbol, '15m', args.start_date, args.end_date)
            if df_5m.empty or df_15m.empty:
                continue
        except Exception as e:
            logger.error(f"Data loading failed: {e}")
            continue
        symbol_metrics = []
        for strategy in strategies_to_test:
            logger.info(f"\nTesting strategy: {strategy.name}")
            trades = run_backtest(
                pipeline=pipeline,
                df_5m=df_5m,
                df_15m=df_15m,
                symbol=symbol,
                strategy=strategy,
                step_bars=args.step
            )
            if trades:
                metrics = calculate_metrics(trades, strategy, symbol)
                if metrics:
                    symbol_metrics.append(metrics)
                    print_metrics(metrics)
        if symbol_metrics:
            print_comparison(symbol_metrics)
            all_results.extend(symbol_metrics)
    # Save results as JSON (default=str stringifies datetime timestamps).
    if all_results:
        results_file = output_dir / f'strategy_comparison_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json'
        with open(results_file, 'w') as f:
            json.dump([asdict(m) for m in all_results], f, indent=2, default=str)
        logger.info(f"\nResults saved to: {results_file}")
    logger.info("\nBACKTEST V2 COMPLETE")
# Script entry point.
if __name__ == "__main__":
    main()