# trading-platform-ml-engine-v2/scripts/run_visualization.py

#!/usr/bin/env python3
"""
ML Models Visualization Script
==============================
Visualizes ML model predictions for a specified date range.

Models (this script plots 1, 3 and 4):
1. RangePredictor - Predicts delta high/low as percentage
2. EnhancedRangePredictor - Enhanced predictor with dual-horizon ensemble (not loaded by this script)
3. MovementMagnitudePredictor - Predicts movement magnitude in USD
4. AMDDetectorML - Detects AMD phases (Accumulation, Manipulation, Distribution)
5. TPSLClassifier - Predicts TP/SL probability (loaded and reported in the summary only)

Default period: Second week of January 2025 (out-of-sample)

Usage:
    python scripts/run_visualization.py --symbol XAUUSD --timeframe 15m --start 2025-01-06 --end 2025-01-12
    python scripts/run_visualization.py --symbol BTCUSD --timeframe 5m
    python scripts/run_visualization.py --all-symbols --timeframe 15m

Author: ML-Specialist (NEXUS v4.0)
Date: 2026-01-05
"""

import sys
import os

# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))

import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
import argparse
from typing import Dict, List, Optional, Tuple, Any
import json
from loguru import logger
import joblib
import yaml

# Visualization libraries
# Both plotting backends are optional. Each import is attempted once at module
# load; the outcome is stored in a HAS_* flag that the chart functions and
# run_visualization() check before drawing, so a missing backend only disables
# its own output format.
try:
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    from matplotlib.patches import Rectangle
    from matplotlib.lines import Line2D
    HAS_MATPLOTLIB = True
except ImportError:
    HAS_MATPLOTLIB = False
    logger.warning("matplotlib not available - install with: pip install matplotlib")

try:
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
    import plotly.express as px
    HAS_PLOTLY = True
except ImportError:
    HAS_PLOTLY = False
    logger.warning("plotly not available - install with: pip install plotly kaleido")

# Local imports
from data.database import MySQLConnection
from data.features import FeatureEngineer


# ==============================================================================
# Model Loading Functions
# ==============================================================================

def load_range_predictor(model_path: str, timeframe: str = "15m", horizon: str = "scalping"):
    """Load every serialized RangePredictor artifact for one timeframe.

    Scans ``<model_path>/range_predictor/<timeframe>`` for ``*.joblib`` files.
    The file whose stem is ``metadata`` is returned separately; every other
    file is treated as a model and keyed by its file stem.

    Args:
        model_path: Base directory holding the per-model sub-directories.
        timeframe: Sub-directory name under ``range_predictor``.
        horizon: Accepted for call compatibility; not used while loading.

    Returns:
        ``(models, metadata)`` as two dicts, or ``(None, None)`` when the
        directory does not exist.
    """
    model_dir = Path(model_path) / "range_predictor" / timeframe
    if not model_dir.exists():
        logger.warning(f"RangePredictor not found at {model_dir}")
        return None, None

    loaded_models = {}
    loaded_metadata = {}

    for artifact in model_dir.glob("*.joblib"):
        stem = artifact.stem
        payload = joblib.load(artifact)
        if stem == 'metadata':
            loaded_metadata = payload
            continue
        loaded_models[stem] = payload
        logger.info(f"Loaded RangePredictor model: {stem}")

    return loaded_models, loaded_metadata


def load_movement_predictor(model_path: str, horizon_key: str = "15m_60min"):
    """Load a MovementMagnitudePredictor from ``<model_path>/movement_predictor/<horizon_key>``.

    Args:
        model_path: Base directory holding the per-model sub-directories.
        horizon_key: Sub-directory name under ``movement_predictor``.

    Returns:
        The loaded predictor, or ``None`` when the directory is missing or
        the predictor's ``load`` call raises (the error is logged).
    """
    from models.movement_magnitude_predictor import MovementMagnitudePredictor

    model_dir = Path(model_path) / "movement_predictor" / horizon_key
    if model_dir.exists():
        instance = MovementMagnitudePredictor()
        try:
            instance.load(str(model_dir))
            logger.info(f"Loaded MovementMagnitudePredictor from {model_dir}")
            return instance
        except Exception as e:
            # Loading is best-effort: a broken artifact disables this model only.
            logger.error(f"Failed to load MovementPredictor: {e}")
            return None

    logger.warning(f"MovementPredictor not found at {model_dir}")
    return None


def load_amd_detector(model_path: str):
    """Load an AMDDetectorML (created with ``use_gpu=False``) from ``<model_path>/amd_detector``.

    Args:
        model_path: Base directory holding the per-model sub-directories.

    Returns:
        The loaded detector, or ``None`` when the directory is missing or the
        detector's ``load`` call raises (the error is logged).
    """
    from models.amd_detector_ml import AMDDetectorML

    detector_dir = Path(model_path) / "amd_detector"
    if detector_dir.exists():
        instance = AMDDetectorML(use_gpu=False)
        try:
            instance.load(str(detector_dir))
            logger.info(f"Loaded AMDDetectorML from {detector_dir}")
            return instance
        except Exception as e:
            # Loading is best-effort: a broken artifact disables this model only.
            logger.error(f"Failed to load AMDDetector: {e}")
            return None

    logger.warning(f"AMDDetector not found at {detector_dir}")
    return None


def load_tpsl_classifier(model_path: str):
    """Load a TPSLClassifier from ``<model_path>/tpsl_classifier`` if present.

    Args:
        model_path: Base directory holding the per-model sub-directories.

    Returns:
        The loaded classifier, or ``None`` when the directory is missing or
        the classifier's ``load`` call raises (the error is logged).
    """
    from models.tp_sl_classifier import TPSLClassifier

    classifier_dir = Path(model_path) / "tpsl_classifier"
    if classifier_dir.exists():
        instance = TPSLClassifier()
        try:
            instance.load(str(classifier_dir))
            logger.info(f"Loaded TPSLClassifier from {classifier_dir}")
            return instance
        except Exception as e:
            # Loading is best-effort: a broken artifact disables this model only.
            logger.error(f"Failed to load TPSLClassifier: {e}")
            return None

    logger.warning(f"TPSLClassifier not found at {classifier_dir}")
    return None


# ==============================================================================
# Feature Preparation
# ==============================================================================

def prepare_features(df: pd.DataFrame, expected_features: List[str] = None) -> pd.DataFrame:
    """Build the model input frame from OHLCV data.

    Runs the FeatureEngineer price, volume, time and rolling feature steps on
    a copy of ``df``, then adds ``obv``, ``vpt`` and the three session flags
    when the engineer did not already produce them.

    Args:
        df: Input frame; the code reads its ``close`` and ``volume`` columns
            and passes ``close``/``volume``/``high``/``low`` to the rolling step.
        expected_features: Optional feature names; any that are still missing
            after feature engineering are added as constant 0 columns.

    Returns:
        The processed frame with every row containing a NaN dropped.
    """
    engineer = FeatureEngineer()

    frame = engineer.create_price_features(df.copy())
    frame = engineer.create_volume_features(frame)
    frame = engineer.create_time_features(frame)
    frame = engineer.create_rolling_features(
        frame,
        columns=['close', 'volume', 'high', 'low'],
        windows=[5, 10, 20]
    )

    # Volume-based indicators, only when not already present.
    if 'obv' not in frame.columns:
        signed_volume = np.sign(frame['close'].diff()) * frame['volume']
        frame['obv'] = signed_volume.cumsum()

    if 'vpt' not in frame.columns:
        weighted_return = frame['close'].pct_change() * frame['volume']
        frame['vpt'] = weighted_return.cumsum()

    # Session flags are derived from the index hour as-is (no timezone
    # conversion happens here); each window is [start, end).
    if isinstance(frame.index, pd.DatetimeIndex):
        hour = frame.index.hour
        session_windows = (
            ('is_london', 8, 16),
            ('is_newyork', 13, 21),
            ('is_tokyo', 0, 8),
        )
        for flag, opens, closes in session_windows:
            if flag not in frame.columns:
                frame[flag] = ((hour >= opens) & (hour < closes)).astype(int)

    # Pad any feature still missing with zeros so column lookups succeed.
    for name in expected_features or ():
        if name not in frame.columns:
            frame[name] = 0

    return frame.dropna()


def get_feature_columns(df: pd.DataFrame, exclude_ohlcv: bool = True) -> List[str]:
    """Return the numeric feature column names of ``df`` in column order.

    Columns prefixed ``target_`` or ``pred_`` are always skipped; the raw
    OHLCV/VWAP columns are skipped unless ``exclude_ohlcv`` is False. Only
    float64/float32/int64/int32 columns are kept.
    """
    # Tuple (not set): dtype objects match these strings via ==, not by hash.
    numeric_dtypes = ('float64', 'float32', 'int64', 'int32')
    raw_columns = {'open', 'high', 'low', 'close', 'volume', 'vwap'} if exclude_ohlcv else set()

    selected = []
    for column in df.columns:
        if column in raw_columns:
            continue
        if column.startswith(('target_', 'pred_')):
            continue
        if df[column].dtype in numeric_dtypes:
            selected.append(column)
    return selected


# ==============================================================================
# Prediction Functions
# ==============================================================================

def predict_with_range_models(
    models: Dict,
    X: np.ndarray,
    horizon: str = "scalping"
) -> Dict[str, np.ndarray]:
    """Run the RangePredictor models that belong to ``horizon``.

    A model is selected when ``horizon`` is a substring of its name. Its
    output key is chosen from the name: ``direction`` wins over ``high``,
    which wins over ``low``; names matching none of them are ignored.

    Returns:
        Dict with any of the keys ``delta_high``, ``delta_low``, ``direction``.
    """
    outputs = {}

    for model_name, estimator in models.items():
        if horizon not in model_name:
            continue

        if 'direction' in model_name:
            target = 'direction'
        elif 'high' in model_name:
            target = 'delta_high'
        elif 'low' in model_name:
            target = 'delta_low'
        else:
            continue

        outputs[target] = estimator.predict(X)

    return outputs


def predict_with_movement_predictor(
    predictor,
    df: pd.DataFrame,
    feature_cols: List[str] = None
) -> Dict[str, np.ndarray]:
    """Run the MovementMagnitudePredictor and flatten its results into arrays.

    When the predictor carries a non-empty ``feature_columns`` attribute it is
    called with ``df`` only; otherwise ``feature_cols`` is passed along.

    Returns:
        Dict with ``high_usd``, ``low_usd``, ``direction``, ``asymmetry`` and
        ``confidence`` arrays (one entry per prediction object), or ``{}``
        when there is no predictor, no predictions, or prediction raises.
    """
    if predictor is None:
        return {}

    # Output key -> attribute read from each prediction object.
    field_map = (
        ('high_usd', 'predicted_high_usd'),
        ('low_usd', 'predicted_low_usd'),
        ('direction', 'suggested_direction'),
        ('asymmetry', 'asymmetry_ratio'),
        ('confidence', 'confidence'),
    )

    try:
        stored_columns = getattr(predictor, 'feature_columns', None)
        if stored_columns:
            logger.info(f"Movement predictor expects {len(predictor.feature_columns)} features")
            # The predictor builds its own features from the frame.
            raw_predictions = predictor.predict(df)
        else:
            raw_predictions = predictor.predict(df, feature_cols)

        if not raw_predictions:
            return {}

        return {
            key: np.array([getattr(item, attr) for item in raw_predictions])
            for key, attr in field_map
        }
    except Exception as e:
        logger.error(f"Movement predictor failed: {e}")
        return {}


def predict_with_amd_detector(
    detector,
    df: pd.DataFrame
) -> Dict[str, Any]:
    """Run the AMDDetectorML and flatten its results into arrays.

    Returns:
        Dict with ``phase``, ``phase_label``, ``confidence`` and
        ``trading_bias`` arrays (one entry per prediction object), or ``{}``
        when there is no detector, no predictions, or prediction raises.
    """
    if detector is None:
        return {}

    # Each output key is read from the attribute of the same name.
    attributes = ('phase', 'phase_label', 'confidence', 'trading_bias')

    try:
        detections = detector.predict(df)
        if not detections:
            return {}

        return {
            attr: np.array([getattr(item, attr) for item in detections])
            for attr in attributes
        }
    except Exception as e:
        logger.error(f"AMD detector prediction failed: {e}")
        return {}


# ==============================================================================
# Visualization with Matplotlib
# ==============================================================================

def create_matplotlib_chart(
    df: pd.DataFrame,
    range_preds: Dict,
    movement_preds: Dict,
    amd_preds: Dict,
    symbol: str,
    timeframe: str,
    output_path: Path,
    date_str: str = None
):
    """Render a static four-panel PNG of price data and model predictions.

    Panels, top to bottom: candlesticks with range/movement bands, AMD phase
    shading, movement confidence bars, and asymmetry-ratio bars. Predictions
    are matched to ``df`` rows by position and truncated to the shorter length.

    Args:
        df: Frame with ``open``/``high``/``low``/``close`` columns whose index
            values are accepted by ``matplotlib.dates.date2num``.
        range_preds: Optional ``delta_high``/``delta_low`` arrays, applied as
            fractions of the close price.
        movement_preds: Optional ``high_usd``/``low_usd``/``confidence``/
            ``asymmetry`` arrays.
        amd_preds: Optional dict with a ``phase_label`` array.
        symbol: Symbol name used in the title and file name.
        timeframe: Timeframe label used in the title and file name.
        output_path: Existing directory the PNG is written to.
        date_str: Optional label for the title and file name.

    Returns:
        Path of the written PNG, or ``None`` when matplotlib is unavailable.
    """
    if not HAS_MATPLOTLIB:
        logger.error("matplotlib not available")
        return None

    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    from matplotlib.patches import Patch

    # Widths are in matplotlib date units (days).
    candle_body_width = 0.0004
    bar_width = 0.0005

    # Create figure with subplots
    fig, axes = plt.subplots(4, 1, figsize=(16, 14), sharex=True,
                             gridspec_kw={'height_ratios': [3, 1, 1, 1]})

    # try/finally: the figure is released even when plotting or saving fails,
    # so a failing symbol does not leave an open figure behind.
    try:
        fig.suptitle(f'{symbol} - {timeframe} ML Predictions\n{date_str or ""}', fontsize=14)

        # ---- Subplot 1: OHLC with Range Predictions ----
        ax1 = axes[0]

        # Plot candlesticks manually
        for time, row in df.iterrows():
            x = mdates.date2num(time)
            color = 'green' if row['close'] >= row['open'] else 'red'
            # Body
            ax1.add_patch(Rectangle(
                (x - candle_body_width / 2, min(row['open'], row['close'])),
                candle_body_width, abs(row['close'] - row['open']),
                facecolor=color, edgecolor=color, alpha=0.8
            ))
            # Wick
            ax1.plot([x, x], [row['low'], row['high']], color=color, linewidth=0.5)

        # Plot range predictions as bands
        if range_preds and 'delta_high' in range_preds and 'delta_low' in range_preds:
            close_prices = df['close'].values
            n_preds = min(len(range_preds['delta_high']), len(df))

            # Upper band (predicted high delta)
            upper_band = close_prices[:n_preds] * (1 + range_preds['delta_high'][:n_preds])
            # Lower band (predicted low delta); abs() makes the sign of the
            # prediction irrelevant.
            lower_band = close_prices[:n_preds] * (1 - abs(range_preds['delta_low'][:n_preds]))

            ax1.fill_between(df.index[:n_preds], lower_band, upper_band,
                             alpha=0.2, color='blue', label='Range Prediction')
            ax1.plot(df.index[:n_preds], upper_band, 'b--', linewidth=0.8, alpha=0.7)
            ax1.plot(df.index[:n_preds], lower_band, 'b--', linewidth=0.8, alpha=0.7)

        # Plot movement predictions as additional bands
        if movement_preds and 'high_usd' in movement_preds:
            close_prices = df['close'].values
            n_preds = min(len(movement_preds['high_usd']), len(df))

            upper_move = close_prices[:n_preds] + movement_preds['high_usd'][:n_preds]
            lower_move = close_prices[:n_preds] - movement_preds['low_usd'][:n_preds]

            ax1.plot(df.index[:n_preds], upper_move, 'g-', linewidth=1.2, alpha=0.7, label='Movement High')
            ax1.plot(df.index[:n_preds], lower_move, 'r-', linewidth=1.2, alpha=0.7, label='Movement Low')

        ax1.set_ylabel('Price')
        # Only draw a legend when something is labelled; otherwise matplotlib
        # emits a "no artists with labels" warning.
        legend_handles, _ = ax1.get_legend_handles_labels()
        if legend_handles:
            ax1.legend(loc='upper left')
        ax1.grid(True, alpha=0.3)

        # ---- Subplot 2: AMD Phase Detection ----
        ax2 = axes[1]

        if amd_preds and 'phase_label' in amd_preds:
            phase_labels = amd_preds['phase_label']
            n_preds = min(len(phase_labels), len(df))

            # Color mapping for phases
            phase_colors = {
                0: 'gray',      # Unknown
                1: 'green',     # Accumulation
                2: 'yellow',    # Manipulation
                3: 'red'        # Distribution
            }

            last_row = len(df) - 1
            for i in range(n_preds):
                color = phase_colors.get(phase_labels[i], 'gray')
                # Each span runs to the next timestamp (clamped at the last row).
                ax2.axvspan(df.index[i], df.index[min(i + 1, last_row)],
                            alpha=0.5, color=color)

            # Legend for AMD phases
            legend_elements = [
                Patch(facecolor='green', alpha=0.5, label='Accumulation'),
                Patch(facecolor='yellow', alpha=0.5, label='Manipulation'),
                Patch(facecolor='red', alpha=0.5, label='Distribution'),
                Patch(facecolor='gray', alpha=0.5, label='Unknown')
            ]
            ax2.legend(handles=legend_elements, loc='upper right', fontsize=8)
        else:
            ax2.text(0.5, 0.5, 'AMD Detector not loaded', transform=ax2.transAxes,
                     ha='center', va='center', fontsize=12, color='gray')

        ax2.set_ylabel('AMD Phase')
        ax2.set_yticks([])
        ax2.grid(True, alpha=0.3)

        # ---- Subplot 3: Movement Magnitude Confidence ----
        ax3 = axes[2]

        if movement_preds and 'confidence' in movement_preds:
            n_preds = min(len(movement_preds['confidence']), len(df))
            ax3.bar(df.index[:n_preds], movement_preds['confidence'][:n_preds],
                    width=bar_width, alpha=0.7, color='purple')
            ax3.axhline(y=0.6, color='red', linestyle='--', linewidth=1, label='Confidence Threshold')
        else:
            ax3.text(0.5, 0.5, 'Movement Predictor not loaded', transform=ax3.transAxes,
                     ha='center', va='center', fontsize=12, color='gray')

        ax3.set_ylabel('Confidence')
        ax3.set_ylim(0, 1)
        ax3.grid(True, alpha=0.3)

        # ---- Subplot 4: Asymmetry Ratio / Direction Signals ----
        ax4 = axes[3]

        if movement_preds and 'asymmetry' in movement_preds:
            n_preds = min(len(movement_preds['asymmetry']), len(df))
            asymmetry = movement_preds['asymmetry'][:n_preds]

            # Color by direction signal
            colors = ['green' if a > 1.5 else 'red' if a < 0.67 else 'gray' for a in asymmetry]
            ax4.bar(df.index[:n_preds], asymmetry, width=bar_width, color=colors, alpha=0.7)
            ax4.axhline(y=1.5, color='green', linestyle='--', linewidth=1, label='Long Threshold')
            ax4.axhline(y=0.67, color='red', linestyle='--', linewidth=1, label='Short Threshold')
            ax4.axhline(y=1.0, color='black', linestyle='-', linewidth=0.5)

        ax4.set_ylabel('Asymmetry')
        ax4.set_xlabel('Time')
        ax4.grid(True, alpha=0.3)

        # Format x-axis
        ax4.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
        plt.xticks(rotation=45)

        fig.tight_layout()

        # Save chart
        output_file = output_path / f"{symbol}_{timeframe}_{date_str or 'full'}.png"
        fig.savefig(output_file, dpi=150, bbox_inches='tight')
        logger.info(f"Saved chart to {output_file}")

        return output_file
    finally:
        plt.close(fig)


# ==============================================================================
# Visualization with Plotly (Interactive)
# ==============================================================================

# AMD phase label -> bar color for the interactive chart.
_AMD_PHASE_COLORS = {0: 'gray', 1: 'green', 2: 'orange', 3: 'red'}


def _amd_phase_color(label) -> str:
    """Return the bar color for one AMD phase label, gray when unrecognised."""
    try:
        return _AMD_PHASE_COLORS.get(int(label), 'gray')
    except (TypeError, ValueError):
        # Non-numeric or missing labels are drawn as "unknown" instead of
        # aborting the whole chart.
        return 'gray'


def create_plotly_chart(
    df: pd.DataFrame,
    range_preds: Dict,
    movement_preds: Dict,
    amd_preds: Dict,
    symbol: str,
    timeframe: str,
    output_path: Path,
    date_str: str = None
):
    """Render an interactive four-row Plotly chart and save it as HTML.

    Rows, top to bottom: candlesticks with range/movement bands, AMD phase
    bars, movement confidence bars, and asymmetry-ratio bars. Predictions are
    matched to ``df`` rows by position and truncated to the shorter length.
    A PNG export is also attempted; its failure is only logged.

    Args:
        df: Frame with ``open``/``high``/``low``/``close`` columns; its index
            is used as the x axis.
        range_preds: Optional ``delta_high``/``delta_low`` arrays, applied as
            fractions of the close price.
        movement_preds: Optional ``high_usd``/``low_usd``/``confidence``/
            ``asymmetry`` arrays.
        amd_preds: Optional dict with ``phase_label`` (color) and ``phase``
            (hover text, falling back to ``phase_label``).
        symbol: Symbol name used in titles and file names.
        timeframe: Timeframe label used in titles and file names.
        output_path: Existing directory the files are written to.
        date_str: Optional label for the title and file names.

    Returns:
        Path of the written HTML file, or ``None`` when plotly is unavailable.
    """
    if not HAS_PLOTLY:
        logger.error("plotly not available")
        return None

    # Create subplots
    fig = make_subplots(
        rows=4, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.05,
        row_heights=[0.5, 0.15, 0.15, 0.2],
        subplot_titles=(
            f'{symbol} - {timeframe} Price & Predictions',
            'AMD Phase Detection',
            'Movement Confidence',
            'Asymmetry Ratio'
        )
    )

    # ---- Row 1: Candlestick Chart with Predictions ----
    fig.add_trace(
        go.Candlestick(
            x=df.index,
            open=df['open'],
            high=df['high'],
            low=df['low'],
            close=df['close'],
            name='OHLC',
            increasing_line_color='green',
            decreasing_line_color='red'
        ),
        row=1, col=1
    )

    # Add range prediction bands
    if range_preds and 'delta_high' in range_preds and 'delta_low' in range_preds:
        close_prices = df['close'].values
        n_preds = min(len(range_preds['delta_high']), len(df))

        upper_band = close_prices[:n_preds] * (1 + range_preds['delta_high'][:n_preds])
        lower_band = close_prices[:n_preds] * (1 - abs(range_preds['delta_low'][:n_preds]))

        fig.add_trace(
            go.Scatter(
                x=df.index[:n_preds], y=upper_band,
                mode='lines', name='Range Upper',
                line=dict(color='blue', dash='dash', width=1),
                opacity=0.7
            ),
            row=1, col=1
        )
        # Must directly follow the upper band: 'tonexty' fills to the
        # previously added trace.
        fig.add_trace(
            go.Scatter(
                x=df.index[:n_preds], y=lower_band,
                mode='lines', name='Range Lower',
                line=dict(color='blue', dash='dash', width=1),
                fill='tonexty', fillcolor='rgba(0,0,255,0.1)',
                opacity=0.7
            ),
            row=1, col=1
        )

    # Add movement prediction lines
    if movement_preds and 'high_usd' in movement_preds:
        close_prices = df['close'].values
        n_preds = min(len(movement_preds['high_usd']), len(df))

        upper_move = close_prices[:n_preds] + movement_preds['high_usd'][:n_preds]
        lower_move = close_prices[:n_preds] - movement_preds['low_usd'][:n_preds]

        fig.add_trace(
            go.Scatter(
                x=df.index[:n_preds], y=upper_move,
                mode='lines', name='Move High (USD)',
                line=dict(color='green', width=1.5)
            ),
            row=1, col=1
        )
        fig.add_trace(
            go.Scatter(
                x=df.index[:n_preds], y=lower_move,
                mode='lines', name='Move Low (USD)',
                line=dict(color='red', width=1.5)
            ),
            row=1, col=1
        )

    # ---- Row 2: AMD Phase Detection ----
    if amd_preds and 'phase_label' in amd_preds:
        phase_labels = amd_preds['phase_label']
        phase_names = amd_preds.get('phase', phase_labels)
        n_preds = min(len(phase_labels), len(df))

        colors = [_amd_phase_color(p) for p in phase_labels[:n_preds]]

        # Constant-height bars: only the color and hover text carry information.
        fig.add_trace(
            go.Bar(
                x=df.index[:n_preds],
                y=[1] * n_preds,
                marker_color=colors,
                name='AMD Phase',
                text=phase_names[:n_preds],
                hovertemplate='%{text}<extra></extra>',
                showlegend=False
            ),
            row=2, col=1
        )
    else:
        fig.add_annotation(
            text="AMD Detector not loaded",
            xref="x2 domain", yref="y2 domain",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=14, color="gray"),
            row=2, col=1
        )

    # ---- Row 3: Movement Confidence ----
    if movement_preds and 'confidence' in movement_preds:
        n_preds = min(len(movement_preds['confidence']), len(df))

        fig.add_trace(
            go.Bar(
                x=df.index[:n_preds],
                y=movement_preds['confidence'][:n_preds],
                marker_color='purple',
                name='Confidence',
                opacity=0.7
            ),
            row=3, col=1
        )

        # Threshold line
        fig.add_hline(y=0.6, line_dash="dash", line_color="red", row=3, col=1)
    else:
        fig.add_annotation(
            text="Movement Predictor not loaded",
            xref="x3 domain", yref="y3 domain",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=14, color="gray"),
            row=3, col=1
        )

    # ---- Row 4: Asymmetry Ratio ----
    if movement_preds and 'asymmetry' in movement_preds:
        n_preds = min(len(movement_preds['asymmetry']), len(df))
        asymmetry = movement_preds['asymmetry'][:n_preds]

        # Color by direction
        colors = ['green' if a > 1.5 else 'red' if a < 0.67 else 'gray' for a in asymmetry]

        fig.add_trace(
            go.Bar(
                x=df.index[:n_preds],
                y=asymmetry,
                marker_color=colors,
                name='Asymmetry',
                opacity=0.7
            ),
            row=4, col=1
        )

        # Threshold lines
        fig.add_hline(y=1.5, line_dash="dash", line_color="green", row=4, col=1)
        fig.add_hline(y=0.67, line_dash="dash", line_color="red", row=4, col=1)
        fig.add_hline(y=1.0, line_color="black", line_width=0.5, row=4, col=1)

    # Update layout
    fig.update_layout(
        title=f'{symbol} - {timeframe} ML Model Predictions ({date_str or "Full Period"})',
        height=1000,
        showlegend=True,
        xaxis_rangeslider_visible=False,
        template='plotly_white'
    )

    fig.update_yaxes(title_text="Price", row=1, col=1)
    fig.update_yaxes(title_text="Phase", row=2, col=1)
    fig.update_yaxes(title_text="Confidence", range=[0, 1], row=3, col=1)
    fig.update_yaxes(title_text="Asymmetry", row=4, col=1)
    fig.update_xaxes(title_text="Time", row=4, col=1)

    # Save as HTML
    output_file = output_path / f"{symbol}_{timeframe}_{date_str or 'full'}.html"
    fig.write_html(str(output_file))
    logger.info(f"Saved interactive chart to {output_file}")

    # Also save as PNG if kaleido is available (best-effort by design)
    try:
        png_file = output_path / f"{symbol}_{timeframe}_{date_str or 'full'}_plotly.png"
        fig.write_image(str(png_file), width=1600, height=1000)
        logger.info(f"Saved PNG chart to {png_file}")
    except Exception as e:
        logger.warning(f"Could not save PNG (install kaleido): {e}")

    return output_file


# ==============================================================================
# Main Visualization Function
# ==============================================================================

def _timeframe_to_pandas_freq(timeframe: str) -> str:
    """Translate a script timeframe such as ``"15m"`` into a pandas frequency string."""
    label = timeframe.strip()
    if len(label) > 1 and label.endswith('m') and label[:-1].isdigit():
        # pandas spells minutes "min"; "m" is not a minute alias there.
        return f"{label[:-1]}min"
    return label


def run_visualization(
    symbol: str = "XAUUSD",
    timeframe: str = "15m",
    start_date: str = "2025-01-06",
    end_date: str = "2025-01-12",
    output_format: str = "both",  # 'matplotlib', 'plotly', 'both'
    horizon: str = "scalping",
    model_base_path: str = None
):
    """
    Run visualization for all ML models.

    Loads price data (falling back to seeded synthetic data when the database
    call raises), loads whichever models exist under the model path, generates
    predictions, writes the requested charts to ``charts/<symbol>`` and saves
    a JSON summary next to them.

    Args:
        symbol: Trading symbol (XAUUSD, BTCUSD, EURUSD)
        timeframe: Timeframe (5m, 15m)
        start_date: Start date (YYYY-MM-DD)
        end_date: End date (YYYY-MM-DD)
        output_format: Output format (matplotlib, plotly, both)
        horizon: Prediction horizon (scalping, intraday)
        model_base_path: Base path for models; defaults to
            ``models/ml_first/<symbol>``

    Returns:
        The summary dict that was written to disk, or ``None`` when the model
        path does not exist or no data is available for the period.
    """
    logger.info("=" * 60)
    logger.info("ML MODELS VISUALIZATION")
    logger.info(f"Symbol: {symbol}")
    logger.info(f"Timeframe: {timeframe}")
    logger.info(f"Period: {start_date} to {end_date}")
    logger.info("=" * 60)

    # Set model base path
    if model_base_path is None:
        model_base_path = f"models/ml_first/{symbol}"

    model_path = Path(model_base_path)
    if not model_path.exists():
        logger.error(f"Model path not found: {model_path}")
        logger.info("Available model paths:")
        for p in Path("models/ml_first").glob("*"):
            logger.info(f"  - {p}")
        return None

    # Create output directory
    output_path = Path("charts") / symbol
    output_path.mkdir(parents=True, exist_ok=True)

    # Load data from database
    logger.info("Loading data from database...")
    try:
        db = MySQLConnection('config/database.yaml')
        df_raw = db.get_ticker_data(
            symbol,
            limit=100000,
            start_date=start_date,
            end_date=end_date
        )
    except Exception as e:
        logger.error(f"Failed to load data from database: {e}")
        logger.info("Attempting to create sample data for demonstration...")
        # Create sample data for demo purposes. The timeframe label is
        # converted first: "15m" itself is not a minute frequency in pandas.
        dates = pd.date_range(start=start_date, end=end_date,
                              freq=_timeframe_to_pandas_freq(timeframe))
        n = len(dates)
        # Fixed seed keeps the demo chart reproducible between runs.
        np.random.seed(42)
        price = 2650 + np.cumsum(np.random.randn(n) * 2)
        df_raw = pd.DataFrame({
            'open': price + np.random.randn(n) * 0.5,
            'high': price + np.abs(np.random.randn(n)) * 5,
            'low': price - np.abs(np.random.randn(n)) * 5,
            'close': price + np.random.randn(n) * 0.5,
            'volume': np.random.randint(100, 1000, n)
        }, index=dates)
        # Keep each synthetic candle consistent: high/low must bound open/close.
        df_raw['high'] = df_raw[['open', 'high', 'close']].max(axis=1)
        df_raw['low'] = df_raw[['open', 'low', 'close']].min(axis=1)

    if df_raw.empty:
        logger.error(f"No data found for {symbol} in the specified period")
        return None

    logger.info(f"Loaded {len(df_raw)} records from {df_raw.index.min()} to {df_raw.index.max()}")

    # Load models
    logger.info("\nLoading ML models...")

    # 1. RangePredictor
    range_models, range_metadata = load_range_predictor(str(model_path), timeframe, horizon)

    # 2. MovementMagnitudePredictor
    horizon_key = "15m_60min" if timeframe == "15m" else "5m_15min"
    movement_predictor = load_movement_predictor(str(model_path), horizon_key)

    # 3. AMDDetectorML
    amd_detector = load_amd_detector(str(model_path))

    # 4. TPSLClassifier (optional; only its load status is reported)
    tpsl_classifier = load_tpsl_classifier(str(model_path))

    # Get expected features from metadata: the feature names are taken from
    # the first entry of the stored feature-importance mapping.
    expected_features = None
    if range_metadata:
        fi = range_metadata.get('feature_importance', {})
        if fi:
            first_key = list(fi.keys())[0]
            expected_features = list(fi[first_key].keys())
            logger.info(f"Models expect {len(expected_features)} features")

    # Prepare features
    logger.info("\nPreparing features...")
    df = prepare_features(df_raw.copy(), expected_features)

    if expected_features:
        feature_cols = expected_features
    else:
        feature_cols = get_feature_columns(df)

    logger.info(f"Using {len(feature_cols)} features")

    # Generate predictions
    logger.info("\nGenerating predictions...")

    # Range predictions
    range_preds = {}
    if range_models:
        # Filter to matching features
        available_features = [f for f in feature_cols if f in df.columns]
        X = df[available_features].values
        range_preds = predict_with_range_models(range_models, X, horizon)
        logger.info(f"Generated range predictions: {list(range_preds.keys())}")

    # NOTE(review): movement and AMD predictions are computed from df_raw,
    # while the charts pair predictions with the rows of df (after dropna) by
    # position. Confirm the predictors' outputs line up with df's rows.

    # Movement predictions
    movement_preds = {}
    if movement_predictor:
        # Pass the raw OHLCV data - predictor will create its own features
        movement_preds = predict_with_movement_predictor(movement_predictor, df_raw)
        if movement_preds:
            logger.info(f"Generated movement predictions: {list(movement_preds.keys())}")
        else:
            logger.warning("Movement predictor returned no predictions")

    # AMD predictions
    amd_preds = {}
    if amd_detector:
        amd_preds = predict_with_amd_detector(amd_detector, df_raw)
        logger.info(f"Generated AMD predictions: {list(amd_preds.keys())}")

    # Create date string for filename
    date_str = f"{start_date}_to_{end_date}".replace("-", "")

    # Generate visualizations
    logger.info("\nGenerating visualizations...")

    if output_format in ['matplotlib', 'both'] and HAS_MATPLOTLIB:
        create_matplotlib_chart(
            df, range_preds, movement_preds, amd_preds,
            symbol, timeframe, output_path, date_str
        )

    if output_format in ['plotly', 'both'] and HAS_PLOTLY:
        create_plotly_chart(
            df, range_preds, movement_preds, amd_preds,
            symbol, timeframe, output_path, date_str
        )

    # Generate summary report
    summary = {
        'symbol': symbol,
        'timeframe': timeframe,
        'period': {'start': start_date, 'end': end_date},
        'data_points': len(df),
        'models_loaded': {
            'range_predictor': bool(range_models),
            'movement_predictor': bool(movement_predictor),
            'amd_detector': bool(amd_detector),
            'tpsl_classifier': bool(tpsl_classifier)
        },
        'predictions_generated': {
            'range': list(range_preds.keys()) if range_preds else [],
            'movement': list(movement_preds.keys()) if movement_preds else [],
            'amd': list(amd_preds.keys()) if amd_preds else []
        },
        'output_path': str(output_path)
    }

    # Save summary
    summary_file = output_path / f"summary_{date_str}.json"
    with open(summary_file, 'w') as f:
        json.dump(summary, f, indent=2, default=str)
    logger.info(f"Saved summary to {summary_file}")

    logger.info("\n" + "=" * 60)
    logger.info("VISUALIZATION COMPLETE")
    logger.info(f"Charts saved to: {output_path}")
    logger.info("=" * 60)

    return summary


# ==============================================================================
# CLI Entry Point
# ==============================================================================

def main():
    """Parse the command line and run the visualization for each requested symbol.

    A failure for one symbol/timeframe pair is logged with its traceback and
    does not stop the remaining pairs. A short summary of the successful runs
    is printed at the end.
    """
    import traceback

    cli = argparse.ArgumentParser(
        description='Visualize ML model predictions for trading data',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Visualize XAUUSD for second week of January 2025
  python scripts/run_visualization.py --symbol XAUUSD --timeframe 15m

  # Custom date range
  python scripts/run_visualization.py --symbol BTCUSD --start 2025-01-10 --end 2025-01-15

  # All symbols
  python scripts/run_visualization.py --all-symbols

  # Only matplotlib output
  python scripts/run_visualization.py --format matplotlib
        """
    )

    cli.add_argument('--symbol', default='XAUUSD',
                     help='Trading symbol (default: XAUUSD)')
    cli.add_argument('--timeframe', default='15m',
                     help='Timeframe: 5m or 15m (default: 15m)')
    cli.add_argument('--start', default='2025-01-06',
                     help='Start date YYYY-MM-DD (default: 2025-01-06)')
    cli.add_argument('--end', default='2025-01-12',
                     help='End date YYYY-MM-DD (default: 2025-01-12)')
    cli.add_argument('--format', default='both', choices=['matplotlib', 'plotly', 'both'],
                     help='Output format (default: both)')
    cli.add_argument('--horizon', default='scalping',
                     help='Prediction horizon: scalping or intraday (default: scalping)')
    cli.add_argument('--model-path', default=None,
                     help='Base path for models (default: models/ml_first/{symbol})')
    cli.add_argument('--all-symbols', action='store_true',
                     help='Run for all available symbols')

    options = cli.parse_args()

    # Work list: every symbol crossed with the single requested timeframe.
    symbols = ['XAUUSD', 'BTCUSD', 'EURUSD'] if options.all_symbols else [options.symbol]
    timeframes = [options.timeframe]
    jobs = [(sym, tf) for sym in symbols for tf in timeframes]

    results = []
    for symbol, timeframe in jobs:
        logger.info(f"\nProcessing {symbol} - {timeframe}...")
        try:
            outcome = run_visualization(
                symbol=symbol,
                timeframe=timeframe,
                start_date=options.start,
                end_date=options.end,
                output_format=options.format,
                horizon=options.horizon,
                model_base_path=options.model_path
            )
        except Exception as e:
            logger.error(f"Failed to process {symbol} - {timeframe}: {e}")
            traceback.print_exc()
            continue
        if outcome:
            results.append(outcome)

    # Final summary
    rule = "=" * 60
    print("\n" + rule)
    print("VISUALIZATION SUMMARY")
    print(rule)
    print(f"Processed {len(results)} symbol/timeframe combinations")
    for entry in results:
        print(f"  - {entry['symbol']} / {entry['timeframe']}: {entry['data_points']} data points")
    print("\nCharts saved to: charts/")
    print(rule)


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()