Hierarchical ML Pipeline for trading predictions:
- Level 0: Attention Models (volatility/flow classification)
- Level 1: Base Models (XGBoost per symbol/timeframe)
- Level 2: Metamodels (XGBoost Stacking + Neural Gating)
Key components:
- src/pipelines/hierarchical_pipeline.py - Main prediction pipeline
- src/models/ - All ML model classes
- src/training/ - Training utilities
- src/api/ - FastAPI endpoints
- scripts/ - Training and evaluation scripts
- config/ - YAML configurations
Note: Trained model artifacts (*.joblib, *.pt) are gitignored and are not part of this commit.
Regenerate them by running the training scripts under scripts/.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
949 lines
33 KiB
Python
949 lines
33 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
ML Models Visualization Script
|
|
==============================
|
|
Visualizes predictions from all 5 ML models for a specified date range.
|
|
|
|
Models Visualized:
|
|
1. RangePredictor - Predicts delta high/low as percentage
|
|
2. EnhancedRangePredictor - Enhanced predictor with dual-horizon ensemble
|
|
3. MovementMagnitudePredictor - Predicts movement magnitude in USD
|
|
4. AMDDetectorML - Detects AMD phases (Accumulation, Manipulation, Distribution)
|
|
5. TPSLClassifier - Predicts TP/SL probability
|
|
|
|
Default period: Second week of January 2025 (out-of-sample)
|
|
|
|
Usage:
|
|
python scripts/run_visualization.py --symbol XAUUSD --timeframe 15m --start 2025-01-06 --end 2025-01-12
|
|
python scripts/run_visualization.py --symbol BTCUSD --timeframe 5m
|
|
python scripts/run_visualization.py --all-symbols --timeframe 15m
|
|
|
|
Author: ML-Specialist (NEXUS v4.0)
|
|
Date: 2026-01-05
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
|
|
# Add src to path
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from datetime import datetime, timedelta
|
|
import argparse
|
|
from typing import Dict, List, Optional, Tuple, Any
|
|
import json
|
|
from loguru import logger
|
|
import joblib
|
|
import yaml
|
|
|
|
# Visualization libraries
|
|
try:
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.dates as mdates
|
|
from matplotlib.patches import Rectangle
|
|
from matplotlib.lines import Line2D
|
|
HAS_MATPLOTLIB = True
|
|
except ImportError:
|
|
HAS_MATPLOTLIB = False
|
|
logger.warning("matplotlib not available - install with: pip install matplotlib")
|
|
|
|
try:
|
|
import plotly.graph_objects as go
|
|
from plotly.subplots import make_subplots
|
|
import plotly.express as px
|
|
HAS_PLOTLY = True
|
|
except ImportError:
|
|
HAS_PLOTLY = False
|
|
logger.warning("plotly not available - install with: pip install plotly kaleido")
|
|
|
|
# Local imports
|
|
from data.database import MySQLConnection
|
|
from data.features import FeatureEngineer
|
|
|
|
|
|
# ==============================================================================
|
|
# Model Loading Functions
|
|
# ==============================================================================
|
|
|
|
def load_range_predictor(model_path: str, timeframe: str = "15m", horizon: str = "scalping"):
    """Load every RangePredictor artifact stored for one timeframe.

    Args:
        model_path: Base directory that contains the ``range_predictor`` folder.
        timeframe: Sub-directory of ``range_predictor`` to read.
        horizon: Accepted for signature compatibility; not used by this loader.

    Returns:
        A ``(models, metadata)`` tuple. ``models`` maps the stem of each
        ``*.joblib`` file to the object loaded from it; ``metadata`` is the
        content of ``metadata.joblib`` (an empty dict when that file is absent).
        ``(None, None)`` is returned when the directory does not exist.
    """
    model_dir = Path(model_path) / "range_predictor" / timeframe
    if not model_dir.exists():
        logger.warning(f"RangePredictor not found at {model_dir}")
        return None, None

    loaded, meta = {}, {}
    for artifact in model_dir.glob("*.joblib"):
        stem = artifact.stem
        if stem != 'metadata':
            loaded[stem] = joblib.load(artifact)
            logger.info(f"Loaded RangePredictor model: {stem}")
            continue
        # The metadata file lives next to the models but is returned separately.
        meta = joblib.load(artifact)

    return loaded, meta
|
|
|
|
|
|
def load_movement_predictor(model_path: str, horizon_key: str = "15m_60min"):
    """Load a MovementMagnitudePredictor saved under ``movement_predictor/<horizon_key>``.

    Args:
        model_path: Base directory that contains the ``movement_predictor`` folder.
        horizon_key: Name of the sub-directory holding the saved predictor.

    Returns:
        The loaded predictor, or ``None`` when the directory is missing or
        ``predictor.load`` raises.
    """
    from models.movement_magnitude_predictor import MovementMagnitudePredictor

    predictor_dir = Path(model_path) / "movement_predictor" / horizon_key
    if not predictor_dir.exists():
        logger.warning(f"MovementPredictor not found at {predictor_dir}")
        return None

    instance = MovementMagnitudePredictor()
    try:
        instance.load(str(predictor_dir))
    except Exception as e:
        # Best effort: a broken artifact must not abort the whole visualization.
        logger.error(f"Failed to load MovementPredictor: {e}")
        return None
    else:
        logger.info(f"Loaded MovementMagnitudePredictor from {predictor_dir}")
        return instance
|
|
|
|
|
|
def load_amd_detector(model_path: str):
    """Load an AMDDetectorML saved under ``<model_path>/amd_detector``.

    The detector is constructed with ``use_gpu=False``.

    Args:
        model_path: Base directory that contains the ``amd_detector`` folder.

    Returns:
        The loaded detector, or ``None`` when the directory is missing or
        ``detector.load`` raises.
    """
    from models.amd_detector_ml import AMDDetectorML

    detector_dir = Path(model_path) / "amd_detector"
    if not detector_dir.exists():
        logger.warning(f"AMDDetector not found at {detector_dir}")
        return None

    instance = AMDDetectorML(use_gpu=False)
    try:
        instance.load(str(detector_dir))
    except Exception as e:
        # Best effort: a broken artifact must not abort the whole visualization.
        logger.error(f"Failed to load AMDDetector: {e}")
        return None
    else:
        logger.info(f"Loaded AMDDetectorML from {detector_dir}")
        return instance
|
|
|
|
|
|
def load_tpsl_classifier(model_path: str):
    """Load the optional TPSLClassifier saved under ``<model_path>/tpsl_classifier``.

    The classifier is optional, so every failure mode (missing directory,
    missing ``models.tp_sl_classifier`` module, constructor or ``load`` error)
    is logged and reported as ``None`` instead of being raised.

    Args:
        model_path: Base directory that contains the ``tpsl_classifier`` folder.

    Returns:
        The loaded classifier, or ``None`` when it is not available.
    """
    classifier_dir = Path(model_path) / "tpsl_classifier"
    if not classifier_dir.exists():
        logger.warning(f"TPSLClassifier not found at {classifier_dir}")
        return None

    try:
        # Imported lazily and guarded: an absent optional module must not
        # abort the visualization of the other models.
        from models.tp_sl_classifier import TPSLClassifier
    except ImportError as e:
        logger.warning(f"TPSLClassifier module not available: {e}")
        return None

    try:
        classifier = TPSLClassifier()
        classifier.load(str(classifier_dir))
    except Exception as e:
        logger.error(f"Failed to load TPSLClassifier: {e}")
        return None

    logger.info(f"Loaded TPSLClassifier from {classifier_dir}")
    return classifier
|
|
|
|
|
|
# ==============================================================================
|
|
# Feature Preparation
|
|
# ==============================================================================
|
|
|
|
def prepare_features(df: pd.DataFrame, expected_features: List[str] = None) -> pd.DataFrame:
    """Build the model input frame from OHLCV data.

    Runs the ``FeatureEngineer`` price, volume, time and rolling feature steps
    on a copy of ``df``, then adds ``obv``, ``vpt`` and the session flags when
    those columns are not already present.

    Args:
        df: Input frame; ``close`` and ``volume`` columns are read here, and a
            ``DatetimeIndex`` is required for the session flags to be added.
        expected_features: Optional column names the models expect; any that
            are still missing after feature creation are added filled with 0.

    Returns:
        The feature frame with all rows containing NaN dropped.
    """
    engineer = FeatureEngineer()

    frame = engineer.create_price_features(df.copy())
    frame = engineer.create_volume_features(frame)
    frame = engineer.create_time_features(frame)
    frame = engineer.create_rolling_features(
        frame,
        columns=['close', 'volume', 'high', 'low'],
        windows=[5, 10, 20]
    )

    # Volume-based indicators, only when the engineer did not provide them.
    if 'obv' not in frame.columns:
        signed_volume = np.sign(frame['close'].diff()) * frame['volume']
        frame['obv'] = signed_volume.cumsum()
    if 'vpt' not in frame.columns:
        weighted_return = frame['close'].pct_change() * frame['volume']
        frame['vpt'] = weighted_return.cumsum()

    # Session flags: (column, first hour inclusive, last hour exclusive).
    if isinstance(frame.index, pd.DatetimeIndex):
        hour = frame.index.hour
        sessions = (
            ('is_london', 8, 16),
            ('is_newyork', 13, 21),
            ('is_tokyo', 0, 8),
        )
        for column, opens, closes in sessions:
            if column not in frame.columns:
                frame[column] = ((hour >= opens) & (hour < closes)).astype(int)

    # Any expected feature that could not be computed is zero-filled.
    for column in expected_features or ():
        if column not in frame.columns:
            frame[column] = 0

    return frame.dropna()
|
|
|
|
|
|
def get_feature_columns(df: pd.DataFrame, exclude_ohlcv: bool = True) -> List[str]:
    """Return the numeric columns of ``df`` that can be used as features.

    Columns starting with ``target_`` or ``pred_`` are always skipped; the raw
    OHLCV/VWAP columns are skipped as well unless ``exclude_ohlcv`` is False.
    Only columns whose dtype is float64, float32, int64 or int32 are kept.

    Args:
        df: Frame to inspect.
        exclude_ohlcv: Whether to leave out open/high/low/close/volume/vwap.

    Returns:
        Selected column names in the frame's column order.
    """
    accepted_dtypes = ['float64', 'float32', 'int64', 'int32']

    skipped = ['open', 'high', 'low', 'close', 'volume', 'vwap'] if exclude_ohlcv else []
    skipped.extend(
        column for column in df.columns
        if column.startswith(('target_', 'pred_'))
    )

    selected = []
    for column in df.columns:
        if column in skipped:
            continue
        if df[column].dtype in accepted_dtypes:
            selected.append(column)
    return selected
|
|
|
|
|
|
# ==============================================================================
|
|
# Prediction Functions
|
|
# ==============================================================================
|
|
|
|
def predict_with_range_models(
    models: Dict,
    X: np.ndarray,
    horizon: str = "scalping"
) -> Dict[str, np.ndarray]:
    """Run the RangePredictor models that belong to one horizon.

    A model is used only when ``horizon`` occurs in its name. The output key is
    then chosen from the name: ``direction`` if the name contains
    ``'direction'``, otherwise ``delta_high`` for ``'high'`` or ``delta_low``
    for ``'low'``. Models matching none of these are ignored. When several
    models map to the same key, the last one iterated wins.

    Args:
        models: Mapping of model name to an object exposing ``predict(X)``.
        X: Feature matrix handed unchanged to each model.
        horizon: Substring that selects the models to run.

    Returns:
        Mapping of output key to whatever the selected model's ``predict``
        returned.
    """
    outputs = {}

    for model_name, estimator in models.items():
        if horizon not in model_name:
            continue

        # 'direction' is tested first so that names such as
        # "..._high_direction" are never treated as range models.
        if 'direction' in model_name:
            target = 'direction'
        elif 'high' in model_name:
            target = 'delta_high'
        elif 'low' in model_name:
            target = 'delta_low'
        else:
            continue

        outputs[target] = estimator.predict(X)

    return outputs
|
|
|
|
|
|
def predict_with_movement_predictor(
    predictor,
    df: pd.DataFrame,
    feature_cols: List[str] = None
) -> Dict[str, np.ndarray]:
    """Collect MovementMagnitudePredictor outputs into per-field arrays.

    When the predictor carries a non-empty ``feature_columns`` attribute it is
    called as ``predict(df)``; otherwise as ``predict(df, feature_cols)``.

    Args:
        predictor: Loaded predictor, or ``None``.
        df: Data frame handed to the predictor.
        feature_cols: Feature names, used only for predictors without stored
            feature columns.

    Returns:
        Dict with the keys ``high_usd``, ``low_usd``, ``direction``,
        ``asymmetry`` and ``confidence``, each an array with one entry per
        returned prediction. An empty dict is returned when there is no
        predictor, no prediction, or the predictor raised.
    """
    if predictor is None:
        return {}

    # Output key -> attribute read from each prediction object.
    field_map = (
        ('high_usd', 'predicted_high_usd'),
        ('low_usd', 'predicted_low_usd'),
        ('direction', 'suggested_direction'),
        ('asymmetry', 'asymmetry_ratio'),
        ('confidence', 'confidence'),
    )

    try:
        stored_columns = getattr(predictor, 'feature_columns', None)
        if stored_columns:
            logger.info(f"Movement predictor expects {len(stored_columns)} features")
            # The predictor builds its own features from the frame.
            raw_predictions = predictor.predict(df)
        else:
            raw_predictions = predictor.predict(df, feature_cols)

        if not raw_predictions:
            return {}

        return {
            key: np.array([getattr(item, attribute) for item in raw_predictions])
            for key, attribute in field_map
        }
    except Exception as e:
        logger.error(f"Movement predictor failed: {e}")
        return {}
|
|
|
|
|
|
def predict_with_amd_detector(
    detector,
    df: pd.DataFrame
) -> Dict[str, Any]:
    """Collect AMDDetectorML outputs into per-field arrays.

    Args:
        detector: Loaded detector, or ``None``.
        df: Data frame handed to ``detector.predict``.

    Returns:
        Dict with the keys ``phase``, ``phase_label``, ``confidence`` and
        ``trading_bias``, each an array built from the attribute of the same
        name on every returned prediction. An empty dict is returned when there
        is no detector, no prediction, or the detector raised.
    """
    if detector is None:
        return {}

    fields = ('phase', 'phase_label', 'confidence', 'trading_bias')

    try:
        detections = detector.predict(df)
        if not detections:
            return {}

        return {
            field: np.array([getattr(item, field) for item in detections])
            for field in fields
        }
    except Exception as e:
        logger.error(f"AMD detector prediction failed: {e}")
        return {}
|
|
|
|
|
|
# ==============================================================================
|
|
# Visualization with Matplotlib
|
|
# ==============================================================================
|
|
|
|
def create_matplotlib_chart(
    df: pd.DataFrame,
    range_preds: Dict,
    movement_preds: Dict,
    amd_preds: Dict,
    symbol: str,
    timeframe: str,
    output_path: Path,
    date_str: str = None
):
    """Render the four-panel static chart and save it as a PNG.

    Panels, top to bottom: candlesticks with the range and movement bands,
    AMD phase shading, movement confidence bars, asymmetry ratio bars.
    Prediction arrays are matched to ``df`` rows by position and truncated to
    the shorter of the two.

    Args:
        df: Frame with ``open``/``high``/``low``/``close`` columns and a
            datetime index.
        range_preds: May hold ``delta_high`` and ``delta_low`` (fractions of
            the close price).
        movement_preds: May hold ``high_usd``, ``low_usd``, ``confidence`` and
            ``asymmetry``.
        amd_preds: May hold ``phase_label`` (0-3; other values are drawn gray).
        symbol: Symbol name used in the title and file name.
        timeframe: Timeframe label used in the title and file name.
        output_path: Existing directory that receives the PNG.
        date_str: Optional suffix for the title and file name.

    Returns:
        Path of the written PNG, or ``None`` when matplotlib is unavailable.
    """
    if not HAS_MATPLOTLIB:
        logger.error("matplotlib not available")
        return None

    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    from matplotlib.patches import Patch

    closes = df['close'].values
    n_rows = len(df)
    x_positions = [mdates.date2num(stamp) for stamp in df.index]

    # Size candles and bars from the actual bar spacing (in days) so they stay
    # visible on any timeframe; the old fixed widths are only a fallback for
    # frames with fewer than two distinct timestamps.
    spacing = np.diff(x_positions)
    spacing = spacing[spacing > 0]
    step_days = float(np.median(spacing)) if len(spacing) else 0.0
    candle_width = 0.6 * step_days if step_days else 0.0004
    bar_width = 0.8 * step_days if step_days else 0.0005

    fig, axes = plt.subplots(4, 1, figsize=(16, 14), sharex=True,
                             gridspec_kw={'height_ratios': [3, 1, 1, 1]})
    try:
        fig.suptitle(f'{symbol} - {timeframe} ML Predictions\n{date_str or ""}', fontsize=14)
        ax1, ax2, ax3, ax4 = axes

        # ---- Subplot 1: OHLC with Range Predictions ----
        for x, open_, high, low, close in zip(
            x_positions, df['open'].values, df['high'].values, df['low'].values, closes
        ):
            color = 'green' if close >= open_ else 'red'
            # Body
            ax1.add_patch(Rectangle(
                (x - candle_width / 2, min(open_, close)),
                candle_width, abs(close - open_),
                facecolor=color, edgecolor=color, alpha=0.8
            ))
            # Wick
            ax1.plot([x, x], [low, high], color=color, linewidth=0.5)

        if range_preds and 'delta_high' in range_preds and 'delta_low' in range_preds:
            n_preds = min(len(range_preds['delta_high']), n_rows)
            times = df.index[:n_preds]
            upper_band = closes[:n_preds] * (1 + range_preds['delta_high'][:n_preds])
            lower_band = closes[:n_preds] * (1 - abs(range_preds['delta_low'][:n_preds]))

            ax1.fill_between(times, lower_band, upper_band,
                             alpha=0.2, color='blue', label='Range Prediction')
            ax1.plot(times, upper_band, 'b--', linewidth=0.8, alpha=0.7)
            ax1.plot(times, lower_band, 'b--', linewidth=0.8, alpha=0.7)

        # Both arrays are required; checking only 'high_usd' raised KeyError
        # when 'low_usd' was missing.
        if movement_preds and 'high_usd' in movement_preds and 'low_usd' in movement_preds:
            n_preds = min(len(movement_preds['high_usd']), n_rows)
            times = df.index[:n_preds]
            upper_move = closes[:n_preds] + movement_preds['high_usd'][:n_preds]
            lower_move = closes[:n_preds] - movement_preds['low_usd'][:n_preds]

            ax1.plot(times, upper_move, 'g-', linewidth=1.2, alpha=0.7, label='Movement High')
            ax1.plot(times, lower_move, 'r-', linewidth=1.2, alpha=0.7, label='Movement Low')

        ax1.set_ylabel('Price')
        # Only build a legend when something is labelled; otherwise matplotlib
        # emits a "no artists with labels" warning.
        if ax1.get_legend_handles_labels()[0]:
            ax1.legend(loc='upper left')
        ax1.grid(True, alpha=0.3)

        # ---- Subplot 2: AMD Phase Detection ----
        if amd_preds and 'phase_label' in amd_preds:
            phase_labels = amd_preds['phase_label']
            n_preds = min(len(phase_labels), n_rows)

            phase_colors = {
                0: 'gray',    # Unknown
                1: 'green',   # Accumulation
                2: 'yellow',  # Manipulation
                3: 'red'      # Distribution
            }
            last_step = pd.Timedelta(days=step_days)

            for i in range(n_preds):
                start = df.index[i]
                # The last bar has no successor; give it one bar step so it
                # is not drawn with zero width.
                end = df.index[i + 1] if i + 1 < n_rows else start + last_step
                ax2.axvspan(start, end, alpha=0.5,
                            color=phase_colors.get(phase_labels[i], 'gray'))

            legend_elements = [
                Patch(facecolor='green', alpha=0.5, label='Accumulation'),
                Patch(facecolor='yellow', alpha=0.5, label='Manipulation'),
                Patch(facecolor='red', alpha=0.5, label='Distribution'),
                Patch(facecolor='gray', alpha=0.5, label='Unknown')
            ]
            ax2.legend(handles=legend_elements, loc='upper right', fontsize=8)
        else:
            ax2.text(0.5, 0.5, 'AMD Detector not loaded', transform=ax2.transAxes,
                     ha='center', va='center', fontsize=12, color='gray')

        ax2.set_ylabel('AMD Phase')
        ax2.set_yticks([])
        ax2.grid(True, alpha=0.3)

        # ---- Subplot 3: Movement Magnitude Confidence ----
        if movement_preds and 'confidence' in movement_preds:
            n_preds = min(len(movement_preds['confidence']), n_rows)
            ax3.bar(df.index[:n_preds], movement_preds['confidence'][:n_preds],
                    width=bar_width, alpha=0.7, color='purple')
            ax3.axhline(y=0.6, color='red', linestyle='--', linewidth=1,
                        label='Confidence Threshold')
        else:
            ax3.text(0.5, 0.5, 'Movement Predictor not loaded', transform=ax3.transAxes,
                     ha='center', va='center', fontsize=12, color='gray')

        ax3.set_ylabel('Confidence')
        ax3.set_ylim(0, 1)
        ax3.grid(True, alpha=0.3)

        # ---- Subplot 4: Asymmetry Ratio / Direction Signals ----
        if movement_preds and 'asymmetry' in movement_preds:
            n_preds = min(len(movement_preds['asymmetry']), n_rows)
            asymmetry = movement_preds['asymmetry'][:n_preds]

            # Green above the long threshold, red below the short one.
            colors = ['green' if a > 1.5 else 'red' if a < 0.67 else 'gray' for a in asymmetry]
            ax4.bar(df.index[:n_preds], asymmetry, width=bar_width, color=colors, alpha=0.7)
            ax4.axhline(y=1.5, color='green', linestyle='--', linewidth=1, label='Long Threshold')
            ax4.axhline(y=0.67, color='red', linestyle='--', linewidth=1, label='Short Threshold')
            ax4.axhline(y=1.0, color='black', linestyle='-', linewidth=0.5)

        ax4.set_ylabel('Asymmetry')
        ax4.set_xlabel('Time')
        ax4.grid(True, alpha=0.3)

        # Format x-axis
        ax4.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
        plt.xticks(rotation=45)

        fig.tight_layout()

        # Save chart
        output_file = output_path / f"{symbol}_{timeframe}_{date_str or 'full'}.png"
        fig.savefig(output_file, dpi=150, bbox_inches='tight')
        logger.info(f"Saved chart to {output_file}")
        return output_file
    finally:
        # Always release the figure, even when drawing or saving failed, so
        # figures do not pile up when several symbols are processed in a row.
        plt.close(fig)
|
|
|
|
|
|
# ==============================================================================
|
|
# Visualization with Plotly (Interactive)
|
|
# ==============================================================================
|
|
|
|
def create_plotly_chart(
    df: pd.DataFrame,
    range_preds: Dict,
    movement_preds: Dict,
    amd_preds: Dict,
    symbol: str,
    timeframe: str,
    output_path: Path,
    date_str: str = None
):
    """Render the four-row interactive chart and save it as HTML (and PNG).

    Rows, top to bottom: candlesticks with the range and movement lines, AMD
    phase bars, movement confidence bars, asymmetry ratio bars. Prediction
    arrays are matched to ``df`` rows by position and truncated to the shorter
    of the two. The PNG export is attempted afterwards and only logged when it
    fails.

    Args:
        df: Frame with ``open``/``high``/``low``/``close`` columns.
        range_preds: May hold ``delta_high`` and ``delta_low``.
        movement_preds: May hold ``high_usd``, ``low_usd``, ``confidence`` and
            ``asymmetry``.
        amd_preds: May hold ``phase_label`` and ``phase``.
        symbol: Symbol name used in titles and file names.
        timeframe: Timeframe label used in titles and file names.
        output_path: Existing directory that receives the files.
        date_str: Optional suffix for the title and file names.

    Returns:
        Path of the written HTML file, or ``None`` when plotly is unavailable.
    """
    if not HAS_PLOTLY:
        logger.error("plotly not available")
        return None

    fig = make_subplots(
        rows=4, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.05,
        row_heights=[0.5, 0.15, 0.15, 0.2],
        subplot_titles=(
            f'{symbol} - {timeframe} Price & Predictions',
            'AMD Phase Detection',
            'Movement Confidence',
            'Asymmetry Ratio'
        )
    )

    def place(trace, row):
        """Add a trace to the single column of the given row."""
        fig.add_trace(trace, row=row, col=1)

    def note_missing(message, row):
        """Put a centered gray note into an otherwise empty row."""
        fig.add_annotation(
            text=message,
            xref=f"x{row} domain", yref=f"y{row} domain",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=14, color="gray"),
            row=row, col=1
        )

    closes = df['close'].values
    total_rows = len(df)

    # ---- Row 1: Candlestick Chart with Predictions ----
    place(
        go.Candlestick(
            x=df.index,
            open=df['open'],
            high=df['high'],
            low=df['low'],
            close=df['close'],
            name='OHLC',
            increasing_line_color='green',
            decreasing_line_color='red'
        ),
        1
    )

    if range_preds and 'delta_high' in range_preds and 'delta_low' in range_preds:
        count = min(len(range_preds['delta_high']), total_rows)
        stamps = df.index[:count]
        band_top = closes[:count] * (1 + range_preds['delta_high'][:count])
        band_bottom = closes[:count] * (1 - abs(range_preds['delta_low'][:count]))
        dashed_blue = dict(color='blue', dash='dash', width=1)

        place(
            go.Scatter(x=stamps, y=band_top, mode='lines', name='Range Upper',
                       line=dashed_blue, opacity=0.7),
            1
        )
        # 'tonexty' fills up to the trace added just before (the upper band).
        place(
            go.Scatter(x=stamps, y=band_bottom, mode='lines', name='Range Lower',
                       line=dashed_blue,
                       fill='tonexty', fillcolor='rgba(0,0,255,0.1)',
                       opacity=0.7),
            1
        )

    if movement_preds and 'high_usd' in movement_preds:
        count = min(len(movement_preds['high_usd']), total_rows)
        stamps = df.index[:count]
        move_top = closes[:count] + movement_preds['high_usd'][:count]
        move_bottom = closes[:count] - movement_preds['low_usd'][:count]

        place(
            go.Scatter(x=stamps, y=move_top, mode='lines', name='Move High (USD)',
                       line=dict(color='green', width=1.5)),
            1
        )
        place(
            go.Scatter(x=stamps, y=move_bottom, mode='lines', name='Move Low (USD)',
                       line=dict(color='red', width=1.5)),
            1
        )

    # ---- Row 2: AMD Phase Detection ----
    if amd_preds and 'phase_label' in amd_preds:
        labels = amd_preds['phase_label']
        hover_names = amd_preds.get('phase', labels)
        count = min(len(labels), total_rows)

        palette = {0: 'gray', 1: 'green', 2: 'orange', 3: 'red'}
        bar_colors = [palette.get(int(label), 'gray') for label in labels[:count]]

        place(
            go.Bar(
                x=df.index[:count],
                y=[1] * count,
                marker_color=bar_colors,
                name='AMD Phase',
                text=hover_names[:count],
                hovertemplate='%{text}<extra></extra>',
                showlegend=False
            ),
            2
        )
    else:
        note_missing("AMD Detector not loaded", 2)

    # ---- Row 3: Movement Confidence ----
    if movement_preds and 'confidence' in movement_preds:
        count = min(len(movement_preds['confidence']), total_rows)
        place(
            go.Bar(
                x=df.index[:count],
                y=movement_preds['confidence'][:count],
                marker_color='purple',
                name='Confidence',
                opacity=0.7
            ),
            3
        )
        # Threshold line
        fig.add_hline(y=0.6, line_dash="dash", line_color="red", row=3, col=1)
    else:
        note_missing("Movement Predictor not loaded", 3)

    # ---- Row 4: Asymmetry Ratio ----
    if movement_preds and 'asymmetry' in movement_preds:
        count = min(len(movement_preds['asymmetry']), total_rows)
        ratios = movement_preds['asymmetry'][:count]

        # Green above the long threshold, red below the short one.
        bar_colors = []
        for ratio in ratios:
            if ratio > 1.5:
                bar_colors.append('green')
            elif ratio < 0.67:
                bar_colors.append('red')
            else:
                bar_colors.append('gray')

        place(
            go.Bar(
                x=df.index[:count],
                y=ratios,
                marker_color=bar_colors,
                name='Asymmetry',
                opacity=0.7
            ),
            4
        )

        # Threshold lines
        reference_lines = (
            (1.5, dict(line_dash="dash", line_color="green")),
            (0.67, dict(line_dash="dash", line_color="red")),
            (1.0, dict(line_color="black", line_width=0.5)),
        )
        for level, style in reference_lines:
            fig.add_hline(y=level, row=4, col=1, **style)

    # Update layout
    fig.update_layout(
        title=f'{symbol} - {timeframe} ML Model Predictions ({date_str or "Full Period"})',
        height=1000,
        showlegend=True,
        xaxis_rangeslider_visible=False,
        template='plotly_white'
    )

    fig.update_yaxes(title_text="Price", row=1, col=1)
    fig.update_yaxes(title_text="Phase", row=2, col=1)
    fig.update_yaxes(title_text="Confidence", range=[0, 1], row=3, col=1)
    fig.update_yaxes(title_text="Asymmetry", row=4, col=1)
    fig.update_xaxes(title_text="Time", row=4, col=1)

    # Save as HTML
    file_stem = f"{symbol}_{timeframe}_{date_str or 'full'}"
    output_file = output_path / f"{file_stem}.html"
    fig.write_html(str(output_file))
    logger.info(f"Saved interactive chart to {output_file}")

    # Also save as PNG if kaleido is available
    try:
        png_file = output_path / f"{file_stem}_plotly.png"
        fig.write_image(str(png_file), width=1600, height=1000)
        logger.info(f"Saved PNG chart to {png_file}")
    except Exception as e:
        logger.warning(f"Could not save PNG (install kaleido): {e}")

    return output_file
|
|
|
|
|
|
# ==============================================================================
|
|
# Main Visualization Function
|
|
# ==============================================================================
|
|
|
|
def run_visualization(
    symbol: str = "XAUUSD",
    timeframe: str = "15m",
    start_date: str = "2025-01-06",
    end_date: str = "2025-01-12",
    output_format: str = "both",  # 'matplotlib', 'plotly', 'both'
    horizon: str = "scalping",
    model_base_path: str = None
):
    """
    Run visualization for all ML models.

    Loads price data (falling back to synthetic demo data when the database
    cannot be read), loads the available models, generates their predictions,
    writes the requested charts and a JSON summary under ``charts/<symbol>``.

    Args:
        symbol: Trading symbol (XAUUSD, BTCUSD, EURUSD)
        timeframe: Timeframe (5m, 15m)
        start_date: Start date (YYYY-MM-DD)
        end_date: End date (YYYY-MM-DD)
        output_format: Output format (matplotlib, plotly, both)
        horizon: Prediction horizon (scalping, intraday)
        model_base_path: Base path for models; defaults to
            ``models/ml_first/<symbol>``

    Returns:
        The summary dict that is also written to disk, or ``None`` when the
        model path does not exist or no data is available.
    """
    logger.info("=" * 60)
    logger.info("ML MODELS VISUALIZATION")
    logger.info(f"Symbol: {symbol}")
    logger.info(f"Timeframe: {timeframe}")
    logger.info(f"Period: {start_date} to {end_date}")
    logger.info("=" * 60)

    # Set model base path
    if model_base_path is None:
        model_base_path = f"models/ml_first/{symbol}"

    model_path = Path(model_base_path)
    if not model_path.exists():
        logger.error(f"Model path not found: {model_path}")
        logger.info("Available model paths:")
        for p in Path("models/ml_first").glob("*"):
            logger.info(f"  - {p}")
        return None

    # Create output directory
    output_path = Path("charts") / symbol
    output_path.mkdir(parents=True, exist_ok=True)

    # Load data from database
    logger.info("Loading data from database...")
    try:
        db = MySQLConnection('config/database.yaml')
        df_raw = db.get_ticker_data(
            symbol,
            limit=100000,
            start_date=start_date,
            end_date=end_date
        )
    except Exception as e:
        logger.error(f"Failed to load data from database: {e}")
        logger.info("Attempting to create sample data for demonstration...")
        df_raw = _make_sample_ohlcv(start_date, end_date, timeframe)
        logger.warning("Charts will be based on synthetic sample data, not market data")

    if df_raw is None or df_raw.empty:
        logger.error(f"No data found for {symbol} in the specified period")
        return None

    logger.info(f"Loaded {len(df_raw)} records from {df_raw.index.min()} to {df_raw.index.max()}")

    # Load models
    logger.info("\nLoading ML models...")

    # 1. RangePredictor
    range_models, range_metadata = load_range_predictor(str(model_path), timeframe, horizon)

    # 2. MovementMagnitudePredictor
    horizon_key = "15m_60min" if timeframe == "15m" else "5m_15min"
    movement_predictor = load_movement_predictor(str(model_path), horizon_key)

    # 3. AMDDetectorML
    amd_detector = load_amd_detector(str(model_path))

    # 4. TPSLClassifier (optional)
    tpsl_classifier = load_tpsl_classifier(str(model_path))

    # Get expected features from metadata: the feature names of the first
    # entry in 'feature_importance' are taken as the training feature list.
    expected_features = None
    if range_metadata:
        fi = range_metadata.get('feature_importance', {})
        if fi:
            first_key = next(iter(fi))
            expected_features = list(fi[first_key])
            logger.info(f"Models expect {len(expected_features)} features")

    # Prepare features
    logger.info("\nPreparing features...")
    df = prepare_features(df_raw.copy(), expected_features)

    feature_cols = expected_features if expected_features else get_feature_columns(df)
    logger.info(f"Using {len(feature_cols)} features")

    # Generate predictions
    logger.info("\nGenerating predictions...")

    # Range predictions
    range_preds = {}
    if range_models:
        # Filter to matching features
        available_features = [f for f in feature_cols if f in df.columns]
        X = df[available_features].values
        range_preds = predict_with_range_models(range_models, X, horizon)
        logger.info(f"Generated range predictions: {list(range_preds.keys())}")

    # Movement predictions
    movement_preds = {}
    if movement_predictor:
        # Pass the raw OHLCV data - predictor will create its own features
        movement_preds = predict_with_movement_predictor(movement_predictor, df_raw)
        if movement_preds:
            logger.info(f"Generated movement predictions: {list(movement_preds.keys())}")
        else:
            logger.warning("Movement predictor returned no predictions")

    # AMD predictions
    amd_preds = {}
    if amd_detector:
        amd_preds = predict_with_amd_detector(amd_detector, df_raw)
        logger.info(f"Generated AMD predictions: {list(amd_preds.keys())}")

    # The charts pair predictions with `df` rows by position, but these two
    # models ran on `df_raw`, which still contains the rows that dropna()
    # removed from `df`. Re-align them so they are not drawn shifted.
    movement_preds = _align_predictions(movement_preds, df_raw.index, df.index, "Movement")
    amd_preds = _align_predictions(amd_preds, df_raw.index, df.index, "AMD")

    # Create date string for filename
    date_str = f"{start_date}_to_{end_date}".replace("-", "")

    # Generate visualizations
    logger.info("\nGenerating visualizations...")

    if output_format in ['matplotlib', 'both'] and HAS_MATPLOTLIB:
        create_matplotlib_chart(
            df, range_preds, movement_preds, amd_preds,
            symbol, timeframe, output_path, date_str
        )

    if output_format in ['plotly', 'both'] and HAS_PLOTLY:
        create_plotly_chart(
            df, range_preds, movement_preds, amd_preds,
            symbol, timeframe, output_path, date_str
        )

    # Generate summary report
    summary = {
        'symbol': symbol,
        'timeframe': timeframe,
        'period': {'start': start_date, 'end': end_date},
        'data_points': len(df),
        'models_loaded': {
            'range_predictor': bool(range_models),
            'movement_predictor': bool(movement_predictor),
            'amd_detector': bool(amd_detector),
            'tpsl_classifier': bool(tpsl_classifier)
        },
        'predictions_generated': {
            'range': list(range_preds.keys()) if range_preds else [],
            'movement': list(movement_preds.keys()) if movement_preds else [],
            'amd': list(amd_preds.keys()) if amd_preds else []
        },
        'output_path': str(output_path)
    }

    # Save summary
    summary_file = output_path / f"summary_{date_str}.json"
    with open(summary_file, 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=2, default=str)
    logger.info(f"Saved summary to {summary_file}")

    logger.info("\n" + "=" * 60)
    logger.info("VISUALIZATION COMPLETE")
    logger.info(f"Charts saved to: {output_path}")
    logger.info("=" * 60)

    return summary


def _timeframe_to_pandas_freq(timeframe: str) -> str:
    """Translate a timeframe label such as ``"15m"`` into a pandas frequency alias.

    pandas does not read a bare ``m`` as minutes, so ``"15m"`` must become
    ``"15min"``. Labels with an unrecognised unit are returned unchanged.
    """
    unit_aliases = {'m': 'min', 'min': 'min', 'h': 'h', 'd': 'D'}
    label = timeframe.strip()
    unit = label.lstrip('0123456789')
    count = label[:len(label) - len(unit)]
    alias = unit_aliases.get(unit.strip().lower())
    if alias is None:
        return timeframe
    return f"{count or 1}{alias}"


def _make_sample_ohlcv(start_date: str, end_date: str, timeframe: str) -> pd.DataFrame:
    """Build a reproducible random-walk OHLCV frame for demo purposes."""
    dates = pd.date_range(start=start_date, end=end_date,
                          freq=_timeframe_to_pandas_freq(timeframe))
    n = len(dates)
    # A private generator with the same seed keeps the demo data reproducible
    # without resetting NumPy's global random state for the rest of the process.
    rng = np.random.RandomState(42)
    price = 2650 + np.cumsum(rng.randn(n) * 2)
    sample = pd.DataFrame({
        'open': price + rng.randn(n) * 0.5,
        'high': price + np.abs(rng.randn(n)) * 5,
        'low': price - np.abs(rng.randn(n)) * 5,
        'close': price + rng.randn(n) * 0.5,
        'volume': rng.randint(100, 1000, n)
    }, index=dates)
    # Keep every candle consistent: high/low must enclose open and close.
    sample['high'] = sample[['open', 'high', 'close']].max(axis=1)
    sample['low'] = sample[['open', 'low', 'close']].min(axis=1)
    return sample


def _align_predictions(preds: Dict, source_index, target_index, label: str) -> Dict:
    """Select the predictions made on ``source_index`` rows that survive in ``target_index``.

    Alignment is only attempted when every array has exactly one entry per
    source row (assumed to be in row order); otherwise the predictions are
    returned unchanged and a warning is logged, because the row each
    prediction belongs to cannot be determined here.
    """
    if not preds or source_index.equals(target_index):
        return preds

    one_per_row = all(len(values) == len(source_index) for values in preds.values())
    if one_per_row and source_index.is_unique:
        positions = source_index.get_indexer(target_index)
        if (positions >= 0).all():
            return {key: np.asarray(values)[positions] for key, values in preds.items()}

    logger.warning(f"{label} predictions could not be aligned to the feature rows; "
                   "they may appear shifted in the charts")
    return preds
|
|
|
|
|
|
# ==============================================================================
|
|
# CLI Entry Point
|
|
# ==============================================================================
|
|
|
|
def main():
    """Parse the command line and run the visualization for each requested symbol.

    A failure for one symbol/timeframe is logged with its traceback and does
    not stop the remaining combinations. A short summary is printed at the end.
    """
    import traceback

    parser = argparse.ArgumentParser(
        description='Visualize ML model predictions for trading data',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Visualize XAUUSD for second week of January 2025
    python scripts/run_visualization.py --symbol XAUUSD --timeframe 15m

    # Custom date range
    python scripts/run_visualization.py --symbol BTCUSD --start 2025-01-10 --end 2025-01-15

    # All symbols
    python scripts/run_visualization.py --all-symbols

    # Only matplotlib output
    python scripts/run_visualization.py --format matplotlib
        """
    )

    # Declared as data so the option list reads like a table; the order
    # determines the order in the generated --help text.
    cli_options = (
        ('--symbol', dict(default='XAUUSD',
                          help='Trading symbol (default: XAUUSD)')),
        ('--timeframe', dict(default='15m',
                             help='Timeframe: 5m or 15m (default: 15m)')),
        ('--start', dict(default='2025-01-06',
                         help='Start date YYYY-MM-DD (default: 2025-01-06)')),
        ('--end', dict(default='2025-01-12',
                       help='End date YYYY-MM-DD (default: 2025-01-12)')),
        ('--format', dict(default='both', choices=['matplotlib', 'plotly', 'both'],
                          help='Output format (default: both)')),
        ('--horizon', dict(default='scalping',
                           help='Prediction horizon: scalping or intraday (default: scalping)')),
        ('--model-path', dict(default=None,
                              help='Base path for models (default: models/ml_first/{symbol})')),
        ('--all-symbols', dict(action='store_true',
                               help='Run for all available symbols')),
    )
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)

    args = parser.parse_args()

    # Symbols and timeframes to process
    symbols = ['XAUUSD', 'BTCUSD', 'EURUSD'] if args.all_symbols else [args.symbol]
    timeframes = [args.timeframe]
    jobs = ((symbol, timeframe) for symbol in symbols for timeframe in timeframes)

    # Run for each combination
    results = []
    for symbol, timeframe in jobs:
        logger.info(f"\nProcessing {symbol} - {timeframe}...")
        try:
            outcome = run_visualization(
                symbol=symbol,
                timeframe=timeframe,
                start_date=args.start,
                end_date=args.end,
                output_format=args.format,
                horizon=args.horizon,
                model_base_path=args.model_path
            )
        except Exception as e:
            logger.error(f"Failed to process {symbol} - {timeframe}: {e}")
            traceback.print_exc()
            continue
        if outcome:
            results.append(outcome)

    # Final summary
    banner = "=" * 60
    print("\n" + banner)
    print("VISUALIZATION SUMMARY")
    print(banner)
    print(f"Processed {len(results)} symbol/timeframe combinations")
    for entry in results:
        print(f"  - {entry['symbol']} / {entry['timeframe']}: {entry['data_points']} data points")
    print("\nCharts saved to: charts/")
    print(banner)
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|