Hierarchical ML Pipeline for trading predictions:
- Level 0: Attention Models (volatility/flow classification)
- Level 1: Base Models (XGBoost per symbol/timeframe)
- Level 2: Metamodels (XGBoost Stacking + Neural Gating)
Key components:
- src/pipelines/hierarchical_pipeline.py - Main prediction pipeline
- src/models/ - All ML model classes
- src/training/ - Training utilities
- src/api/ - FastAPI endpoints
- scripts/ - Training and evaluation scripts
- config/ - YAML configurations
Note: Trained models (*.joblib, *.pt) are gitignored; regenerate them with the training scripts.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
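For orientation, here is a minimal sketch of how the three levels could compose at inference time. The class and method names are illustrative placeholders only, not the actual API of src/pipelines/hierarchical_pipeline.py:

import numpy as np

class AttentionContext:
    """Level 0 (hypothetical): classify the volatility/flow regime."""
    def classify(self, features: np.ndarray) -> dict:
        vol = float(np.std(features))
        return {"regime": "high_vol" if vol > 0.05 else "low_vol", "confidence": 0.8}

class BaseModel:
    """Level 1 (hypothetical): stands in for one XGBoost model per symbol/timeframe."""
    def predict(self, features: np.ndarray) -> float:
        return float(features.mean())

class MetaModel:
    """Level 2 (hypothetical): stacking/gating over the base predictions."""
    def combine(self, base_preds, context: dict) -> float:
        return context["confidence"] * float(np.mean(base_preds))

def hierarchical_predict(features: np.ndarray) -> float:
    context = AttentionContext().classify(features)                 # Level 0
    base_preds = [BaseModel().predict(features) for _ in range(3)]  # Level 1
    return MetaModel().combine(base_preds, context)                 # Level 2

if __name__ == "__main__":
    print(hierarchical_predict(np.array([0.10, 0.12, 0.11, 0.15])))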
#!/usr/bin/env python3
"""
Backtesting Script for OOS Period (March 2024 - March 2025)
============================================================

Loads trained models and evaluates them on the holdout period.

Usage:
    python scripts/run_backtest_oos_period.py --symbols XAUUSD EURUSD

Author: ML Pipeline
Created: 2026-01-06
"""

import argparse
import sys
from pathlib import Path
from datetime import datetime
import json

import numpy as np
import pandas as pd
from loguru import logger

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))

from training.symbol_timeframe_trainer import (
    SymbolTimeframeTrainer,
    TrainerConfig,
    SYMBOL_CONFIGS
)
from data.database import MySQLConnection


def setup_logging(log_dir: Path, experiment_name: str):
    """Configure logging to file and console."""
    log_dir.mkdir(parents=True, exist_ok=True)
    log_file = log_dir / f"{experiment_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"

    logger.remove()
    logger.add(sys.stderr, level="INFO", format="{time:HH:mm:ss} | {level} | {message}")
    logger.add(log_file, level="DEBUG", rotation="10 MB")

    return log_file


def load_oos_data(
    db: MySQLConnection,
    symbol: str,
    start_date: str,
    end_date: str
) -> pd.DataFrame:
    """Load OOS data from database."""
    db_symbol = symbol
    if not symbol.startswith('C:') and not symbol.startswith('X:'):
        if symbol == 'BTCUSD':
            db_symbol = f'X:{symbol}'
        else:
            db_symbol = f'C:{symbol}'

    logger.info(f"Loading OOS data for {db_symbol}...")

    query = """
        SELECT
            date_agg as time,
            open, high, low, close, volume, vwap
        FROM tickers_agg_data
        WHERE ticker = :symbol
          AND date_agg >= :start_date
          AND date_agg <= :end_date
        ORDER BY date_agg ASC
    """

    df = db.execute_query(query, {
        'symbol': db_symbol,
        'start_date': start_date,
        'end_date': end_date
    })

    if df.empty:
        logger.warning(f"No data found for {symbol}")
        return df

    df['time'] = pd.to_datetime(df['time'])
    df.set_index('time', inplace=True)
    df = df.sort_index()
    df.columns = ['open', 'high', 'low', 'close', 'volume', 'vwap']

    logger.info(f"Loaded {len(df)} records for {symbol}")
    logger.info(f"  Date range: {df.index.min()} to {df.index.max()}")

    return df


def resample_to_timeframe(df: pd.DataFrame, timeframe: str) -> pd.DataFrame:
    """Resample 5-minute data to different timeframe."""
    if timeframe == '5m':
        return df

    tf_map = {'15m': '15min', '30m': '30min', '1H': '1H', '4H': '4H', '1D': '1D'}
    offset = tf_map.get(timeframe, timeframe)

    resampled = df.resample(offset).agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
        'vwap': 'mean'
    }).dropna()

    return resampled


def generate_features(df: pd.DataFrame) -> pd.DataFrame:
    """Generate comprehensive feature set."""
    features = pd.DataFrame(index=df.index)

    close = df['close']
    high = df['high']
    low = df['low']
    open_price = df['open']
    volume = df['volume'] if 'volume' in df.columns else pd.Series(1, index=df.index)

    # Price Returns
    features['returns_1'] = close.pct_change(1)
    features['returns_3'] = close.pct_change(3)
    features['returns_5'] = close.pct_change(5)
    features['returns_10'] = close.pct_change(10)
    features['returns_20'] = close.pct_change(20)

    # Volatility Features
    features['volatility_5'] = close.pct_change().rolling(5).std()
    features['volatility_10'] = close.pct_change().rolling(10).std()
    features['volatility_20'] = close.pct_change().rolling(20).std()

    # Range Features
    candle_range = high - low
    features['range'] = candle_range
    features['range_pct'] = candle_range / close
    features['range_ma_5'] = candle_range.rolling(5).mean()
    features['range_ma_10'] = candle_range.rolling(10).mean()
    features['range_ma_20'] = candle_range.rolling(20).mean()
    features['range_ratio_5'] = candle_range / features['range_ma_5']
    features['range_ratio_20'] = candle_range / features['range_ma_20']

    # ATR Features
    tr1 = high - low
    tr2 = abs(high - close.shift(1))
    tr3 = abs(low - close.shift(1))
    true_range = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    features['atr_5'] = true_range.rolling(5).mean()
    features['atr_14'] = true_range.rolling(14).mean()
    features['atr_20'] = true_range.rolling(20).mean()
    features['atr_ratio'] = true_range / features['atr_14']

    # Moving Averages
    sma_5 = close.rolling(5).mean()
    sma_10 = close.rolling(10).mean()
    sma_20 = close.rolling(20).mean()
    sma_50 = close.rolling(50).mean()

    ema_5 = close.ewm(span=5, adjust=False).mean()
    ema_10 = close.ewm(span=10, adjust=False).mean()
    ema_20 = close.ewm(span=20, adjust=False).mean()

    features['price_vs_sma5'] = (close - sma_5) / features['atr_14']
    features['price_vs_sma10'] = (close - sma_10) / features['atr_14']
    features['price_vs_sma20'] = (close - sma_20) / features['atr_14']
    features['price_vs_sma50'] = (close - sma_50) / features['atr_14']
    features['sma5_vs_sma20'] = (sma_5 - sma_20) / features['atr_14']
    features['ema5_vs_ema20'] = (ema_5 - ema_20) / features['atr_14']

    # RSI
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / (loss + 1e-10)
    features['rsi_14'] = 100 - (100 / (1 + rs))
    features['rsi_oversold'] = (features['rsi_14'] < 30).astype(float)
    features['rsi_overbought'] = (features['rsi_14'] > 70).astype(float)

    # Bollinger Bands
    bb_middle = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_middle + 2 * bb_std
    bb_lower = bb_middle - 2 * bb_std
    features['bb_width'] = (bb_upper - bb_lower) / bb_middle
    features['bb_position'] = (close - bb_lower) / (bb_upper - bb_lower + 1e-10)

    # MACD
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd = ema_12 - ema_26
    macd_signal = macd.ewm(span=9, adjust=False).mean()
    features['macd'] = macd / features['atr_14']
    features['macd_signal'] = macd_signal / features['atr_14']
    features['macd_hist'] = (macd - macd_signal) / features['atr_14']

    # Momentum
    features['momentum_5'] = (close - close.shift(5)) / features['atr_14']
    features['momentum_10'] = (close - close.shift(10)) / features['atr_14']
    features['momentum_20'] = (close - close.shift(20)) / features['atr_14']

    # Stochastic
    low_14 = low.rolling(14).min()
    high_14 = high.rolling(14).max()
    features['stoch_k'] = 100 * (close - low_14) / (high_14 - low_14 + 1e-10)
    features['stoch_d'] = features['stoch_k'].rolling(3).mean()

    # Williams %R
    features['williams_r'] = -100 * (high_14 - close) / (high_14 - low_14 + 1e-10)

    # Volume Features
    if volume.sum() > 0:
        vol_ma_5 = volume.rolling(5).mean()
        vol_ma_20 = volume.rolling(20).mean()
        features['volume_ratio'] = volume / (vol_ma_20 + 1)
        features['volume_trend'] = (vol_ma_5 - vol_ma_20) / (vol_ma_20 + 1)

    # Candle Patterns
    body = close - open_price
    features['body_pct'] = body / (candle_range + 1e-10)
    features['upper_shadow'] = (high - np.maximum(close, open_price)) / (candle_range + 1e-10)
    features['lower_shadow'] = (np.minimum(close, open_price) - low) / (candle_range + 1e-10)

    # Price Position
    features['close_position'] = (close - low) / (candle_range + 1e-10)
    high_5 = high.rolling(5).max()
    low_5 = low.rolling(5).min()
    features['price_position_5'] = (close - low_5) / (high_5 - low_5 + 1e-10)

    high_20 = high.rolling(20).max()
    low_20 = low.rolling(20).min()
    features['price_position_20'] = (close - low_20) / (high_20 - low_20 + 1e-10)

    # Time Features
    features['hour'] = df.index.hour
    features['hour_sin'] = np.sin(2 * np.pi * features['hour'] / 24)
    features['hour_cos'] = np.cos(2 * np.pi * features['hour'] / 24)
    features['day_of_week'] = df.index.dayofweek
    features['dow_sin'] = np.sin(2 * np.pi * features['day_of_week'] / 7)
    features['dow_cos'] = np.cos(2 * np.pi * features['day_of_week'] / 7)

    # Trading sessions
    features['is_london'] = ((features['hour'] >= 8) & (features['hour'] < 16)).astype(float)
    features['is_newyork'] = ((features['hour'] >= 13) & (features['hour'] < 21)).astype(float)
    features['is_overlap'] = ((features['hour'] >= 13) & (features['hour'] < 16)).astype(float)

    # Clean up
    features = features.replace([np.inf, -np.inf], np.nan)
    drop_cols = ['hour', 'day_of_week']
    features = features.drop(columns=[c for c in drop_cols if c in features.columns], errors='ignore')

    return features


def compute_actual_ranges(df: pd.DataFrame, horizon: int = 3) -> tuple:
    """Compute actual future high/low ranges."""
    close = df['close'].values
    high = df['high'].values
    low = df['low'].values
    n = len(df)

    actual_high = np.full(n, np.nan)
    actual_low = np.full(n, np.nan)

    for i in range(n - horizon):
        future_high = high[i+1:i+1+horizon]
        future_low = low[i+1:i+1+horizon]

        actual_high[i] = np.max(future_high) - close[i]
        actual_low[i] = close[i] - np.min(future_low)

    return actual_high, actual_low


def evaluate_predictions(
    actual_high: np.ndarray,
    actual_low: np.ndarray,
    pred_high: np.ndarray,
    pred_low: np.ndarray,
    symbol: str,
    timeframe: str
) -> dict:
    """Evaluate prediction quality."""
    # Ensure arrays are same length - truncate to shortest
    min_len = min(len(actual_high), len(actual_low), len(pred_high), len(pred_low))
    actual_high = actual_high[:min_len]
    actual_low = actual_low[:min_len]
    pred_high = pred_high[:min_len]
    pred_low = pred_low[:min_len]

    valid = ~(np.isnan(actual_high) | np.isnan(actual_low) |
              np.isnan(pred_high) | np.isnan(pred_low))

    ah, al = actual_high[valid], actual_low[valid]
    ph, pl = pred_high[valid], pred_low[valid]

    if len(ah) == 0:
        return {'symbol': symbol, 'timeframe': timeframe, 'n_samples': 0,
                'error': 'No valid samples'}

    mae_high = np.mean(np.abs(ah - ph))
    mae_low = np.mean(np.abs(al - pl))

    rmse_high = np.sqrt(np.mean((ah - ph)**2))
    rmse_low = np.sqrt(np.mean((al - pl)**2))

    # Directional accuracy
    dir_acc_high = np.mean(np.sign(ah) == np.sign(ph))
    dir_acc_low = np.mean(np.sign(al) == np.sign(pl))

    # Signal quality metrics for trading
    signal_threshold = np.median(np.abs(ah))

    # HIGH signal: predicted move > threshold (use filtered arrays)
    high_signals = ph > signal_threshold
    high_signal_accuracy = np.mean(ah[high_signals] > 0) if high_signals.sum() > 0 else 0

    # LOW signal: predicted move > threshold
    low_signals = pl > signal_threshold
    low_signal_accuracy = np.mean(al[low_signals] > 0) if low_signals.sum() > 0 else 0

    # R:R Analysis - simulated trades
    rr_results = analyze_rr_performance(ah, al, ph, pl, symbol)

    return {
        'symbol': symbol,
        'timeframe': timeframe,
        'n_samples': valid.sum(),
        'mae_high': mae_high,
        'mae_low': mae_low,
        'rmse_high': rmse_high,
        'rmse_low': rmse_low,
        'dir_accuracy_high': dir_acc_high,
        'dir_accuracy_low': dir_acc_low,
        'high_signals': int(high_signals.sum()),
        'high_signal_accuracy': high_signal_accuracy,
        'low_signals': int(low_signals.sum()),
        'low_signal_accuracy': low_signal_accuracy,
        'rr_analysis': rr_results
    }


def analyze_rr_performance(
    actual_high: np.ndarray,
    actual_low: np.ndarray,
    pred_high: np.ndarray,
    pred_low: np.ndarray,
    symbol: str
) -> dict:
    """Analyze R:R based trading performance."""
    results = {}

    for rr in [1.0, 1.5, 2.0, 2.5, 3.0]:
        # LONG trades: predicted low is the stop loss, scaled predicted high is the take profit
        long_sl = pred_low
        long_tp = pred_high * rr

        # Check whether the TP / SL levels were reached within the horizon
        long_hit_tp = actual_high >= long_tp
        long_hit_sl = actual_low >= long_sl

        # Conservative: if both levels are hit, count the trade as a loss
        long_wins = long_hit_tp & ~long_hit_sl
        long_losses = long_hit_sl

        wins = long_wins.sum()
        losses = long_losses.sum()
        total = wins + losses

        if total > 0:
            win_rate = wins / total
            expectancy = (win_rate * rr) - ((1 - win_rate) * 1)
        else:
            win_rate = 0
            expectancy = 0

        results[f'rr_{rr}'] = {
            'win_rate': win_rate,
            'wins': int(wins),
            'losses': int(losses),
            'total_trades': int(total),
            'expectancy': expectancy,
            'rr_ratio': rr
        }

    return results


def run_backtest(
    symbols: list,
    timeframes: list,
    model_dir: str,
    start_date: str,
    end_date: str,
    output_dir: str
) -> dict:
    """Run backtest on OOS period."""
    logger.info("="*60)
    logger.info("OOS BACKTEST")
    logger.info("="*60)
    logger.info(f"Symbols: {symbols}")
    logger.info(f"Timeframes: {timeframes}")
    logger.info(f"OOS Period: {start_date} to {end_date}")
    logger.info(f"Model dir: {model_dir}")

    # Load trained models
    trainer = SymbolTimeframeTrainer()
    trainer.load(model_dir)
    logger.info(f"Loaded {len(trainer.models)} models")

    # Connect to database
    db = MySQLConnection('config/database.yaml')

    all_results = {}

    for symbol in symbols:
        logger.info(f"\n{'='*60}")
        logger.info(f"Backtesting {symbol}")
        logger.info(f"{'='*60}")

        # Load OOS data
        df_5m = load_oos_data(db, symbol, start_date, end_date)

        if df_5m.empty:
            logger.warning(f"No OOS data for {symbol}")
            continue

        for timeframe in timeframes:
            logger.info(f"\n--- {symbol} {timeframe} ---")

            # Resample if needed
            if timeframe == '5m':
                df_tf = df_5m.copy()
            else:
                df_tf = resample_to_timeframe(df_5m.copy(), timeframe)

            if len(df_tf) < 1000:
                logger.warning(f"Insufficient data: {len(df_tf)} bars")
                continue

            # Generate features
            features = generate_features(df_tf)

            # Combine with OHLCV
            df_combined = pd.concat([df_tf[['open', 'high', 'low', 'close', 'volume']], features], axis=1)
            df_combined = df_combined.dropna()

            logger.info(f"OOS data shape: {df_combined.shape}")

            # Compute actual ranges
            horizon = trainer.config.horizons.get(timeframe, 3)
            actual_high, actual_low = compute_actual_ranges(df_combined, horizon)

            # Prepare features for prediction - use same filter as trainer
            exclude_patterns = [
                'target_', 'high', 'low', 'open', 'close', 'volume',
                'High', 'Low', 'Open', 'Close', 'Volume',
                'timestamp', 'datetime', 'date', 'time',
                'rr_', 'direction', 'is_valid', 'vwap'
            ]
            feature_cols = []
            for col in df_combined.columns:
                if not any(pat.lower() in col.lower() for pat in exclude_patterns):
                    if df_combined[col].dtype in [np.float64, np.float32, np.int64, np.int32, float, int]:
                        feature_cols.append(col)

            logger.info(f"Using {len(feature_cols)} features for prediction")
            X = df_combined[feature_cols].values

            try:
                # Get predictions
                predictions = trainer.predict(X, symbol, timeframe)
                pred_high = predictions['high']
                pred_low = predictions['low']

                # Evaluate
                results = evaluate_predictions(
                    actual_high, actual_low,
                    pred_high, pred_low,
                    symbol, timeframe
                )

                key = f"{symbol}_{timeframe}"
                all_results[key] = results

                # Skip detailed metrics when evaluation produced no valid samples
                if 'error' in results:
                    logger.warning(f"No valid samples for {symbol} {timeframe}, skipping metrics")
                    continue

                # Print results
                logger.info(f"\nResults for {symbol} {timeframe}:")
                logger.info(f"  Samples: {results['n_samples']}")
                logger.info(f"  MAE High: {results['mae_high']:.6f}")
                logger.info(f"  MAE Low: {results['mae_low']:.6f}")
                logger.info(f"  Dir Accuracy High: {results['dir_accuracy_high']:.2%}")
                logger.info(f"  Dir Accuracy Low: {results['dir_accuracy_low']:.2%}")
                logger.info(f"  Signal Accuracy High: {results['high_signal_accuracy']:.2%}")
                logger.info(f"  Signal Accuracy Low: {results['low_signal_accuracy']:.2%}")

                # R:R results
                logger.info("\n  R:R Performance:")
                for rr_key, rr_data in results['rr_analysis'].items():
                    logger.info(f"    {rr_key}: WR={rr_data['win_rate']:.2%}, "
                                f"Trades={rr_data['total_trades']}, "
                                f"Expectancy={rr_data['expectancy']:.3f}")

            except Exception as e:
                logger.error(f"Error predicting {symbol} {timeframe}: {e}")
                import traceback
                traceback.print_exc()

    # Save results
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    report_file = output_path / f"backtest_oos_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(report_file, 'w') as f:
        json.dump(all_results, f, indent=2, default=str)

    logger.info(f"\nResults saved to {report_file}")

    # Generate markdown report
    generate_markdown_report(all_results, output_path, start_date, end_date)

    return all_results


def generate_markdown_report(results: dict, output_dir: Path, start_date: str, end_date: str):
    """Generate markdown report of backtest results."""
    report_path = output_dir / f"BACKTEST_REPORT_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"

    report = f"""# OOS Backtest Report

**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Configuration

- **OOS Period:** {start_date} to {end_date}
- **Training Data Cutoff:** {start_date} (excluded from training)

## Summary by Symbol/Timeframe

| Symbol | TF | Samples | MAE High | MAE Low | Dir Acc High | Dir Acc Low | Signal Acc |
|--------|----|---------|---------:|--------:|-------------:|------------:|-----------:|
"""

    for r in results.values():
        # Skip entries that produced no valid samples
        if 'error' in r:
            continue
        report += f"| {r['symbol']} | {r['timeframe']} | {r['n_samples']} | "
        report += f"{r['mae_high']:.4f} | {r['mae_low']:.4f} | "
        report += f"{r['dir_accuracy_high']:.1%} | {r['dir_accuracy_low']:.1%} | "
        report += f"{r['high_signal_accuracy']:.1%} |\n"

    report += """

## R:R Analysis

### Risk/Reward Performance by Symbol

"""

    for r in results.values():
        if 'error' in r:
            continue
        report += f"\n#### {r['symbol']} {r['timeframe']}\n\n"
        report += "| R:R | Win Rate | Trades | Expectancy |\n"
        report += "|-----|---------|--------|------------|\n"

        for rr_key, rr_data in r['rr_analysis'].items():
            report += f"| {rr_data['rr_ratio']} | {rr_data['win_rate']:.1%} | "
            report += f"{rr_data['total_trades']} | {rr_data['expectancy']:.3f} |\n"

    report += """

## Conclusions

### Key Observations

1. **Directional Accuracy**: Shows how often the models correctly predict whether price
   will move up or down; compare the per-symbol/timeframe values in the summary table above.

2. **Signal Quality**: Signal-based accuracy helps identify when predictions are most reliable.

3. **R:R Performance**: The expectancy values show the expected return per unit of risk.
   - Positive expectancy = profitable strategy
   - Expectancy > 0.5 with 2:1 R:R = strong edge

### Recommendations

1. Focus on configurations with positive expectancy
2. Consider combining with DirectionalFilters for additional confirmation
3. Use volume/volatility filters during low-quality periods

---
*Report generated by OOS Backtest Pipeline*
"""

    with open(report_path, 'w') as f:
        f.write(report)

    logger.info(f"Markdown report saved to {report_path}")


def main():
    parser = argparse.ArgumentParser(description='Run OOS Backtest')
    parser.add_argument('--symbols', nargs='+', default=['XAUUSD', 'EURUSD'],
                        help='Symbols to backtest')
    parser.add_argument('--timeframes', nargs='+', default=['5m', '15m'],
                        help='Timeframes to backtest')
    parser.add_argument('--model-dir', type=str,
                        default='models/backtest_mar2024/symbol_timeframe_models',
                        help='Directory with trained models')
    parser.add_argument('--start-date', type=str, default='2024-03-01',
                        help='OOS period start date')
    parser.add_argument('--end-date', type=str, default='2025-03-18',
                        help='OOS period end date')
    parser.add_argument('--output-dir', type=str, default='reports/backtest_oos',
                        help='Output directory')

    args = parser.parse_args()

    script_dir = Path(__file__).parent.parent
    output_dir = script_dir / args.output_dir
    logs_dir = output_dir / 'logs'

    setup_logging(logs_dir, 'backtest_oos')

    try:
        results = run_backtest(
            symbols=args.symbols,
            timeframes=args.timeframes,
            model_dir=str(script_dir / args.model_dir),
            start_date=args.start_date,
            end_date=args.end_date,
            output_dir=str(output_dir)
        )

        # Print final summary
        print("\n" + "="*70)
        print("BACKTEST SUMMARY")
        print("="*70)

        for r in results.values():
            # Skip entries that produced no valid samples
            if 'error' in r:
                continue
            print(f"\n{r['symbol']} {r['timeframe']}:")
            print(f"  Dir Accuracy: High={r['dir_accuracy_high']:.1%}, Low={r['dir_accuracy_low']:.1%}")

            # Find best R:R
            best_rr = max(r['rr_analysis'].items(),
                          key=lambda x: x[1]['expectancy'])
            print(f"  Best R:R: {best_rr[0]} (WR={best_rr[1]['win_rate']:.1%}, "
                  f"Exp={best_rr[1]['expectancy']:.3f})")

        print("\n" + "="*70)
        print("BACKTEST COMPLETE!")
        print("="*70)

    except Exception as e:
        logger.exception(f"Backtest failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()