#!/usr/bin/env python3
"""
Range-Based Backtest
====================

Uses RangePredictorV2 predictions directly for adaptive TP/SL.

Strategy:
- Predict high_delta and low_delta for each bar
- Direction: If predicted_high > predicted_low * factor -> Long
- TP: Set at fraction of predicted favorable range
- SL: Set at multiple of predicted adverse range

Author: ML-Specialist (NEXUS v4.0)
Date: 2026-01-04
"""
import sys
sys.path.insert(0, 'src')

import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
import yaml
import json
from loguru import logger
import argparse
import joblib

from data.database import MySQLConnection
from data.features import FeatureEngineer
from training.data_splitter import TemporalDataSplitter


def load_range_predictor(model_path: str):
    """Load trained RangePredictorV2 model.

    Scans ``model_path`` for ``*.joblib`` files: the file whose stem is
    ``metadata`` becomes the metadata dict; every other file is loaded
    as a named estimator.

    Args:
        model_path: Directory containing the per-target ``.joblib`` files.

    Returns:
        Tuple ``(models, metadata)`` where ``models`` maps model name
        (file stem) to the unpickled estimator.
    """
    # Import kept so the class is registered before unpickling model files
    # that may reference it.
    from models.range_predictor_v2 import RangePredictorV2  # noqa: F401

    models = {}
    metadata = {}
    for model_file in Path(model_path).glob("*.joblib"):
        name = model_file.stem
        if name == 'metadata':
            metadata = joblib.load(model_file)
            logger.info(f"Loaded metadata")
        else:
            models[name] = joblib.load(model_file)
            logger.info(f"Loaded model: {name}")

    return models, metadata


def prepare_features(df: pd.DataFrame, feature_cols: list = None) -> pd.DataFrame:
    """Prepare features matching training.

    Runs the standard FeatureEngineer pipeline, backfills a few
    indicators (OBV, VPT, session flags) if missing, and — when
    ``feature_cols`` is given — guarantees every expected column exists
    (missing ones are zero-filled with a warning).

    Args:
        df: Raw OHLCV frame indexed by timestamp (hour-of-day is read
            from the index for session flags).
        feature_cols: Optional list of columns the model expects.

    Returns:
        Feature frame with NaN rows dropped.
    """
    feature_eng = FeatureEngineer()
    df_processed = df.copy()
    df_processed = feature_eng.create_price_features(df_processed)
    df_processed = feature_eng.create_volume_features(df_processed)
    df_processed = feature_eng.create_time_features(df_processed)
    df_processed = feature_eng.create_rolling_features(
        df_processed,
        columns=['close', 'volume', 'high', 'low'],
        windows=[5, 10, 20]
    )

    # Add missing features if needed
    if 'obv' not in df_processed.columns:
        df_processed['obv'] = (np.sign(df_processed['close'].diff()) *
                               df_processed['volume']).cumsum()
    if 'vpt' not in df_processed.columns:
        df_processed['vpt'] = (df_processed['close'].pct_change() *
                               df_processed['volume']).cumsum()

    # Session features (UTC hours assumed — TODO confirm against training data)
    if 'is_london' not in df_processed.columns:
        hour = df_processed.index.hour
        df_processed['is_london'] = ((hour >= 8) & (hour < 16)).astype(int)
        df_processed['is_newyork'] = ((hour >= 13) & (hour < 21)).astype(int)
        df_processed['is_tokyo'] = ((hour >= 0) & (hour < 8)).astype(int)

    # Fill any missing required features with 0
    if feature_cols:
        for col in feature_cols:
            if col not in df_processed.columns:
                df_processed[col] = 0
                logger.warning(f"Missing feature {col}, filled with 0")

    return df_processed.dropna()


def get_feature_columns(df: pd.DataFrame) -> list:
    """Get feature columns (exclude OHLCV and targets).

    Keeps only numeric columns (float/int) that are neither raw OHLCV
    fields nor ``target_*`` labels.
    """
    exclude = ['open', 'high', 'low', 'close', 'volume', 'vwap']
    exclude += [c for c in df.columns if c.startswith('target_')]
    return [c for c in df.columns
            if c not in exclude and df[c].dtype in ['float64', 'float32', 'int64']]


def predict_ranges(models: dict, X: np.ndarray) -> dict:
    """Predict high/low ranges using loaded models.

    Only models whose name contains 'high', 'low' or 'direction' are
    evaluated; any other entries in ``models`` are ignored.

    Args:
        models: Mapping of model name -> fitted estimator.
        X: Feature matrix, shape (n_samples, n_features).

    Returns:
        Mapping of model name -> prediction array.
    """
    predictions = {}
    for name, model in models.items():
        # Fix: the original if/elif chain ran the identical predict()
        # call in all three branches; collapsed into one membership test.
        if any(key in name for key in ('high', 'low', 'direction')):
            predictions[name] = model.predict(X)
    return predictions


def simulate_trade(
    entry_price: float,
    tp_price: float,
    sl_price: float,
    direction: str,
    future_highs: np.ndarray,
    future_lows: np.ndarray,
    max_bars: int = 50
) -> tuple:
    """Simulate a trade and determine outcome.

    Walks forward bar by bar. SL is checked before TP on each bar — the
    conservative assumption when both levels fall inside the same bar.

    Args:
        entry_price: Fill price at signal time (used for zero-data timeout).
        tp_price: Take-profit level.
        sl_price: Stop-loss level.
        direction: 'long' or 'short'.
        future_highs: Highs of the bars after entry.
        future_lows: Lows of the bars after entry.
        max_bars: Maximum holding period in bars.

    Returns:
        ``(result, exit_price, bars_held)`` with result one of
        'tp', 'sl', 'timeout'.
    """
    n_bars = min(len(future_highs), max_bars)
    for i in range(n_bars):
        high = future_highs[i]
        low = future_lows[i]

        if direction == 'long':
            # Check SL first (conservative)
            if low <= sl_price:
                return 'sl', sl_price, i + 1
            # Check TP
            if high >= tp_price:
                return 'tp', tp_price, i + 1
        else:  # short
            # Check SL first
            if high >= sl_price:
                return 'sl', sl_price, i + 1
            # Check TP
            if low <= tp_price:
                return 'tp', tp_price, i + 1

    # Timeout with no future data at all: exit flat at entry.
    if n_bars == 0:
        return 'timeout', entry_price, 0

    # Fix: exit at the midpoint of the LAST SIMULATED bar. The original
    # used future_highs[-1]/future_lows[-1] — a bar potentially beyond the
    # max_bars window — and reported max_bars even when fewer bars existed.
    mid = (future_highs[n_bars - 1] + future_lows[n_bars - 1]) / 2
    return 'timeout', mid, n_bars


def run_range_based_backtest(
    symbol: str = "XAUUSD",
    timeframe: str = "15m",
    horizon: str = "scalping",
    tp_factor: float = 0.4,        # TP at 40% of predicted range
    sl_factor: float = 2.0,        # SL at 200% of opposite range
    min_range_pct: float = 0.0001,  # Minimum 0.01% range to trade
    direction_bias: float = 1.3,   # Require 30% higher favorable range
    signal_every_n: int = 4        # Only trade every N bars
):
    """Run backtest using range predictions for TP/SL.

    Loads the trained range-predictor models, rebuilds the training
    features on the out-of-sample split, derives a direction and
    adaptive TP/SL per bar from the predicted high/low deltas (falling
    back to ATR + momentum when predictions are degenerate), simulates
    each trade, prints a summary and saves a JSON report.

    Returns:
        Results dict (config, metrics, trades), or None when the model
        is missing, the horizon's models are absent, or no trades fire.
    """
    logger.info("=" * 60)
    logger.info("RANGE-BASED BACKTEST")
    logger.info(f"Symbol: {symbol}")
    logger.info(f"TP Factor: {tp_factor}, SL Factor: {sl_factor}")
    logger.info("=" * 60)

    # Load model
    model_path = f"models/ml_first/{symbol}/range_predictor/{timeframe}"
    if not Path(model_path).exists():
        logger.error(f"Model not found: {model_path}")
        return None

    models, metadata = load_range_predictor(model_path)
    logger.info(f"Loaded {len(models)} models")

    # Get expected feature columns from metadata
    fi = metadata.get('feature_importance', {})
    if fi:
        first_key = list(fi.keys())[0]
        expected_features = list(fi[first_key].keys())
        logger.info(f"Model expects {len(expected_features)} features")
    else:
        expected_features = None

    # Load data
    db = MySQLConnection('config/database.yaml')
    df_raw = db.get_ticker_data(symbol, limit=100000)
    logger.info(f"Loaded {len(df_raw)} records")

    # Split data - use OOS only
    splitter = TemporalDataSplitter()
    split = splitter.split_temporal(df_raw)
    df_test = split.test_data
    logger.info(f"Using OOS data: {len(df_test)} records ({df_test.index.min()} to {df_test.index.max()})")

    # Prepare features
    df = prepare_features(df_test, expected_features)

    # Use expected features in exact order
    if expected_features:
        feature_cols = expected_features
    else:
        feature_cols = get_feature_columns(df)

    X = df[feature_cols].values
    logger.info(f"Features prepared: {X.shape}")

    # Get predictions
    predictions = predict_ranges(models, X)

    # Find high and low prediction models
    high_model_key = None
    low_model_key = None
    for key in models.keys():
        if f'{horizon}_high' in key:
            high_model_key = key
        elif f'{horizon}_low' in key:
            low_model_key = key

    if not high_model_key or not low_model_key:
        logger.error(f"Could not find models for horizon: {horizon}")
        logger.info(f"Available models: {list(models.keys())}")
        return None

    pred_high = predictions[high_model_key]
    pred_low = predictions[low_model_key]

    logger.info(f"Using predictions: {high_model_key}, {low_model_key}")
    logger.info(f"Pred High - mean: {pred_high.mean():.6f}, std: {pred_high.std():.6f}")
    logger.info(f"Pred Low - mean: {pred_low.mean():.6f}, std: {pred_low.std():.6f}")

    # If predictions have no variance, use actual price action for direction.
    # Fix: was `abs(pred_low).std()` — inconsistent with the pred_high check;
    # the intent is a plain variance test on each prediction series.
    use_price_action_direction = pred_high.std() < 1e-6 or pred_low.std() < 1e-6
    if use_price_action_direction:
        logger.warning("Predictions have no variance - using price action for direction")

    # Run backtest
    trades = []
    capital = 10000.0
    risk_per_trade = 0.01
    equity_curve = [capital]

    close_prices = df['close'].values
    high_prices = df['high'].values
    low_prices = df['low'].values

    n_long = 0
    n_short = 0
    n_skipped = 0

    # Calculate momentum for price action direction
    momentum = pd.Series(close_prices).pct_change(5).values

    # Calculate dynamic ATR for range estimation
    atr = (pd.Series(high_prices) - pd.Series(low_prices)).rolling(14).mean().values
    atr_pct = atr / close_prices  # ATR as percentage of price

    # Use mean predicted range if predictions are constant
    mean_high_delta = pred_high.mean()
    mean_low_delta = abs(pred_low.mean())

    for i in range(len(df) - 50):  # Leave room for simulation
        # Only signal every N bars
        if i % signal_every_n != 0:
            continue

        current_price = close_prices[i]

        if use_price_action_direction:
            # Use dynamic ATR for range estimation
            if i >= 14 and not np.isnan(atr_pct[i]):
                current_atr = atr_pct[i]
                predicted_high_delta = current_atr * 0.8  # ~80% of ATR for high
                predicted_low_delta = current_atr * 0.8   # ~80% of ATR for low
            else:
                predicted_high_delta = mean_high_delta
                predicted_low_delta = mean_low_delta

            # Convert delta to price ranges (hoisted out of the duplicated
            # long/short branches below)
            high_range = predicted_high_delta * current_price
            low_range = predicted_low_delta * current_price

            # Use price momentum for direction with stronger filter.
            # Require momentum to exceed a significant threshold (0.2% move in 5 bars)
            mom_threshold = 0.002  # 0.2% momentum threshold
            if i >= 5 and momentum[i] > mom_threshold:
                direction = 'long'
                n_long += 1
            elif i >= 5 and momentum[i] < -mom_threshold:
                direction = 'short'
                n_short += 1
            else:
                n_skipped += 1
                continue
        else:
            predicted_high_delta = pred_high[i]       # Delta as percentage
            predicted_low_delta = abs(pred_low[i])    # Make positive

            # Convert delta to price ranges
            high_range = predicted_high_delta * current_price
            low_range = predicted_low_delta * current_price

            # Determine direction based on range comparison
            if high_range > low_range * direction_bias:
                direction = 'long'
                n_long += 1
            elif low_range > high_range * direction_bias:
                direction = 'short'
                n_short += 1
            else:
                n_skipped += 1
                continue  # No clear direction

        # Calculate TP/SL based on direction
        if direction == 'long':
            tp_distance = high_range * tp_factor
            sl_distance = low_range * sl_factor
        else:
            tp_distance = low_range * tp_factor
            sl_distance = high_range * sl_factor

        # Check minimum range
        if tp_distance / current_price < min_range_pct:
            n_skipped += 1
            continue

        # Calculate TP/SL prices
        if direction == 'long':
            tp_price = current_price + tp_distance
            sl_price = current_price - sl_distance
        else:
            tp_price = current_price - tp_distance
            sl_price = current_price + sl_distance

        # Get future prices for simulation
        future_highs = high_prices[i+1:i+51]
        future_lows = low_prices[i+1:i+51]

        # Simulate trade
        result, exit_price, bars_held = simulate_trade(
            entry_price=current_price,
            tp_price=tp_price,
            sl_price=sl_price,
            direction=direction,
            future_highs=future_highs,
            future_lows=future_lows,
            max_bars=50
        )

        # Calculate P&L — fixed-fractional sizing: risk 1% of current capital
        # per trade, sized so a full SL hit loses exactly that amount.
        risk_amount = capital * risk_per_trade
        position_size = risk_amount / sl_distance if sl_distance > 0 else 0

        if direction == 'long':
            pnl = (exit_price - current_price) * position_size
        else:
            pnl = (current_price - exit_price) * position_size

        capital += pnl
        equity_curve.append(capital)

        trades.append({
            'bar': i,
            'time': df.index[i],
            'direction': direction,
            'entry': current_price,
            'tp': tp_price,
            'sl': sl_price,
            'exit': exit_price,
            'result': result,
            'pnl': pnl,
            'bars_held': bars_held,
            'pred_high': predicted_high_delta,
            'pred_low': predicted_low_delta
        })

    # Calculate metrics
    if not trades:
        logger.warning("No trades executed")
        return None

    trades_df = pd.DataFrame(trades)
    n_wins = int((trades_df['result'] == 'tp').sum())
    n_losses = int((trades_df['result'] == 'sl').sum())
    n_timeouts = int((trades_df['result'] == 'timeout').sum())
    total_trades = len(trades_df)
    win_rate = n_wins / total_trades if total_trades > 0 else 0

    total_pnl = float(trades_df['pnl'].sum())
    # Fix: guard on the actual filtered series instead of n_wins/n_losses —
    # timeouts can produce positive or negative pnl, so the TP/SL counts do
    # not determine whether these means are defined.
    winning = trades_df.loc[trades_df['pnl'] > 0, 'pnl']
    losing = trades_df.loc[trades_df['pnl'] < 0, 'pnl']
    avg_win = float(winning.mean()) if len(winning) > 0 else 0
    avg_loss = float(losing.mean()) if len(losing) > 0 else 0

    equity_curve = np.array(equity_curve)
    max_equity = np.maximum.accumulate(equity_curve)
    drawdown = (max_equity - equity_curve) / max_equity
    max_drawdown = float(drawdown.max())

    # Print results
    print("\n" + "=" * 60)
    print("RANGE-BASED BACKTEST RESULTS")
    print("=" * 60)
    print(f"Strategy: TP={tp_factor*100:.0f}% range, SL={sl_factor*100:.0f}% opposite")
    print(f"Direction Bias: {direction_bias}")
    print(f"Signal Frequency: Every {signal_every_n} bars")
    print("-" * 60)
    print(f"Total Signals Analyzed: {n_long + n_short + n_skipped}")
    print(f"  Long Signals: {n_long}")
    print(f"  Short Signals: {n_short}")
    print(f"  Skipped (no bias): {n_skipped}")
    print("-" * 60)
    print(f"Trades Executed: {total_trades}")
    print(f"  Wins (TP hit): {n_wins} ({n_wins/total_trades*100:.1f}%)")
    print(f"  Losses (SL hit): {n_losses} ({n_losses/total_trades*100:.1f}%)")
    print(f"  Timeouts: {n_timeouts} ({n_timeouts/total_trades*100:.1f}%)")
    print("-" * 60)
    print(f"WIN RATE: {win_rate*100:.2f}%")
    print(f"Net P&L: ${total_pnl:,.2f}")
    print(f"Avg Win: ${avg_win:,.2f}")
    print(f"Avg Loss: ${avg_loss:,.2f}")
    print(f"Final Capital: ${capital:,.2f}")
    print(f"Max Drawdown: {max_drawdown*100:.2f}%")

    if win_rate >= 0.80:
        print("\n*** 80% WIN RATE TARGET ACHIEVED! ***")
    elif win_rate >= 0.75:
        print("\n*** Close to target: 75%+ achieved ***")
    else:
        print(f"\n*** Below target. Need to adjust parameters ***")

    # Save results
    output_dir = Path("reports/range_backtest")
    output_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Counts/floats cast to native Python types above so json doesn't fall
    # back to default=str for numpy scalars.
    results = {
        'config': {
            'symbol': symbol,
            'timeframe': timeframe,
            'horizon': horizon,
            'tp_factor': tp_factor,
            'sl_factor': sl_factor,
            'min_range_pct': min_range_pct,
            'direction_bias': direction_bias,
            'signal_every_n': signal_every_n
        },
        'metrics': {
            'total_trades': total_trades,
            'win_rate': win_rate,
            'n_wins': n_wins,
            'n_losses': n_losses,
            'n_timeouts': n_timeouts,
            'total_pnl': total_pnl,
            'final_capital': capital,
            'max_drawdown': max_drawdown
        },
        'trades': trades
    }

    filepath = output_dir / f"{symbol}_{horizon}_{timestamp}.json"
    with open(filepath, 'w') as f:
        # default=str handles pandas Timestamps and any remaining numpy values
        json.dump(results, f, indent=2, default=str)
    logger.info(f"Results saved to {filepath}")

    return results


def main():
    """CLI entry point: parse arguments and run the backtest."""
    parser = argparse.ArgumentParser(description='Run Range-Based Backtest')
    parser.add_argument('--symbol', default='XAUUSD', help='Trading symbol')
    parser.add_argument('--timeframe', default='15m', help='Timeframe')
    parser.add_argument('--horizon', default='scalping', help='Prediction horizon')
    parser.add_argument('--tp-factor', type=float, default=0.3,
                        help='TP as fraction of predicted range')
    parser.add_argument('--sl-factor', type=float, default=3.0,
                        help='SL as multiple of opposite range')
    parser.add_argument('--bias', type=float, default=1.2,
                        help='Direction bias factor')
    parser.add_argument('--signal-freq', type=int, default=4,
                        help='Signal every N bars')
    args = parser.parse_args()

    results = run_range_based_backtest(
        symbol=args.symbol,
        timeframe=args.timeframe,
        horizon=args.horizon,
        tp_factor=args.tp_factor,
        sl_factor=args.sl_factor,
        direction_bias=args.bias,
        signal_every_n=args.signal_freq
    )


if __name__ == "__main__":
    main()