Hierarchical ML Pipeline for trading predictions:
- Level 0: Attention Models (volatility/flow classification)
- Level 1: Base Models (XGBoost per symbol/timeframe)
- Level 2: Metamodels (XGBoost Stacking + Neural Gating)
Key components:
- src/pipelines/hierarchical_pipeline.py - Main prediction pipeline
- src/models/ - All ML model classes
- src/training/ - Training utilities
- src/api/ - FastAPI endpoints
- scripts/ - Training and evaluation scripts
- config/ - YAML configurations
Note: Trained models (*.joblib, *.pt) are gitignored.
Regenerate with training scripts.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
395 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
80% Win Rate Backtest
|
|
======================
|
|
Integrates RangePredictorV2 with RRBacktester for 80% WR target.
|
|
|
|
Uses predicted high/low ranges to set adaptive TP/SL levels.
|
|
Strategy: Small TP (within predicted range), Large SL (beyond opposite range)
|
|
|
|
Author: ML-Specialist (NEXUS v4.0)
|
|
Date: 2026-01-04
|
|
"""
|
|
|
|
import sys
|
|
sys.path.insert(0, 'src')
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
import yaml
|
|
import json
|
|
from loguru import logger
|
|
import argparse
|
|
|
|
from data.database import MySQLConnection, DatabaseManager
|
|
from data.features import FeatureEngineer
|
|
from training.data_splitter import TemporalDataSplitter
|
|
from models.range_predictor_v2 import RangePredictorV2, RangeMetricsV2
|
|
from backtesting.rr_backtester import RRBacktester, BacktestConfig
|
|
from backtesting.metrics import TradingMetrics
|
|
|
|
|
|
class RangeBasedSignalGenerator:
    """
    Generates trading signals using RangePredictorV2 predictions.

    Uses predicted high/low ranges to set adaptive TP/SL levels
    designed for 80% win rate target.
    """

    def __init__(
        self,
        model_path: str = "models/ml_first/XAUUSD/range_predictor/15m",
        timeframe: str = "15m",
        horizon: str = "scalping"
    ):
        """
        Initialize signal generator.

        Args:
            model_path: Path to trained RangePredictorV2 model
            timeframe: Timeframe to use
            horizon: Prediction horizon (scalping, intraday, etc.)
        """
        self.timeframe = timeframe
        self.horizon = horizon

        # Load model
        logger.info(f"Loading RangePredictorV2 from {model_path}")
        self.predictor = RangePredictorV2(timeframes=[timeframe])
        self.predictor.load(model_path)

        # Strategy parameters for 80% WR:
        # take profit well inside the predicted favorable range, stop loss
        # well beyond the predicted adverse range, so most trades hit TP.
        self.tp_range_pct = 0.4  # TP at 40% of predicted favorable range
        self.sl_range_pct = 2.0  # SL at 200% of predicted adverse range
        self.min_confidence = 0.60  # Minimum directional confidence
        self.min_range_pips = 3.0  # Minimum range to trade (in pips)

        logger.info(f"Signal generator initialized: TP={self.tp_range_pct*100:.0f}% range, "
                    f"SL={self.sl_range_pct*100:.0f}% opposite range")

    def generate_signals(
        self,
        df: pd.DataFrame,
        feature_columns: list = None
    ) -> pd.DataFrame:
        """
        Generate trading signals from price data.

        Args:
            df: OHLCV DataFrame with features
            feature_columns: Feature columns to use; defaults to every numeric
                non-OHLCV column in df.

        Returns:
            DataFrame indexed like df with columns: direction, predicted_high,
            predicted_low, tp_distance, sl_distance, confidence, prob_tp_first,
            horizon, rr_config. Rows failing the validity filter have
            prob_tp_first set to NaN.
        """
        logger.info(f"Generating signals for {len(df)} bars")

        # Default feature set: all numeric columns except raw OHLCV/vwap.
        if feature_columns is None:
            ohlcv_cols = ['open', 'high', 'low', 'close', 'volume', 'vwap']
            feature_columns = [c for c in df.columns if c not in ohlcv_cols and df[c].dtype in ['float64', 'float32', 'int64']]

        # Get predictions
        predictions = self.predictor.predict(df, feature_columns)

        # Pre-create all output columns so the validity filter below cannot
        # raise KeyError when no prediction matches this timeframe/horizon.
        signals = pd.DataFrame(index=df.index)
        signals['direction'] = 'neutral'
        for col in ('predicted_high', 'predicted_low', 'tp_distance',
                    'sl_distance', 'confidence', 'prob_tp_first'):
            signals[col] = np.nan

        for pred in predictions:
            if pred.timeframe != self.timeframe:
                continue

            # Skip predictions whose timestamp is not a bar in df: the
            # original stored nothing for them either, and we need the bar's
            # own close price below.
            idx = pred.timestamp
            if idx not in signals.index:
                continue

            for horizon_name, horizon_pred in pred.horizons.items():
                if horizon_name != self.horizon:
                    continue

                # Extract predictions (fractional deltas relative to price)
                delta_high = horizon_pred.get('delta_high', 0)
                delta_low = horizon_pred.get('delta_low', 0)
                direction = horizon_pred.get('direction', 0)

                # Convert fractional ranges to price units using the close of
                # THIS prediction's bar. (Bug fix: the previous version used
                # df['close'].iloc[-1] — the last close of the whole frame —
                # for every prediction, which is look-ahead bias in a backtest.)
                current_price = df['close'].loc[idx]
                high_range = delta_high * current_price  # Predicted upside
                low_range = abs(delta_low) * current_price  # Predicted downside

                # Determine direction from range asymmetry: trade only when
                # one side is at least 20% larger than the other.
                if high_range > low_range * 1.2:  # Bullish bias
                    suggested_direction = 'long'
                    tp_distance = high_range * self.tp_range_pct
                    sl_distance = low_range * self.sl_range_pct
                    confidence = min(high_range / (low_range + 0.0001), 2.0) / 2.0
                elif low_range > high_range * 1.2:  # Bearish bias
                    suggested_direction = 'short'
                    tp_distance = low_range * self.tp_range_pct
                    sl_distance = high_range * self.sl_range_pct
                    confidence = min(low_range / (high_range + 0.0001), 2.0) / 2.0
                else:
                    suggested_direction = 'neutral'
                    tp_distance = 0
                    sl_distance = 0
                    confidence = 0.0

                # Store in signals
                signals.loc[idx, 'direction'] = suggested_direction
                signals.loc[idx, 'predicted_high'] = delta_high
                signals.loc[idx, 'predicted_low'] = delta_low
                signals.loc[idx, 'tp_distance'] = tp_distance
                signals.loc[idx, 'sl_distance'] = sl_distance
                signals.loc[idx, 'confidence'] = confidence
                signals.loc[idx, 'prob_tp_first'] = 0.5 + confidence * 0.3  # Map to probability
                signals.loc[idx, 'horizon'] = self.horizon
                signals.loc[idx, 'rr_config'] = 'range_adaptive'

        # Filter signals.
        # NOTE(review): min_range_pips is documented in pips but tp_distance is
        # in price units — for XAUUSD these differ by the pip size; confirm the
        # intended unit before relying on this threshold.
        valid_signals = (
            (signals['direction'].isin(['long', 'short'])) &
            (signals['confidence'] >= self.min_confidence) &
            (signals['tp_distance'] >= self.min_range_pips)
        )

        # Invalid rows are marked by a NaN trade probability (the backtester
        # is expected to skip NaN prob_tp_first rows).
        signals.loc[~valid_signals, 'prob_tp_first'] = np.nan

        n_valid = valid_signals.sum()
        logger.info(f"Generated {n_valid} valid signals from {len(df)} bars")

        return signals
|
|
|
|
|
|
def prepare_features(df: pd.DataFrame) -> pd.DataFrame:
    """Prepare features for prediction."""
    engineer = FeatureEngineer()

    # Run the per-category feature builders in sequence on a copy of the
    # input, then drop warm-up rows left NaN by rolling computations.
    enriched = df.copy()
    for build in (
        engineer.create_price_features,
        engineer.create_volume_features,
        engineer.create_time_features,
    ):
        enriched = build(enriched)

    enriched = engineer.create_rolling_features(
        enriched,
        columns=['close', 'volume', 'high', 'low'],
        windows=[5, 10, 20],
    )

    return enriched.dropna()
|
|
|
|
|
|
def run_backtest_80wr(
    symbol: str = "XAUUSD",
    timeframe: str = "15m",
    horizon: str = "scalping",
    use_oos_only: bool = True
):
    """
    Run backtest targeting 80% win rate.

    Loads ticker data from MySQL, optionally restricts to the out-of-sample
    split, builds features, generates simple range-based signals, and runs
    the RR backtester once per configured risk/reward profile.

    Args:
        symbol: Trading symbol
        timeframe: Timeframe
        horizon: Prediction horizon (currently only logged; the simple
            signal generator below does not use it)
        use_oos_only: Only use OOS data (2025)

    Returns:
        Dict mapping rr_config name -> backtest result, or None when data
        or the trained model is missing.
    """
    logger.info("=" * 60)
    logger.info("80% WIN RATE BACKTEST")
    logger.info(f"Symbol: {symbol}, Timeframe: {timeframe}, Horizon: {horizon}")
    logger.info("=" * 60)

    # Load data from MySQL (config/database.yaml holds connection settings).
    logger.info("Loading data from database...")
    db = MySQLConnection('config/database.yaml')
    df_raw = db.get_ticker_data(symbol, limit=100000)

    # Bail out early when the query returned nothing.
    if df_raw.empty:
        logger.error("No data loaded")
        return None

    logger.info(f"Loaded {len(df_raw)} records ({df_raw.index.min()} to {df_raw.index.max()})")

    # Split data temporally so the backtest can run on held-out data only.
    splitter = TemporalDataSplitter()

    if use_oos_only:
        # Only use 2025 data for testing
        split = splitter.split_temporal(df_raw)
        df_test = split.test_data
        logger.info(f"Using OOS data only: {len(df_test)} records")
    else:
        df_test = df_raw

    # Prepare features (drops warm-up rows with NaNs).
    logger.info("Preparing features...")
    df_features = prepare_features(df_test)

    # Get feature columns: numeric, non-OHLCV, non-target columns.
    ohlcv_cols = ['open', 'high', 'low', 'close', 'volume', 'vwap']
    feature_cols = [c for c in df_features.columns
                    if c not in ohlcv_cols
                    and df_features[c].dtype in ['float64', 'float32', 'int64']
                    and not c.startswith('target_')]

    logger.info(f"Using {len(feature_cols)} features")

    # Initialize signal generator
    # NOTE(review): the model path is checked for existence, but the trained
    # model is never loaded or used below — signals come from the momentum
    # heuristic in generate_simple_range_signals. Presumably a placeholder
    # until RangeBasedSignalGenerator is wired in; confirm intent.
    model_path = f"models/ml_first/{symbol}/range_predictor/{timeframe}"

    if not Path(model_path).exists():
        logger.error(f"Model not found at {model_path}")
        return None

    # Generate signals using simple range-based approach
    logger.info("Generating signals...")
    signals = generate_simple_range_signals(df_features, feature_cols)

    # Configure backtester for 80% WR: small risk per trade, tight TP and
    # wide SL so most trades close at take-profit.
    config = BacktestConfig(
        initial_capital=10000.0,
        risk_per_trade=0.01,  # 1% risk (conservative)
        max_concurrent_trades=1,
        commission_pct=0.001,
        slippage_pct=0.0005,
        min_confidence=0.55,
        max_position_time=120,  # 2 hours max
        rr_configs=[
            # Conservative configs for 80% WR
            {'name': 'rr_1_2_80wr', 'sl': 10.0, 'tp': 5.0},
            {'name': 'rr_1_3_80wr', 'sl': 15.0, 'tp': 5.0},
        ],
        filter_by_amd=False,  # Disable AMD filter for now
        filter_by_volatility=False
    )

    # Run backtest
    logger.info("Running backtest...")
    backtester = RRBacktester(config)

    # Run with each RR config so the profiles can be compared side by side.
    results = {}
    for rr_config in config.rr_configs:
        logger.info(f"\n--- Testing {rr_config['name']} ---")
        result = backtester.run_backtest(
            price_data=df_features[['open', 'high', 'low', 'close', 'volume']],
            signals=signals,
            rr_config=rr_config
        )
        results[rr_config['name']] = result

    # Print summary
    print("\n" + "=" * 60)
    print("BACKTEST RESULTS SUMMARY")
    print("=" * 60)

    for rr_name, result in results.items():
        print(f"\n{rr_name}:")
        print(f"  Total Trades: {len(result.trades)}")
        print(f"  Win Rate: {result.metrics.winrate:.2%}")
        print(f"  Profit Factor: {result.metrics.profit_factor:.2f}")
        print(f"  Net Profit: ${result.metrics.net_profit:,.2f}")
        print(f"  Max Drawdown: {result.metrics.max_drawdown:.2%}")
        print(f"  Sharpe Ratio: {result.metrics.sharpe_ratio:.2f}")

        # Check if 80% WR target met
        if result.metrics.winrate >= 0.80:
            print(f"  STATUS: TARGET 80% WR ACHIEVED!")
        elif result.metrics.winrate >= 0.75:
            print(f"  STATUS: Close to target (75%+ achieved)")
        else:
            print(f"  STATUS: Below target")

    # Save results as timestamped JSON reports, one per RR config.
    output_dir = Path("reports/backtest_80wr")
    output_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    for rr_name, result in results.items():
        filepath = output_dir / f"{symbol}_{rr_name}_{timestamp}.json"
        result.save_report(str(filepath))

    logger.info(f"\nResults saved to {output_dir}")

    return results
|
|
|
|
|
|
def generate_simple_range_signals(
    df: pd.DataFrame,
    feature_cols: list
) -> pd.DataFrame:
    """
    Generate simple range-based signals for testing.

    Uses price action and momentum to predict direction.

    Args:
        df: OHLCV DataFrame (must contain 'open', 'high', 'low', 'close').
        feature_cols: Accepted for interface compatibility with the
            model-based generator; not used by this heuristic.

    Returns:
        DataFrame indexed like df with direction, tp_distance, sl_distance,
        confidence, prob_tp_first, horizon and rr_config columns. Filtered
        rows carry NaN in prob_tp_first.
    """
    signals = pd.DataFrame(index=df.index)

    # Calculate momentum indicators
    close = df['close']
    high = df['high']
    low = df['low']

    # Simple momentum: 5-bar percent change (first 5 rows are NaN).
    momentum = close.pct_change(5)

    # Range analysis: 14-bar mean bar range as a simple ATR proxy
    # (first 13 rows are NaN during warm-up).
    atr = (high - low).rolling(14).mean()

    # Directional bias based on momentum; NaN momentum compares False on
    # both sides and therefore stays neutral.
    bullish = momentum > 0.001
    bearish = momentum < -0.001

    # Generate signals
    signals['direction'] = 'neutral'
    signals.loc[bullish, 'direction'] = 'long'
    signals.loc[bearish, 'direction'] = 'short'

    # Calculate adaptive TP/SL based on ATR
    signals['tp_distance'] = atr * 0.5  # Small TP
    signals['sl_distance'] = atr * 2.0  # Large SL

    # Confidence from momentum strength, scaled into [0, 1]
    signals['confidence'] = abs(momentum).clip(0, 0.01) / 0.01
    signals['prob_tp_first'] = 0.5 + signals['confidence'] * 0.3

    # Static metadata for the backtester
    signals['horizon'] = '15m'
    signals['rr_config'] = 'rr_1_2_80wr'

    # Only signal every N bars to avoid overtrading
    signal_every_n = 20  # Signal every 20 bars (~5 hours at 15m)
    mask = np.arange(len(signals)) % signal_every_n != 0
    signals.loc[mask, 'prob_tp_first'] = np.nan

    # Filter neutral signals
    signals.loc[signals['direction'] == 'neutral', 'prob_tp_first'] = np.nan

    # Bug fix: during the ATR warm-up (rows 5-13) a row can have a long/short
    # direction and a valid prob_tp_first but NaN TP/SL distances, producing
    # trades with undefined exit levels. Invalidate those rows too.
    signals.loc[signals['tp_distance'].isna(), 'prob_tp_first'] = np.nan

    valid = signals['prob_tp_first'].notna().sum()
    logger.info(f"Generated {valid} signals from {len(df)} bars")

    return signals
|
|
|
|
|
|
def main():
    """Parse CLI arguments and launch the 80% win-rate backtest."""
    cli = argparse.ArgumentParser(description='Run 80% Win Rate Backtest')
    cli.add_argument('--symbol', default='XAUUSD', help='Trading symbol')
    cli.add_argument('--timeframe', default='15m', help='Timeframe')
    cli.add_argument('--horizon', default='scalping', help='Prediction horizon')
    cli.add_argument('--all-data', action='store_true', help='Use all data (not just OOS)')

    opts = cli.parse_args()

    # --all-data flips the default OOS-only behavior.
    return run_backtest_80wr(
        symbol=opts.symbol,
        timeframe=opts.timeframe,
        horizon=opts.horizon,
        use_oos_only=not opts.all_data,
    )
|
|
|
|
|
|
# Script entry point: run the backtest with command-line arguments.
if __name__ == "__main__":
    main()
|