trading-platform-ml-engine-v2/scripts/train_neural_gating_simple.py
rckrdmrd 75c4d07690 feat: Initial commit - ML Engine codebase
Hierarchical ML Pipeline for trading predictions:
- Level 0: Attention Models (volatility/flow classification)
- Level 1: Base Models (XGBoost per symbol/timeframe)
- Level 2: Metamodels (XGBoost Stacking + Neural Gating)

Key components:
- src/pipelines/hierarchical_pipeline.py - Main prediction pipeline
- src/models/ - All ML model classes
- src/training/ - Training utilities
- src/api/ - FastAPI endpoints
- scripts/ - Training and evaluation scripts
- config/ - YAML configurations

Note: Trained models (*.joblib, *.pt) are gitignored.
      Regenerate with training scripts.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 04:27:40 -06:00

314 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Simple Neural Gating Training Script.
Uses the existing HierarchicalPipeline to generate training data
and trains the Neural Gating metamodel as an alternative to XGBoost.
Usage:
python scripts/train_neural_gating_simple.py --symbol XAUUSD
"""
import sys
import os
from pathlib import Path

# Add both root and src directories to path
# NOTE(review): inserting at index 0 means repo-local modules shadow any
# installed packages of the same name — intentional for a checkout-run script.
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, root_dir)
sys.path.insert(0, os.path.join(root_dir, 'src'))

import argparse
import numpy as np
import pandas as pd
from datetime import datetime
from loguru import logger
import joblib

# Configure logging: drop loguru's default sink and install a compact
# stdout sink ("HH:mm:ss | LEVEL | message") at INFO level.
logger.remove()
logger.add(sys.stdout, level="INFO", format="{time:HH:mm:ss} | {level} | {message}")
def load_ohlcv_data(symbol: str, start_date: str, end_date: str, timeframe: str = '15m') -> pd.DataFrame:
    """Load OHLCV bars for *symbol* from the database, resampled to *timeframe*.

    Args:
        symbol: Trading symbol (e.g. 'XAUUSD'); mapped to the DB ticker format.
        start_date: Inclusive lower bound for ``date_agg`` (ISO date string).
        end_date: Inclusive upper bound for ``date_agg`` (ISO date string).
        timeframe: '5m' or '15m'. Any other value returns the raw bars
            unresampled (original behavior), with a warning.

    Returns:
        DataFrame with columns timestamp/open/high/low/close/volume,
        timestamp restored as a regular column.
    """
    from data.database import MySQLConnection

    # Map symbol to database ticker format ('C:' = currency, 'X:' = crypto).
    ticker_map = {
        'XAUUSD': 'C:XAUUSD',
        'EURUSD': 'C:EURUSD',
        'GBPUSD': 'C:GBPUSD',
        'USDJPY': 'C:USDJPY',
        'BTCUSD': 'X:BTCUSD'
    }
    ticker = ticker_map.get(symbol, f'C:{symbol}')

    db = MySQLConnection()
    # Parameterized query (pyformat placeholders, the MySQL driver paramstyle)
    # instead of f-string interpolation, so ticker/date values cannot break
    # or inject into the SQL.
    query = """
        SELECT date_agg as timestamp, open, high, low, close, volume
        FROM tickers_agg_data
        WHERE ticker = %(ticker)s
        AND date_agg >= %(start_date)s AND date_agg <= %(end_date)s
        ORDER BY date_agg ASC
    """
    df = pd.read_sql(query, db.engine, params={
        'ticker': ticker,
        'start_date': start_date,
        'end_date': end_date,
    })
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)

    # Standard OHLCV aggregation rules for bar resampling.
    agg_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }
    rule = {'5m': '5min', '15m': '15min'}.get(timeframe)
    if rule is not None:
        df = df.resample(rule).agg(agg_dict).dropna()
    else:
        # Preserve original fall-through (raw bars) but make it visible.
        logger.warning(f"Unknown timeframe '{timeframe}'; returning unresampled data")
    return df.reset_index()
def generate_training_data(symbol: str):
    """Generate metamodel training data by replaying the HierarchicalPipeline.

    Walks 15m bars over the out-of-sample window (Jan-Aug 2024), runs the
    attention and base models for each bar, and collects their predictions
    plus context features as metamodel inputs. Targets are the next 15m
    bar's high/low excursion from the current close, clipped to >= 0.

    Returns:
        Tuple of (meta_features, target_high, target_low): a DataFrame of
        meta features and two aligned non-negative float arrays.

    Raises:
        ValueError: If the pipeline cannot load models for *symbol*.
    """
    from src.pipelines.hierarchical_pipeline import HierarchicalPipeline, PipelineConfig
    logger.info(f"Generating training data for {symbol}...")
    # Initialize pipeline
    config = PipelineConfig(
        attention_model_path='models/attention',
        base_model_path='models/symbol_timeframe_models',
        metamodel_path='models/metamodels'
    )
    pipeline = HierarchicalPipeline(config)
    if not pipeline.load_models(symbol):
        raise ValueError(f"Failed to load models for {symbol}")
    # Load OOS data (Jan 2024 - Aug 2024)
    df_5m = load_ohlcv_data(symbol, '2024-01-01', '2024-08-31', '5m')
    df_15m = load_ohlcv_data(symbol, '2024-01-01', '2024-08-31', '15m')
    logger.info(f"Loaded data: 5m={len(df_5m)}, 15m={len(df_15m)}")
    # Generate predictions and extract meta features
    meta_features_list = []
    targets_high = []
    targets_low = []
    # Process in batches to avoid memory issues
    batch_size = 100
    lookback = 200  # Features require lookback
    for i in range(lookback, len(df_15m) - 1, batch_size):
        batch_end = min(i + batch_size, len(df_15m) - 1)
        for j in range(i, batch_end):
            # Get feature windows
            df_15m_window = df_15m.iloc[j-lookback:j+1].copy()
            # NOTE(review): 5m alignment is positional (3 x 5m bars per 15m
            # bar), which assumes both frames start at the same timestamp
            # with no gaps — timestamp-based alignment would be safer; verify.
            df_5m_idx = j * 3  # Approximate 5m index
            if df_5m_idx + 1 >= len(df_5m):
                continue
            df_5m_window = df_5m.iloc[max(0, df_5m_idx-lookback*3):df_5m_idx+1].copy()
            if len(df_5m_window) < 50 or len(df_15m_window) < 50:
                continue
            try:
                # Generate features using pipeline's internal method
                features_5m = pipeline._generate_features(df_5m_window)
                features_15m = pipeline._generate_features(df_15m_window)
                if features_5m is None or features_15m is None:
                    continue
                # Get attention scores (per-timeframe regime classification)
                att_5m, att_class_5m = pipeline.attention_models[f'{symbol}_5m'].predict_single(features_5m)
                att_15m, att_class_15m = pipeline.attention_models[f'{symbol}_15m'].predict_single(features_15m)
                # Get base predictions: attention outputs are appended to
                # the raw feature vector before feeding the base models.
                base_feat_5m = np.concatenate([features_5m, [att_5m, att_class_5m]])
                base_feat_15m = np.concatenate([features_15m, [att_15m, att_class_15m]])
                pred_high_5m = pipeline.base_models[f'{symbol}_5m_high'].predict(base_feat_5m.reshape(1, -1))[0]
                pred_low_5m = pipeline.base_models[f'{symbol}_5m_low'].predict(base_feat_5m.reshape(1, -1))[0]
                pred_high_15m = pipeline.base_models[f'{symbol}_15m_high'].predict(base_feat_15m.reshape(1, -1))[0]
                pred_low_15m = pipeline.base_models[f'{symbol}_15m_low'].predict(base_feat_15m.reshape(1, -1))[0]
                # Context features: per-bar range over last 50 bars as an
                # ATR proxy, and a 20-bar volume z-score.
                atr = df_15m_window['high'].iloc[-50:].values - df_15m_window['low'].iloc[-50:].values
                atr_ratio = atr[-1] / np.median(atr) if np.median(atr) > 0 else 1.0
                vol = df_15m_window['volume'].iloc[-20:].values
                volume_z = (vol[-1] - np.mean(vol)) / (np.std(vol) + 1e-8)
                # Meta features
                meta_features_list.append({
                    'pred_high_5m': pred_high_5m,
                    'pred_low_5m': pred_low_5m,
                    'pred_high_15m': pred_high_15m,
                    'pred_low_15m': pred_low_15m,
                    'attention_5m': att_5m,
                    'attention_15m': att_15m,
                    'attention_class_5m': att_class_5m,
                    'attention_class_15m': att_class_15m,
                    'ATR_ratio': atr_ratio,
                    'volume_z': volume_z
                })
                # Targets (actual movement in next bar)
                if j + 1 < len(df_15m):
                    next_bar = df_15m.iloc[j + 1]
                    current_close = df_15m.iloc[j]['close']
                    targets_high.append(next_bar['high'] - current_close)
                    targets_low.append(current_close - next_bar['low'])
                else:
                    targets_high.append(np.nan)
                    targets_low.append(np.nan)
            except Exception as e:
                # NOTE(review): any per-sample failure is silently skipped;
                # consider logger.debug(e) here. An exception raised between
                # the feature append and the target append would also
                # misalign features and targets — the truncation below only
                # masks, not fixes, that case. Verify this cannot happen.
                continue
            # Periodic progress log (every 500 collected samples).
            if len(meta_features_list) % 500 == 0:
                logger.info(f" Processed {len(meta_features_list)} samples...")
    # Convert to arrays; truncate targets to the number of feature rows.
    meta_features = pd.DataFrame(meta_features_list)
    target_high = np.array(targets_high[:len(meta_features)])
    target_low = np.array(targets_low[:len(meta_features)])
    # Remove NaN (last-bar sentinel rows without a next-bar target)
    valid_mask = ~np.isnan(target_high) & ~np.isnan(target_low)
    meta_features = meta_features[valid_mask]
    target_high = target_high[valid_mask]
    target_low = target_low[valid_mask]
    # Ensure non-negative targets (excursion magnitudes, not signed moves)
    target_high = np.maximum(target_high, 0)
    target_low = np.maximum(target_low, 0)
    logger.info(f"Generated {len(meta_features)} training samples")
    return meta_features, target_high, target_low
def _log_comparison(neural_summary: dict, xgb_summary: dict) -> None:
    """Log a side-by-side metric table: Neural Gating vs XGBoost.

    Both summaries are expected to carry a 'metrics' dict with
    mae_high/mae_low and r2_high/r2_low; alpha stats exist only for
    the neural model.
    """
    logger.info(f"\n{'='*60}")
    logger.info("COMPARISON: Neural Gating vs XGBoost")
    logger.info(f"{'='*60}")
    neural = neural_summary['metrics']
    xgb = xgb_summary['metrics']
    logger.info(f"{'Metric':<25} {'Neural':<15} {'XGBoost':<15}")
    logger.info("-" * 55)
    neural_mae = (neural['mae_high'] + neural['mae_low']) / 2
    xgb_mae = (xgb['mae_high'] + xgb['mae_low']) / 2
    logger.info(f"{'MAE (avg)':<25} {neural_mae:<15.4f} {xgb_mae:<15.4f}")
    neural_r2 = (neural['r2_high'] + neural['r2_low']) / 2
    xgb_r2 = (xgb['r2_high'] + xgb['r2_low']) / 2
    logger.info(f"{'R2 (avg)':<25} {neural_r2:<15.4f} {xgb_r2:<15.4f}")
    # Gating-weight (alpha) statistics have no XGBoost counterpart.
    logger.info(f"{'Alpha HIGH mean':<25} {neural['alpha_high_mean']:<15.3f} {'N/A':<15}")
    logger.info(f"{'Alpha LOW mean':<25} {neural['alpha_low_mean']:<15.3f} {'N/A':<15}")


def main():
    """Train the Neural Gating metamodel for one symbol.

    Parses CLI args, generates (or loads cached) meta-feature training
    data, fits the neural gating model, saves it, and optionally logs a
    comparison against the existing XGBoost metamodel.
    """
    parser = argparse.ArgumentParser(description='Train Neural Gating Metamodel')
    parser.add_argument('--symbol', type=str, default='XAUUSD', help='Symbol to train')
    parser.add_argument('--epochs', type=int, default=50, help='Training epochs')
    parser.add_argument('--compare', action='store_true', help='Compare with XGBoost')
    args = parser.parse_args()
    symbol = args.symbol

    output_dir = Path('models/metamodels_neural')
    output_dir.mkdir(parents=True, exist_ok=True)
    logger.info("=" * 60)
    logger.info(f"NEURAL GATING TRAINING - {symbol}")
    logger.info("=" * 60)

    # PyTorch is a hard requirement for the neural metamodel.
    try:
        import torch
        logger.info(f"PyTorch: {torch.__version__}, CUDA: {torch.cuda.is_available()}")
    except ImportError:
        logger.error("PyTorch required!")
        return

    from src.models.neural_gating_metamodel import (
        NeuralGatingMetamodelWrapper,
        NeuralGatingConfig
    )

    # Check for cached training data — regenerating via the pipeline is slow.
    cache_path = output_dir / f'{symbol}_training_cache.joblib'
    if cache_path.exists():
        logger.info(f"Loading cached training data from {cache_path}")
        cache = joblib.load(cache_path)
        meta_features = cache['meta_features']
        target_high = cache['target_high']
        target_low = cache['target_low']
    else:
        # Generate training data
        meta_features, target_high, target_low = generate_training_data(symbol)
        # Cache for future use
        joblib.dump({
            'meta_features': meta_features,
            'target_high': target_high,
            'target_low': target_low
        }, cache_path)
        logger.info(f"Cached training data to {cache_path}")
    logger.info(f"Training samples: {len(meta_features)}")

    # Configure and train
    config = NeuralGatingConfig(
        epochs=args.epochs,
        early_stopping_patience=10,
        learning_rate=0.001,
        batch_size=256,
        gating_hidden_dims=[32, 16],
        residual_hidden_dims=[64, 32],
        confidence_hidden_dims=[32, 16],
        dropout=0.2
    )
    model = NeuralGatingMetamodelWrapper(symbol, config)
    model.fit(meta_features, target_high, target_low)

    # Save
    model_path = output_dir / symbol
    model.save(str(model_path))
    summary = model.get_training_summary()

    # Compare with XGBoost
    if args.compare:
        from src.models.asset_metamodel import AssetMetamodel
        xgb_path = Path(f'models/metamodels/{symbol}')
        if xgb_path.exists():
            xgb_model = AssetMetamodel.load(str(xgb_path))
            _log_comparison(summary, xgb_model.get_training_summary())
        else:
            # Previously this case was silent; surface why no table appears.
            logger.warning(f"XGBoost metamodel not found at {xgb_path}; skipping comparison")

    logger.info(f"\n{'='*60}")
    logger.info("TRAINING COMPLETE")
    logger.info(f"Model saved to: {model_path}")
    logger.info(f"{'='*60}")


if __name__ == '__main__':
    main()