#!/usr/bin/env python3
"""Simple Neural Gating Training Script.

Uses the existing HierarchicalPipeline to generate training data and trains
the Neural Gating metamodel as an alternative to XGBoost.

Usage:
    python scripts/train_neural_gating_simple.py --symbol XAUUSD
"""
import sys
import os
from pathlib import Path

# Add both root and src directories to path so project-local imports resolve
# when this script is run directly from the repo.
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, root_dir)
sys.path.insert(0, os.path.join(root_dir, 'src'))

import argparse
import numpy as np
import pandas as pd
from datetime import datetime
from loguru import logger
import joblib

# Configure logging
logger.remove()
logger.add(sys.stdout, level="INFO", format="{time:HH:mm:ss} | {level} | {message}")


def load_ohlcv_data(symbol: str, start_date: str, end_date: str, timeframe: str = '15m') -> pd.DataFrame:
    """Load OHLCV bars from the database and resample to the requested timeframe.

    Args:
        symbol: Asset symbol, e.g. 'XAUUSD'; mapped to the DB ticker format.
        start_date: Inclusive lower bound for ``date_agg`` ('YYYY-MM-DD').
        end_date: Inclusive upper bound for ``date_agg`` ('YYYY-MM-DD').
        timeframe: '5m' or '15m' to resample; any other value returns the raw
            bars unresampled (original behavior, preserved).

    Returns:
        DataFrame with columns [timestamp, open, high, low, close, volume],
        timestamp as a regular column (index reset).
    """
    from data.database import MySQLConnection

    # Map symbol to database ticker format
    ticker_map = {
        'XAUUSD': 'C:XAUUSD',
        'EURUSD': 'C:EURUSD',
        'GBPUSD': 'C:GBPUSD',
        'USDJPY': 'C:USDJPY',
        'BTCUSD': 'X:BTCUSD'
    }
    ticker = ticker_map.get(symbol, f'C:{symbol}')

    db = MySQLConnection()
    # Parameterized query (MySQL pyformat style) instead of f-string
    # interpolation — avoids SQL injection if these arguments ever come
    # from untrusted input, and lets the driver handle quoting.
    query = """
        SELECT date_agg as timestamp, open, high, low, close, volume
        FROM tickers_agg_data
        WHERE ticker = %(ticker)s
          AND date_agg >= %(start_date)s
          AND date_agg <= %(end_date)s
        ORDER BY date_agg ASC
    """
    df = pd.read_sql(
        query,
        db.engine,
        params={'ticker': ticker, 'start_date': start_date, 'end_date': end_date},
    )

    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)

    # Resample raw bars up to the requested timeframe.
    agg_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }
    rule = {'5m': '5min', '15m': '15min'}.get(timeframe)
    if rule is not None:
        df = df.resample(rule).agg(agg_dict).dropna()

    return df.reset_index()


def generate_training_data(symbol: str):
    """Generate metamodel training data by replaying the HierarchicalPipeline.

    For each 15m bar after a 200-bar warm-up, the pipeline's attention and
    base models are evaluated and their outputs — plus simple market-context
    features — form one meta-feature row.  Targets are the next bar's upward
    (high - close) and downward (close - low) excursions, clamped to >= 0.

    Args:
        symbol: Asset symbol whose models must already exist on disk.

    Returns:
        Tuple of (meta_features DataFrame, target_high ndarray, target_low ndarray).

    Raises:
        ValueError: If the pipeline models for ``symbol`` cannot be loaded.
    """
    from src.pipelines.hierarchical_pipeline import HierarchicalPipeline, PipelineConfig

    logger.info(f"Generating training data for {symbol}...")

    # Initialize pipeline
    config = PipelineConfig(
        attention_model_path='models/attention',
        base_model_path='models/symbol_timeframe_models',
        metamodel_path='models/metamodels'
    )
    pipeline = HierarchicalPipeline(config)

    if not pipeline.load_models(symbol):
        raise ValueError(f"Failed to load models for {symbol}")

    # Load OOS data (Jan 2024 - Aug 2024)
    df_5m = load_ohlcv_data(symbol, '2024-01-01', '2024-08-31', '5m')
    df_15m = load_ohlcv_data(symbol, '2024-01-01', '2024-08-31', '15m')
    logger.info(f"Loaded data: 5m={len(df_5m)}, 15m={len(df_15m)}")

    meta_features_list = []
    targets_high = []
    targets_low = []

    lookback = 200  # feature generation requires this much 15m history

    # NOTE: the loop bound (len(df_15m) - 1) guarantees j + 1 is always a
    # valid index, so the next-bar targets can be computed unconditionally
    # (the old "append NaN" fallback was unreachable and has been removed).
    for j in range(lookback, len(df_15m) - 1):
        # Get feature windows
        df_15m_window = df_15m.iloc[j - lookback:j + 1].copy()

        df_5m_idx = j * 3  # approximate 5m index (3 x 5m bars per 15m bar)
        if df_5m_idx + 1 >= len(df_5m):
            continue
        df_5m_window = df_5m.iloc[max(0, df_5m_idx - lookback * 3):df_5m_idx + 1].copy()

        if len(df_5m_window) < 50 or len(df_15m_window) < 50:
            continue

        try:
            # Generate features using pipeline's internal method
            features_5m = pipeline._generate_features(df_5m_window)
            features_15m = pipeline._generate_features(df_15m_window)
            if features_5m is None or features_15m is None:
                continue

            # Get attention scores
            att_5m, att_class_5m = pipeline.attention_models[f'{symbol}_5m'].predict_single(features_5m)
            att_15m, att_class_15m = pipeline.attention_models[f'{symbol}_15m'].predict_single(features_15m)

            # Get base predictions (features augmented with attention outputs)
            base_feat_5m = np.concatenate([features_5m, [att_5m, att_class_5m]])
            base_feat_15m = np.concatenate([features_15m, [att_15m, att_class_15m]])

            pred_high_5m = pipeline.base_models[f'{symbol}_5m_high'].predict(base_feat_5m.reshape(1, -1))[0]
            pred_low_5m = pipeline.base_models[f'{symbol}_5m_low'].predict(base_feat_5m.reshape(1, -1))[0]
            pred_high_15m = pipeline.base_models[f'{symbol}_15m_high'].predict(base_feat_15m.reshape(1, -1))[0]
            pred_low_15m = pipeline.base_models[f'{symbol}_15m_low'].predict(base_feat_15m.reshape(1, -1))[0]

            # Context features (median hoisted out of the conditional)
            atr = df_15m_window['high'].iloc[-50:].values - df_15m_window['low'].iloc[-50:].values
            atr_med = np.median(atr)
            atr_ratio = atr[-1] / atr_med if atr_med > 0 else 1.0

            vol = df_15m_window['volume'].iloc[-20:].values
            volume_z = (vol[-1] - np.mean(vol)) / (np.std(vol) + 1e-8)

            # Targets: actual movement of the next bar relative to current close
            next_bar = df_15m.iloc[j + 1]
            current_close = df_15m.iloc[j]['close']
        except Exception as exc:
            # Skip bars the pipeline cannot score, but log the reason instead
            # of swallowing it silently — silent `continue` hid real failures.
            logger.debug(f"Skipping bar {j}: {exc}")
            continue

        # Append features and targets together, AFTER everything above has
        # succeeded, so the two lists can never go out of sync (the old code
        # appended them in separate steps inside the try block).
        meta_features_list.append({
            'pred_high_5m': pred_high_5m,
            'pred_low_5m': pred_low_5m,
            'pred_high_15m': pred_high_15m,
            'pred_low_15m': pred_low_15m,
            'attention_5m': att_5m,
            'attention_15m': att_15m,
            'attention_class_5m': att_class_5m,
            'attention_class_15m': att_class_15m,
            'ATR_ratio': atr_ratio,
            'volume_z': volume_z
        })
        targets_high.append(next_bar['high'] - current_close)
        targets_low.append(current_close - next_bar['low'])

        # Log only when a sample was just added, so the message cannot fire
        # at count 0 or repeat while skipped bars leave the count unchanged.
        if len(meta_features_list) % 500 == 0:
            logger.info(f"  Processed {len(meta_features_list)} samples...")

    # Convert to arrays; ensure non-negative excursion targets.
    meta_features = pd.DataFrame(meta_features_list)
    target_high = np.maximum(np.asarray(targets_high, dtype=float), 0)
    target_low = np.maximum(np.asarray(targets_low, dtype=float), 0)

    logger.info(f"Generated {len(meta_features)} training samples")
    return meta_features, target_high, target_low


def main():
    """CLI entry point: build (or load cached) training data, train the
    Neural Gating metamodel, save it, and optionally compare with XGBoost."""
    parser = argparse.ArgumentParser(description='Train Neural Gating Metamodel')
    parser.add_argument('--symbol', type=str, default='XAUUSD', help='Symbol to train')
    parser.add_argument('--epochs', type=int, default=50, help='Training epochs')
    parser.add_argument('--compare', action='store_true', help='Compare with XGBoost')
    args = parser.parse_args()

    symbol = args.symbol
    output_dir = Path('models/metamodels_neural')
    output_dir.mkdir(parents=True, exist_ok=True)

    logger.info("=" * 60)
    logger.info(f"NEURAL GATING TRAINING - {symbol}")
    logger.info("=" * 60)

    # PyTorch is a hard requirement for the neural metamodel.
    try:
        import torch
        logger.info(f"PyTorch: {torch.__version__}, CUDA: {torch.cuda.is_available()}")
    except ImportError:
        logger.error("PyTorch required!")
        return

    from src.models.neural_gating_metamodel import (
        NeuralGatingMetamodelWrapper, NeuralGatingConfig
    )

    # Training-data generation is expensive (full pipeline replay), so the
    # result is cached on disk and reused on subsequent runs.
    cache_path = output_dir / f'{symbol}_training_cache.joblib'
    if cache_path.exists():
        logger.info(f"Loading cached training data from {cache_path}")
        cache = joblib.load(cache_path)
        meta_features = cache['meta_features']
        target_high = cache['target_high']
        target_low = cache['target_low']
    else:
        meta_features, target_high, target_low = generate_training_data(symbol)
        joblib.dump({
            'meta_features': meta_features,
            'target_high': target_high,
            'target_low': target_low
        }, cache_path)
        logger.info(f"Cached training data to {cache_path}")

    logger.info(f"Training samples: {len(meta_features)}")

    # Configure and train
    config = NeuralGatingConfig(
        epochs=args.epochs,
        early_stopping_patience=10,
        learning_rate=0.001,
        batch_size=256,
        gating_hidden_dims=[32, 16],
        residual_hidden_dims=[64, 32],
        confidence_hidden_dims=[32, 16],
        dropout=0.2
    )

    model = NeuralGatingMetamodelWrapper(symbol, config)
    model.fit(meta_features, target_high, target_low)

    # Save
    model_path = output_dir / symbol
    model.save(str(model_path))
    summary = model.get_training_summary()

    # Optional side-by-side comparison against the existing XGBoost metamodel.
    if args.compare:
        from src.models.asset_metamodel import AssetMetamodel
        xgb_path = Path(f'models/metamodels/{symbol}')
        if xgb_path.exists():
            xgb_model = AssetMetamodel.load(str(xgb_path))
            xgb_summary = xgb_model.get_training_summary()

            logger.info(f"\n{'='*60}")
            logger.info("COMPARISON: Neural Gating vs XGBoost")
            logger.info(f"{'='*60}")

            neural = summary['metrics']
            xgb = xgb_summary['metrics']

            logger.info(f"{'Metric':<25} {'Neural':<15} {'XGBoost':<15}")
            logger.info("-" * 55)

            neural_mae = (neural['mae_high'] + neural['mae_low']) / 2
            xgb_mae = (xgb['mae_high'] + xgb['mae_low']) / 2
            logger.info(f"{'MAE (avg)':<25} {neural_mae:<15.4f} {xgb_mae:<15.4f}")

            neural_r2 = (neural['r2_high'] + neural['r2_low']) / 2
            xgb_r2 = (xgb['r2_high'] + xgb['r2_low']) / 2
            logger.info(f"{'R2 (avg)':<25} {neural_r2:<15.4f} {xgb_r2:<15.4f}")

            logger.info(f"{'Alpha HIGH mean':<25} {neural['alpha_high_mean']:<15.3f} {'N/A':<15}")
            logger.info(f"{'Alpha LOW mean':<25} {neural['alpha_low_mean']:<15.3f} {'N/A':<15}")

    logger.info(f"\n{'='*60}")
    logger.info("TRAINING COMPLETE")
    logger.info(f"Model saved to: {model_path}")
    logger.info(f"{'='*60}")


if __name__ == '__main__':
    main()