"""
Tests for SymbolTimeframeTrainer
================================

Tests for the improved symbol-timeframe trainer with:
- ATR-normalized targets
- Reduced sample weighting aggressiveness
- Optimized XGBoost hyperparameters

Author: Trading Platform Team
Version: 1.0.0
Created: 2026-01-07
"""

import pytest
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from pathlib import Path
import tempfile
import shutil

# Import the module under test
import sys
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))

from training.symbol_timeframe_trainer import (
    SymbolTimeframeTrainer,
    TrainerConfig,
    SymbolConfig,
    ModelKey,
    TrainingResult,
    SYMBOL_CONFIGS
)


# Deliberately tiny XGBoost settings shared by the integration tests so that
# they run quickly. Always pass a copy (``dict(_FAST_XGB_PARAMS)``) so one
# test cannot affect another through a shared mutable dict.
_FAST_XGB_PARAMS = {
    'n_estimators': 10,  # Small for fast test
    'max_depth': 3,
    'learning_rate': 0.1,
    'tree_method': 'hist',
    'random_state': 42,
}


def _make_ohlcv_df(n: int = 100) -> pd.DataFrame:
    """Build a deterministic synthetic OHLCV frame with ``n`` 15-minute bars.

    The global NumPy RNG is re-seeded with 42 on every call, so the returned
    data is identical across calls and any ``np.random`` draw made by the
    caller afterwards continues from the same, reproducible state.

    Args:
        n: Number of bars to generate.

    Returns:
        DataFrame indexed by a 15-minute ``DatetimeIndex`` starting at
        2025-01-01 with columns ``open``, ``high``, ``low``, ``close`` and
        ``volume``.
    """
    np.random.seed(42)
    dates = pd.date_range('2025-01-01', periods=n, freq='15min')
    price = 2000 + np.cumsum(np.random.randn(n) * 2)
    # The dict values are evaluated top to bottom; that order fixes which
    # random draws feed which column, so do not reorder the keys.
    return pd.DataFrame({
        'open': price,
        'high': price + np.abs(np.random.randn(n)) * 5,
        'low': price - np.abs(np.random.randn(n)) * 5,
        'close': price + np.random.randn(n) * 0.5,
        'volume': np.random.randint(100, 1000, n)
    }, index=dates)


def _make_trainer() -> SymbolTimeframeTrainer:
    """Return a trainer configured for the single XAUUSD / 15m combination."""
    config = TrainerConfig(symbols=['XAUUSD'], timeframes=['15m'])
    return SymbolTimeframeTrainer(config)


class TestATRComputation:
    """Tests for ATR computation with shift(1)"""

    def create_sample_df(self, n: int = 100) -> pd.DataFrame:
        """Create sample OHLCV data for testing"""
        return _make_ohlcv_df(n)

    def test_atr_computation_returns_correct_shape(self):
        """ATR should return same length as input"""
        trainer = _make_trainer()
        df = self.create_sample_df(100)

        atr = trainer._compute_atr(df, period=14)

        assert len(atr) == len(df)

    def test_atr_has_shift_one(self):
        """ATR should have NaN at first position due to shift(1)"""
        trainer = _make_trainer()
        df = self.create_sample_df(100)

        # np.asarray keeps the integer lookups below strictly positional,
        # whether _compute_atr returns an ndarray or a pandas Series.
        atr = np.asarray(trainer._compute_atr(df, period=14))

        # First few values should be NaN due to rolling + shift
        assert np.isnan(atr[0])

        # After warmup period, values should be valid
        assert not np.isnan(atr[20])

    def test_atr_no_future_leakage(self):
        """ATR at time t should not use data from time t+1"""
        trainer = _make_trainer()

        df = self.create_sample_df(50)
        atr_before = np.asarray(trainer._compute_atr(df, period=14))

        # Append one bar with an extreme range right after the last bar.
        last_close = df['close'].iloc[-1]
        new_row = pd.DataFrame({
            'open': [last_close],
            'high': [last_close + 1000],  # Extreme high
            'low': [last_close - 1000],  # Extreme low
            'close': [last_close],
            'volume': [500]
        }, index=[df.index[-1] + timedelta(minutes=15)])
        # pd.concat builds a new frame, so ``df`` itself is left untouched.
        df_extended = pd.concat([df, new_row])

        atr_after = np.asarray(trainer._compute_atr(df_extended, period=14))

        # ATR at position 49 must be identical in both runs: a value that
        # changes when a later row (position 50) is appended would mean the
        # computation looks into the future.
        assert atr_before[49] == atr_after[49]

    def test_atr_values_are_positive(self):
        """ATR should always be non-negative"""
        trainer = _make_trainer()
        df = self.create_sample_df(100)

        atr = trainer._compute_atr(df, period=14)

        valid_atr = atr[~np.isnan(atr)]
        assert np.all(valid_atr >= 0)


class TestTargetNormalization:
    """Tests for target normalization by ATR"""

    def create_sample_df(self, n: int = 100) -> pd.DataFrame:
        """Create sample OHLCV data"""
        return _make_ohlcv_df(n)

    def test_normalized_targets_scale(self):
        """Normalized targets should be in reasonable scale (ATR multiples)"""
        trainer = _make_trainer()
        df = self.create_sample_df(100)

        target_high, target_low, _ = trainer._compute_targets(
            df, horizon_bars=3, normalize=True
        )

        # Remove NaN values
        valid_high = target_high[~np.isnan(target_high)]
        valid_low = target_low[~np.isnan(target_low)]

        # Normalized targets should be clipped to [-5, 5]
        assert np.all(valid_high >= -5)
        assert np.all(valid_high <= 5)
        assert np.all(valid_low >= -5)
        assert np.all(valid_low <= 5)

    def test_raw_targets_different_from_normalized(self):
        """Raw and normalized targets should be different"""
        trainer = _make_trainer()
        df = self.create_sample_df(100)

        target_high_norm, target_low_norm, _ = trainer._compute_targets(
            df, horizon_bars=3, normalize=True
        )
        target_high_raw, target_low_raw, _ = trainer._compute_targets(
            df, horizon_bars=3, normalize=False
        )

        valid_mask = ~(np.isnan(target_high_norm) | np.isnan(target_high_raw))

        # Fail loudly instead of passing vacuously when there is nothing to
        # compare (e.g. every target came back as NaN).
        assert valid_mask.any(), "expected at least one non-NaN target pair"

        # They should not be equal (except for edge cases)
        assert not np.allclose(
            target_high_norm[valid_mask], target_high_raw[valid_mask]
        )

    def test_targets_have_correct_sign(self):
        """target_high should be >= 0, target_low should be >= 0"""
        trainer = _make_trainer()
        df = self.create_sample_df(100)

        # Use raw targets to check sign (before normalization)
        target_high, target_low, _ = trainer._compute_targets(
            df, horizon_bars=3, normalize=False
        )

        valid_high = target_high[~np.isnan(target_high)]
        valid_low = target_low[~np.isnan(target_low)]

        # High should be positive (future_high > close)
        assert np.mean(valid_high >= 0) > 0.9  # Most should be positive

        # Low should be positive (close > future_low)
        assert np.mean(valid_low >= 0) > 0.9


class TestSampleWeighting:
    """Tests for sample weighting configuration"""

    def test_default_weighting_parameters(self):
        """Default parameters should be the optimized values"""
        config = TrainerConfig()

        # Check new default values
        assert config.softplus_beta == 2.0, "softplus_beta should be 2.0 (reduced from 4.0)"
        assert config.softplus_w_max == 2.0, "softplus_w_max should be 2.0 (reduced from 3.0)"

    def test_weighting_can_be_disabled(self):
        """Sample weighting should be disableable"""
        config = TrainerConfig(use_dynamic_factor_weighting=False)
        trainer = SymbolTimeframeTrainer(config)

        # Create sample data. The helper re-seeds the global RNG, so the
        # target draws below are reproducible as well.
        n = 100
        df = _make_ohlcv_df(n)

        target_high = np.random.randn(n)
        target_low = np.random.randn(n)

        weights = trainer._compute_sample_weights(df, target_high, target_low)

        # When disabled, all weights should be 1.0
        assert np.allclose(weights, 1.0)


class TestXGBoostHyperparameters:
    """Tests for XGBoost hyperparameter configuration"""

    def test_default_hyperparameters_are_optimized(self):
        """Default hyperparameters should be the optimized values"""
        config = TrainerConfig()
        params = config.xgb_params

        # Check optimized values
        assert params['n_estimators'] == 150, "n_estimators should be 150"
        assert params['max_depth'] == 4, "max_depth should be 4"
        assert params['learning_rate'] == 0.02, "learning_rate should be 0.02"
        assert params['min_child_weight'] == 20, "min_child_weight should be 20"
        assert params['gamma'] == 0.3, "gamma should be 0.3"
        assert params['reg_alpha'] == 0.5, "reg_alpha should be 0.5"
        assert params['reg_lambda'] == 5.0, "reg_lambda should be 5.0"

    def test_regularization_is_stronger(self):
        """New config should have stronger regularization"""
        config = TrainerConfig()
        params = config.xgb_params

        # These should be higher than before for more regularization
        assert params['reg_alpha'] >= 0.5, "L1 regularization should be >= 0.5"
        assert params['reg_lambda'] >= 5.0, "L2 regularization should be >= 5.0"
        assert params['gamma'] >= 0.3, "gamma should be >= 0.3"


class TestModelKey:
    """Tests for ModelKey dataclass"""

    def test_model_key_string_representation(self):
        """ModelKey should have correct string format"""
        key = ModelKey(symbol='XAUUSD', timeframe='15m', target_type='high', horizon_bars=3)
        assert str(key) == 'XAUUSD_15m_high_h3'

    def test_model_key_path_representation(self):
        """ModelKey should have correct path format"""
        key = ModelKey(symbol='XAUUSD', timeframe='15m', target_type='high', horizon_bars=3)
        assert key.to_path() == 'XAUUSD/15m/high_h3'


class TestSymbolConfigs:
    """Tests for symbol configurations"""

    def test_common_symbols_configured(self):
        """Common trading symbols should be configured"""
        expected_symbols = ['XAUUSD', 'BTCUSD', 'EURUSD', 'GBPUSD', 'USDJPY']

        for symbol in expected_symbols:
            assert symbol in SYMBOL_CONFIGS, f"{symbol} should be in SYMBOL_CONFIGS"

    def test_symbol_config_has_required_fields(self):
        """Each symbol config should have required fields"""
        for symbol, config in SYMBOL_CONFIGS.items():
            assert hasattr(config, 'symbol'), f"{symbol} config should have 'symbol'"
            assert hasattr(config, 'base_factor'), f"{symbol} config should have 'base_factor'"
            assert hasattr(config, 'pip_value'), f"{symbol} config should have 'pip_value'"


class TestTrainerIntegration:
    """Integration tests for the trainer"""

    def create_training_data(self, n: int = 1000) -> pd.DataFrame:
        """Create sample training data.

        Args:
            n: Number of 15-minute bars to generate, starting at 2023-01-01.
                For scale: 1000 bars span about 10.4 days and 50000 bars
                about 521 days (~1.4 years).

        Returns:
            DataFrame with OHLCV columns plus the synthetic feature columns
            ``rsi``, ``macd`` and ``bb_width``.
        """
        np.random.seed(42)

        dates = pd.date_range('2023-01-01', periods=n, freq='15min')

        price = 2000 + np.cumsum(np.random.randn(n) * 0.5)

        # Add varying volatility: wider bars between 13:00 and 16:00
        volatility = np.where(
            (dates.hour >= 13) & (dates.hour < 16),
            5.0,
            2.0
        )

        df = pd.DataFrame({
            'open': price,
            'high': price + np.abs(np.random.randn(n)) * volatility,
            'low': price - np.abs(np.random.randn(n)) * volatility,
            'close': price + np.random.randn(n) * 0.5,
            'volume': np.random.randint(100, 1000, n),
            # Features
            'rsi': 50 + np.random.randn(n) * 10,
            'macd': np.random.randn(n),
            'bb_width': 10 + np.random.randn(n)
        }, index=dates)

        return df

    def test_trainer_initialization(self):
        """Trainer should initialize correctly"""
        config = TrainerConfig(
            symbols=['XAUUSD'],
            timeframes=['15m'],
            min_train_samples=100
        )
        trainer = SymbolTimeframeTrainer(config)

        assert trainer.config == config
        assert 'XAUUSD' in trainer.symbol_configs

    def test_trainer_can_train_single(self):
        """Trainer should be able to train on single symbol/timeframe"""
        config = TrainerConfig(
            symbols=['XAUUSD'],
            timeframes=['15m'],
            train_years=1.5,  # Increased to cover more data
            holdout_years=0.3,  # Smaller holdout
            min_train_samples=100,
            xgb_params=dict(_FAST_XGB_PARAMS)
        )
        trainer = SymbolTimeframeTrainer(config)

        # Create more data to ensure enough for training and holdout:
        # 50000 bars of 15m data span about 521 days (~1.4 years).
        df = self.create_training_data(50000)

        results = trainer.train_single(df, 'XAUUSD', '15m')

        # Should have results for high and low
        assert len(results) == 2

        for result in results.values():
            assert isinstance(result, TrainingResult)
            assert result.n_train > 0
            assert result.n_val > 0

    def test_trainer_save_and_load(self):
        """Trainer should be able to save and load models"""
        config = TrainerConfig(
            symbols=['XAUUSD'],
            timeframes=['15m'],
            train_years=0.5,
            holdout_years=0.1,
            min_train_samples=100,
            xgb_params=dict(_FAST_XGB_PARAMS)
        )
        trainer = SymbolTimeframeTrainer(config)

        # NOTE(review): 1000 bars cover only about 10.4 days, far less than
        # train_years + holdout_years. If train_single produces no models on
        # so little data, the equality check below passes trivially (0 == 0)
        # -- confirm against the trainer's split logic.
        df = self.create_training_data(1000)

        # Train
        trainer.train_single(df, 'XAUUSD', '15m')

        # Save to temp directory
        with tempfile.TemporaryDirectory() as tmpdir:
            trainer.save(tmpdir)

            # Load into new trainer
            new_trainer = SymbolTimeframeTrainer(config)
            new_trainer.load(tmpdir)

            # Check models are loaded
            assert len(new_trainer.models) == len(trainer.models)


if __name__ == '__main__':
    # Propagate pytest's exit code so that running this file as a script
    # reports failures to the shell / CI instead of always exiting with 0.
    sys.exit(pytest.main([__file__, '-v']))