---
# ML_INVENTORY.yml - ML Engine component inventory
# Trading Platform
# Last updated: 2026-01-25
#
# Catalogue of the ML Engine's models, engineered features, services,
# pipelines, runtime configuration and monitoring targets. The top-level
# sections are: metadata, models, features, services, pipelines, config,
# metrics, tradingagent_integration, feature_compatibility, references.

# NOTE(review): `changelog` is placed under `metadata` (no section banner
# separates them) - confirm against consumers of this file.
metadata:
  version: "2.2.0"
  last_updated: "2026-01-25"
  epic: "OQI-006"
  description: "Inventario de modelos, features y servicios del ML Engine"
  changelog:
    - version: "2.2.0"
      date: "2026-01-25"
      changes:
        - "Database migration: MySQL deprecated, now using PostgreSQL exclusively"
        - "Added src/data/database.py module for PostgreSQL access"
        - "Created .env with PostgreSQL credentials"
        - "Updated config/database.yaml to mark MySQL as deprecated"
        - "Data loaded from Polygon API into local PostgreSQL (6 tickers, 365 days)"
    - version: "2.1.0"
      date: "2026-01-07"
      changes:
        - "Added models ML-008 to ML-018 (previously undocumented)"
        - "Added SVC-ML-005 HierarchicalPredictorService"
        - "Alignment validation completed"
    - version: "2.0.0"
      date: "2026-01-07"
      changes:
        - "Added AttentionScoreModel (ML-005)"
        - "Added SymbolTimeframeModel with attention (ML-006)"
        - "Added AssetMetamodel (ML-007 - planned)"
        - "Added attention features (FA-001 to FA-009)"
    - version: "1.0.0"
      date: "2025-12-05"
      changes:
        - "Initial inventory creation"

# ============================================
# MACHINE LEARNING MODELS
# ============================================
# One entry per model, keyed by a stable "ML-NNN" id. ML-001 to ML-004 are
# planned; ML-005 to ML-018 are marked implemented.

models:
  - id: "ML-001"
    name: "PricePredictor"
    description: "Modelo de predicción de dirección de precio"
    type: "classification"
    framework: "PyTorch"
    input_features: 45
    output: "bullish/bearish/neutral"
    confidence_range: "0.0-1.0"
    horizons:
      - "1h"
      - "4h"
      - "1d"
    symbols_supported:
      - "stocks_us"
      - "crypto_major"
    training_frequency: "weekly"
    accuracy_target: "65%"
    related_rf: "RF-ML-001"
    status: "planned"

  - id: "ML-002"
    name: "TrendDetector"
    description: "Detector de tendencias y cambios de tendencia"
    type: "classification"
    framework: "PyTorch"
    input_features: 30
    output: "uptrend/downtrend/ranging"
    horizons:
      - "4h"
      - "1d"
      - "1w"
    related_rf: "RF-ML-002"
    status: "planned"

  - id: "ML-003"
    name: "VolatilityPredictor"
    description: "Predictor de volatilidad futura"
    type: "regression"
    framework: "PyTorch"
    input_features: 25
    output: "volatility_percent"
    related_rf: "RF-ML-003"
    status: "planned"

  - id: "ML-004"
    name: "SentimentAnalyzer"
    description: "Análisis de sentimiento de noticias"
    type: "classification"
    framework: "Transformers"
    model_base: "FinBERT"
    output: "positive/negative/neutral"
    related_rf: "RF-ML-004"
    status: "planned"

  # Level 0 of the hierarchical architecture (per its description).
  - id: "ML-005"
    name: "AttentionScoreModel"
    description: "Modelo de atencion que aprende CUANDO prestar atencion al mercado (Nivel 0 de arquitectura jerarquica)"
    type: "dual (regression + classification)"
    framework: "XGBoost"
    input_features: 9
    features:
      - "volume_ratio"
      - "volume_z"
      - "ATR"
      - "ATR_ratio"
      - "CMF"
      - "MFI"
      - "OBV_delta"
      - "BB_width"
      - "displacement"
    output:
      regression: "attention_score (0-3)"
      classification: "flow_class (0=low, 1=medium, 2=high)"
    # NOTE(review): `target` is placed at model level rather than under
    # `output` - confirm.
    target: "move_multiplier = future_range / rolling_median(range)"
    symbols_supported:
      - "XAUUSD"
      - "EURUSD"
      - "BTCUSD"
      - "GBPUSD"
      - "USDJPY"
    timeframes:
      - "5m"
      - "15m"
    training_frequency: "weekly"
    metrics:
      r2_regression: "0.12-0.22"
      classification_accuracy: "54-61%"
    related_et: "ET-ML-007"
    files:
      model: "src/models/attention_score_model.py"
      trainer: "src/training/attention_trainer.py"
      script: "scripts/train_attention_model.py"
    status: "implemented"
    implementation_date: "2026-01-06"

  # Level 1 of the hierarchical architecture (per its description).
  # NOTE(review): this entry declares 52 input features with attention for
  # all five symbols, while `feature_compatibility.models_feature_count`
  # lists GBPUSD, USDJPY and BTCUSD at 50 features without attention -
  # confirm which is current.
  - id: "ML-006"
    name: "SymbolTimeframeModel"
    description: "Modelo base de prediccion de rango con attention features (Nivel 1 de arquitectura jerarquica)"
    type: "regression"
    framework: "XGBoost"
    input_features: 52
    features_breakdown:
      base_features: 50
      attention_features: 2
    attention_features:
      - "attention_score"
      - "attention_class"
    output:
      - "delta_high (multiplos de factor)"
      - "delta_low (multiplos de factor)"
    symbols_supported:
      - "XAUUSD"
      - "EURUSD"
      - "BTCUSD"
      - "GBPUSD"
      - "USDJPY"
    timeframes:
      - "5m"
      - "15m"
    training_frequency: "weekly"
    uses_attention: true
    related_et: "ET-ML-007"
    files:
      trainer: "src/training/symbol_timeframe_trainer.py"
      script: "scripts/train_symbol_timeframe_models.py"
    status: "implemented"
    implementation_date: "2026-01-06"

  # Level 2 of the hierarchical architecture (per its description).
  - id: "ML-007"
    name: "AssetMetamodel"
    description: "Metamodelo por activo que sintetiza predicciones de 5m y 15m (Nivel 2 de arquitectura jerarquica)"
    type: "dual (regression + classification)"
    framework: "XGBoost"
    input_features: 10
    # 4 predictions + 4 attention + 2 context = the 10 input features above.
    features:
      predictions:
        - "pred_high_5m"
        - "pred_low_5m"
        - "pred_high_15m"
        - "pred_low_15m"
      attention:
        - "attention_5m"
        - "attention_15m"
        - "attention_class_5m"
        - "attention_class_15m"
      context:
        - "ATR_ratio"
        - "volume_z"
    output:
      - "delta_high_final"
      - "delta_low_final"
      - "confidence (binary + probability)"
    symbols_trained:
      - "XAUUSD"
      - "EURUSD"
      - "GBPUSD"
      - "USDJPY"
      - "BTCUSD"
    symbols_pending: []
    training_frequency: "weekly"
    uses_oos_predictions: true
    oos_period: "2024-06-01 to 2025-12-31"
    # Per-symbol results; not every symbol reports the same set of fields.
    metrics:
      XAUUSD:
        samples: 18749
        mae_high: 2.0818
        mae_low: 2.2241
        r2_high: 0.0674
        r2_low: 0.1150
        confidence_accuracy: "90.01%"
        improvement_vs_avg: "+1.9%"
      EURUSD:
        samples: 19505
        mae_high: 0.0005
        mae_low: 0.0004
        r2_high: -0.0417
        r2_low: -0.0043
        confidence_accuracy: "86.26%"
        improvement_vs_avg: "+3.0%"
      GBPUSD:
        samples: 17412
        confidence_accuracy: "93.0%"
        status: "trained"
      USDJPY:
        samples: 16547
        confidence_accuracy: "93.6%"
        status: "trained"
      BTCUSD:
        samples: 23233
        mae_high: 150.58
        mae_low: 175.84
        r2_high: 0.163
        r2_low: 0.035
        confidence_accuracy: "87.3%"
        improvement_vs_avg: "+5.3%"
        status: "trained"
        # NOTE(review): `backtest` is nested under BTCUSD because its
        # values match the BTCUSD backtest_results recorded under
        # feature_compatibility - confirm it is not meant to be
        # model-level.
        backtest:
          strategy: "aggressive_filter"
          win_rate: "46.8%"
          expectancy: "+0.0700"
          profit_factor: 1.17
    related_et: "ET-ML-007"
    # NOTE(review): `saved_models` is placed under `files` - confirm.
    files:
      model: "src/models/asset_metamodel.py"
      trainer: "src/training/metamodel_trainer.py"
      script: "scripts/train_metamodels.py"
      saved_models:
        - "models/metamodels/XAUUSD/"
        - "models/metamodels/EURUSD/"
        - "models/metamodels/GBPUSD/"
        - "models/metamodels/USDJPY/"
        - "models/metamodels/BTCUSD/"
    status: "implemented"
    implementation_date: "2026-01-07"

  # ML-008 to ML-018: compact entries (single source file each), added to
  # the inventory in changelog version 2.1.0.
  - id: "ML-008"
    name: "RangePredictor"
    description: "Legacy range prediction model"
    type: "regression"
    framework: "XGBoost"
    file: "src/models/range_predictor.py"
    status: "implemented"

  - id: "ML-009"
    name: "RangePredictorV2"
    description: "Multi-timeframe range prediction model"
    type: "regression"
    framework: "XGBoost"
    file: "src/models/range_predictor_v2.py"
    status: "implemented"

  - id: "ML-010"
    name: "RangePredictorFactor"
    description: "Factor-based range prediction model"
    type: "regression"
    framework: "XGBoost"
    file: "src/models/range_predictor_factor.py"
    status: "implemented"

  - id: "ML-011"
    name: "EnhancedRangePredictor"
    description: "Enhanced range predictor with context"
    type: "regression"
    framework: "XGBoost"
    file: "src/models/enhanced_range_predictor.py"
    status: "implemented"

  - id: "ML-012"
    name: "AMDDetectorML"
    description: "AMD phases ML detector"
    type: "classification"
    framework: "XGBoost"
    file: "src/models/amd_detector_ml.py"
    status: "implemented"

  - id: "ML-013"
    name: "ICTSMCDetector"
    description: "ICT/SMC patterns detector"
    type: "classification"
    framework: "XGBoost"
    file: "src/models/ict_smc_detector.py"
    status: "implemented"

  - id: "ML-014"
    name: "MovementMagnitudePredictor"
    description: "Movement USD prediction model"
    type: "regression"
    framework: "XGBoost"
    file: "src/models/movement_magnitude_predictor.py"
    status: "implemented"

  - id: "ML-015"
    name: "TPSLClassifier"
    description: "TP/SL probability classifier"
    type: "classification"
    framework: "XGBoost"
    file: "src/models/tp_sl_classifier.py"
    status: "implemented"

  - id: "ML-016"
    name: "SignalGenerator"
    description: "Trading signals generator"
    type: "classification"
    framework: "XGBoost"
    file: "src/models/signal_generator.py"
    status: "implemented"

  - id: "ML-017"
    name: "DualHorizonEnsemble"
    description: "Multi-horizon ensemble model"
    type: "ensemble"
    framework: "XGBoost"
    file: "src/models/dual_horizon_ensemble.py"
    status: "implemented"

  - id: "ML-018"
    name: "NeuralGatingMetamodel"
    description: "Neural gating metamodel"
    type: "ensemble"
    framework: "PyTorch"
    file: "src/models/neural_gating_metamodel.py"
    status: "implemented"

# ============================================
# FEATURE ENGINEERING
# ============================================
# Features grouped by family. Id prefixes: FT = technical,
# FM = market structure, FS = sentiment, FA = attention.

features:
  technical:
    - id: "FT-001"
      name: "rsi_14"
      description: "Relative Strength Index 14 períodos"
      type: "float"
      range: "0-100"
    - id: "FT-002"
      name: "macd_signal"
      description: "MACD Signal Line"
      type: "float"
    - id: "FT-003"
      name: "macd_histogram"
      description: "MACD Histogram"
      type: "float"
    - id: "FT-004"
      name: "bb_position"
      description: "Posición relativa en Bollinger Bands"
      type: "float"
      range: "0-1"
    - id: "FT-005"
      name: "sma_20_50_cross"
      description: "Cruce SMA 20/50"
      type: "int"
      values: "-1/0/1"
    - id: "FT-006"
      name: "atr_14"
      description: "Average True Range 14 períodos"
      type: "float"
    - id: "FT-007"
      name: "volume_ratio"
      description: "Ratio volumen actual vs promedio"
      type: "float"
    - id: "FT-008"
      name: "price_momentum"
      description: "Momentum de precio (ROC)"
      type: "float"

  market_structure:
    - id: "FM-001"
      name: "support_distance"
      description: "Distancia al soporte más cercano"
      type: "float"
    - id: "FM-002"
      name: "resistance_distance"
      description: "Distancia a la resistencia más cercana"
      type: "float"
    - id: "FM-003"
      name: "trend_strength"
      description: "Fuerza de la tendencia (ADX)"
      type: "float"
      range: "0-100"

  sentiment:
    - id: "FS-001"
      name: "news_sentiment"
      description: "Sentimiento de noticias recientes"
      type: "float"
      range: "-1 to 1"
    - id: "FS-002"
      name: "social_sentiment"
      description: "Sentimiento de redes sociales"
      type: "float"
      range: "-1 to 1"
    - id: "FS-003"
      name: "fear_greed_index"
      description: "Indice de miedo y codicia (crypto)"
      type: "int"
      range: "0-100"

  # FA-001 to FA-008 are inputs of ML-005; FA-009 and FA-010 are outputs of
  # ML-005 that feed ML-006 and ML-007 (see `used_by` / `output_of`).
  attention:
    - id: "FA-001"
      name: "volume_ratio"
      description: "Ratio de volumen actual vs mediana movil"
      type: "float"
      calculation: "volume / rolling_median(volume, 20)"
      used_by: ["ML-005"]
    - id: "FA-002"
      name: "volume_z"
      description: "Z-score del volumen"
      type: "float"
      calculation: "(volume - rolling_mean) / rolling_std"
      window: 20
      used_by: ["ML-005"]
    - id: "FA-003"
      name: "ATR_ratio"
      description: "Ratio de ATR vs mediana movil - FEATURE MAS IMPORTANTE"
      type: "float"
      calculation: "ATR / rolling_median(ATR, 50)"
      importance: "34-50%"
      used_by: ["ML-005"]
    - id: "FA-004"
      name: "CMF"
      description: "Chaikin Money Flow - flujo de dinero"
      type: "float"
      range: "-1 to 1"
      used_by: ["ML-005"]
    - id: "FA-005"
      name: "MFI"
      description: "Money Flow Index"
      type: "float"
      range: "0-100"
      used_by: ["ML-005"]
    - id: "FA-006"
      name: "OBV_delta"
      description: "Cambio en On-Balance Volume normalizado"
      type: "float"
      calculation: "diff(OBV) / rolling_std(OBV, 20)"
      used_by: ["ML-005"]
    - id: "FA-007"
      name: "BB_width"
      description: "Ancho de Bollinger Bands normalizado"
      type: "float"
      calculation: "(BB_upper - BB_lower) / close"
      used_by: ["ML-005"]
    - id: "FA-008"
      name: "displacement"
      description: "Desplazamiento de precio normalizado por ATR"
      type: "float"
      calculation: "(close - open) / ATR"
      used_by: ["ML-005"]
    - id: "FA-009"
      name: "attention_score"
      description: "Score de atencion generado por modelo ML-005"
      type: "float"
      range: "0-3"
      output_of: "ML-005"
      used_by: ["ML-006", "ML-007"]
    - id: "FA-010"
      name: "attention_class"
      description: "Clasificacion de flujo generada por modelo ML-005"
      type: "int"
      values: "0=low_flow, 1=medium_flow, 2=high_flow"
      output_of: "ML-005"
      used_by: ["ML-006", "ML-007"]

# ============================================
# ML SERVICES
# ============================================

services:
  - id: "SVC-ML-001"
    name: "MLPredictionService"
    description: "Servicio principal de predicciones"
    framework: "FastAPI"
    endpoints:
      - path: "/predict/{symbol}"
        method: "GET"
        description: "Obtener predicción para símbolo"
      - path: "/predict/batch"
        method: "POST"
        description: "Predicciones en batch"
    related_et: "ET-ML-001"

  - id: "SVC-ML-002"
    name: "FeatureEngineering"
    description: "Cálculo y cache de features"
    framework: "Python"
    dependencies:
      - "pandas"
      - "numpy"
      - "ta-lib"
    related_et: "ET-ML-002"

  - id: "SVC-ML-003"
    name: "ModelTrainer"
    description: "Entrenamiento y actualización de modelos"
    framework: "PyTorch"
    schedule: "weekly"
    related_et: "ET-ML-003"

  - id: "SVC-ML-004"
    name: "ModelRegistry"
    description: "Registro y versionado de modelos"
    framework: "MLflow"
    storage: "S3"
    related_et: "ET-ML-004"

  - id: "SVC-ML-005"
    name: "HierarchicalPredictorService"
    description: "Servicio de predicción jerárquica de 3 niveles"
    framework: "Python"
    file: "src/services/hierarchical_predictor.py"
    related_et: "ET-ML-007"

# ============================================
# PIPELINES
# ============================================

pipelines:
  - id: "PIP-001"
    name: "RealTimePrediction"
    description: "Pipeline de predicción en tiempo real"
    steps:
      - "fetch_market_data"
      - "calculate_features"
      - "normalize_features"
      - "run_inference"
      - "post_process"
      - "cache_result"
    latency_target: "< 500ms"

  # NOTE(review): this pipeline is named and scheduled "daily", while the
  # models' `training_frequency` and SVC-ML-003's `schedule` are "weekly" -
  # confirm the intended retraining cadence.
  - id: "PIP-002"
    name: "DailyRetrain"
    description: "Pipeline de reentrenamiento diario"
    steps:
      - "fetch_training_data"
      - "feature_engineering"
      - "train_model"
      - "evaluate_model"
      - "register_if_improved"
    schedule: "daily"

  - id: "PIP-003"
    name: "BatchPrediction"
    description: "Pipeline de predicción en batch"
    steps:
      - "fetch_symbols_list"
      - "parallel_feature_calc"
      - "batch_inference"
      - "store_results"
    schedule: "every_4h"

# ============================================
# CONFIGURATION
# ============================================

config:
  inference:
    cache_ttl: 60  # seconds
    batch_size: 100
    timeout: 5000  # ms
  training:
    train_test_split: 0.8
    validation_split: 0.1
    epochs: 100
    early_stopping_patience: 10
  features:
    lookback_periods:
      short: 20
      medium: 50
      long: 200
    normalization: "min_max"

# ============================================
# METRICS AND MONITORING
# ============================================

metrics:
  model_performance:
    - name: "accuracy"
      target: "> 0.65"
    - name: "precision"
      target: "> 0.60"
    - name: "recall"
      target: "> 0.60"
    - name: "f1_score"
      target: "> 0.60"
  service_health:
    - name: "latency_p99"
      target: "< 1000ms"
    - name: "availability"
      target: "> 99.5%"
    - name: "error_rate"
      target: "< 1%"

# ============================================
# TRADINGAGENT INTEGRATION
# ============================================

tradingagent_integration:
  description: "Migración del ML Engine existente de TradingAgent"
  source_repo: "tradingagent"
  components_to_migrate:
    - name: "PredictionEngine"
      source: "tradingagent/ml/prediction_engine.py"
      target: "apps/ml-engine/src/prediction/"
      status: "planned"
    - name: "FeatureCalculator"
      source: "tradingagent/ml/features.py"
      target: "apps/ml-engine/src/features/"
      status: "planned"
    - name: "ModelLoader"
      source: "tradingagent/ml/model_loader.py"
      target: "apps/ml-engine/src/models/"
      status: "planned"
  api_compatibility:
    description: "Mantener compatibilidad con API existente"
    endpoints_to_preserve:
      - "/api/v1/predict"
      - "/api/v1/signals"
      - "/api/v1/features"

# ============================================
# FEATURE COMPATIBILITY NOTES
# ============================================
# Records, per symbol, how many input features the trained base model
# expects (50 without attention features, 52 with them).

feature_compatibility:
  description: "Documentación de compatibilidad entre modelos con diferentes números de features"
  last_updated: "2026-01-07"
  models_feature_count:
    GBPUSD:
      feature_count: 50
      uses_attention: false
      note: "Entrenado con use_attention_features=False"
      status: "trained"
      training_date: "2026-01-07"
    EURUSD:
      feature_count: 52
      uses_attention: true
      note: "Entrenado con attention_score y attention_class"
      status: "trained"
      training_date: "2026-01-06"
    XAUUSD:
      feature_count: 52
      uses_attention: true
      note: "Entrenado con attention_score y attention_class"
      status: "trained"
      training_date: "2026-01-06"
    USDJPY:
      feature_count: 50
      uses_attention: false
      note: "Attention models trained, base models without attention features"
      status: "trained"
      training_date: "2026-01-07"
      backtest_results:
        period: "2024-09-01 to 2024-12-31"
        win_rate: "39.2%"
        expectancy: "-0.0544"
        confidence_accuracy: "93.6%"
    BTCUSD:
      feature_count: 50
      uses_attention: false
      note: "ACTUALIZADO - Datos de Polygon API (2024-2025)"
      status: "trained"
      training_date: "2026-01-07"
      data_source:
        provider: "Polygon.io API"
        available_range: "2015-03-22 to 2025-12-31"
        new_data_range: "2024-01-07 to 2025-12-31"
        new_records: 215699
        total_records: 367500
      model_metrics:
        attention_5m:
          r2: 0.223
          accuracy: "62.3%"
        attention_15m:
          r2: 0.169
          accuracy: "59.9%"
        metamodel:
          confidence_accuracy: "87.3%"
          improvement_over_avg: "5.3%"
      # This `status` describes the backtest outcome; the symbol's own
      # training status is the `status` key above.
      backtest_results:
        period: "2025-09-01 to 2025-12-31"
        best_strategy: "aggressive_filter"
        trades: 2524
        win_rate: "46.8%"
        expectancy: "+0.0700"
        profit_factor: 1.17
        total_profit_r: "+176.71"
        status: "PROFITABLE"
  pipeline_handling:
    description: "El pipeline maneja automáticamente la diferencia de features"
    mechanism: "_prepare_features_for_base_model() excluye attention_score y attention_class"
    files:
      - "src/pipelines/hierarchical_pipeline.py:402-408"
      - "src/training/metamodel_trainer.py:343-349"
  known_issues_resolved:
    - id: "FIX-001"
      date: "2026-01-07"
      issue: "Feature shape mismatch, expected: 50, got 52"
      cause: "Caché de Python contenía código sin el fix de exclusión"
      resolution: "Limpieza de __pycache__ y *.pyc"
      status: "RESOLVED"

# ============================================
# REFERENCES
# ============================================

references:
  requirements:
    - "docs/02-definicion-modulos/OQI-006-ml-signals/requerimientos/"
  specifications:
    - "docs/02-definicion-modulos/OQI-006-ml-signals/especificaciones/"
  traceability:
    - "docs/02-definicion-modulos/OQI-006-ml-signals/implementacion/TRACEABILITY.yml"
  fix_documentation:
    - "docs/99-analisis/PLAN-IMPLEMENTACION-FASES.md#fase-8"