# trading-platform/docs/90-transversal/inventarios/ML_INVENTORY.yml

# ML_INVENTORY.yml - Inventario de Componentes ML Engine
# Trading Platform
# Última actualización: 2026-01-25

# Inventory metadata: document version, owning epic and release history.
metadata:
  version: '2.2.0'
  last_updated: '2026-01-25'
  epic: 'OQI-006'
  description: 'Inventario de modelos, features y servicios del ML Engine'
  # Release history, newest entry first.
  changelog:
    - version: '2.2.0'
      date: '2026-01-25'
      changes:
        - 'Database migration: MySQL deprecated, now using PostgreSQL exclusively'
        - 'Added src/data/database.py module for PostgreSQL access'
        - 'Created .env with PostgreSQL credentials'
        - 'Updated config/database.yaml to mark MySQL as deprecated'
        - 'Data loaded from Polygon API into local PostgreSQL (6 tickers, 365 days)'
    - version: '2.1.0'
      date: '2026-01-07'
      changes:
        - 'Added models ML-008 to ML-018 (previously undocumented)'
        - 'Added SVC-ML-005 HierarchicalPredictorService'
        - 'Alignment validation completed'
    - version: '2.0.0'
      date: '2026-01-07'
      changes:
        - 'Added AttentionScoreModel (ML-005)'
        - 'Added SymbolTimeframeModel with attention (ML-006)'
        - 'Added AssetMetamodel (ML-007 - planned)'
        - 'Added attention features (FA-001 to FA-009)'
    - version: '1.0.0'
      date: '2025-12-05'
      changes:
        - 'Initial inventory creation'

# ============================================
# MODELOS DE MACHINE LEARNING
# ============================================
# Model catalogue. The first four entries (ML-001 to ML-004) are all marked
# with status "planned".
models:
  # Price direction classifier.
  - id: 'ML-001'
    name: 'PricePredictor'
    description: 'Modelo de predicción de dirección de precio'
    type: 'classification'
    framework: 'PyTorch'
    input_features: 45
    output: 'bullish/bearish/neutral'
    confidence_range: '0.0-1.0'
    horizons: ['1h', '4h', '1d']
    symbols_supported: ['stocks_us', 'crypto_major']
    training_frequency: 'weekly'
    accuracy_target: '65%'
    related_rf: 'RF-ML-001'
    status: 'planned'

  # Trend and trend-change classifier.
  - id: 'ML-002'
    name: 'TrendDetector'
    description: 'Detector de tendencias y cambios de tendencia'
    type: 'classification'
    framework: 'PyTorch'
    input_features: 30
    output: 'uptrend/downtrend/ranging'
    horizons: ['4h', '1d', '1w']
    related_rf: 'RF-ML-002'
    status: 'planned'

  # Future volatility regressor.
  - id: 'ML-003'
    name: 'VolatilityPredictor'
    description: 'Predictor de volatilidad futura'
    type: 'regression'
    framework: 'PyTorch'
    input_features: 25
    output: 'volatility_percent'
    related_rf: 'RF-ML-003'
    status: 'planned'

  # News sentiment classifier built on a FinBERT base model.
  - id: 'ML-004'
    name: 'SentimentAnalyzer'
    description: 'Análisis de sentimiento de noticias'
    type: 'classification'
    framework: 'Transformers'
    model_base: 'FinBERT'
    output: 'positive/negative/neutral'
    related_rf: 'RF-ML-004'
    status: 'planned'

  # Level 0 of the hierarchical architecture: XGBoost attention model with a
  # regression output (attention_score) and a classification output
  # (flow_class).
  - id: 'ML-005'
    name: 'AttentionScoreModel'
    description: 'Modelo de atencion que aprende CUANDO prestar atencion al mercado (Nivel 0 de arquitectura jerarquica)'
    type: 'dual (regression + classification)'
    framework: 'XGBoost'
    input_features: 9
    # The nine input feature names counted by input_features.
    # NOTE(review): 'ATR' has no entry under features.attention
    # (FA-001 to FA-010) - confirm whether it corresponds to FT-006 (atr_14).
    features:
      - 'volume_ratio'
      - 'volume_z'
      - 'ATR'
      - 'ATR_ratio'
      - 'CMF'
      - 'MFI'
      - 'OBV_delta'
      - 'BB_width'
      - 'displacement'
    output:
      regression: 'attention_score (0-3)'
      classification: 'flow_class (0=low, 1=medium, 2=high)'
    target: 'move_multiplier = future_range / rolling_median(range)'
    symbols_supported: ['XAUUSD', 'EURUSD', 'BTCUSD', 'GBPUSD', 'USDJPY']
    timeframes: ['5m', '15m']
    training_frequency: 'weekly'
    # NOTE(review): feature_compatibility reports BTCUSD attention_5m at
    # r2 0.223 / accuracy 62.3%, slightly above these ranges - confirm whether
    # the ranges need refreshing.
    metrics:
      r2_regression: '0.12-0.22'
      classification_accuracy: '54-61%'
    related_et: 'ET-ML-007'
    # Source locations for the model, its trainer and the training script.
    files:
      model: 'src/models/attention_score_model.py'
      trainer: 'src/training/attention_trainer.py'
      script: 'scripts/train_attention_model.py'
    status: 'implemented'
    implementation_date: '2026-01-06'

  # Level 1 of the hierarchical architecture: per symbol/timeframe range
  # regressor that takes 50 base features plus the two attention outputs.
  - id: 'ML-006'
    name: 'SymbolTimeframeModel'
    description: 'Modelo base de prediccion de rango con attention features (Nivel 1 de arquitectura jerarquica)'
    type: 'regression'
    framework: 'XGBoost'
    # NOTE(review): feature_compatibility.models_feature_count records GBPUSD,
    # USDJPY and BTCUSD as trained with 50 features and uses_attention: false,
    # so the 52-feature count and uses_attention: true in this entry appear to
    # describe only XAUUSD and EURUSD - confirm.
    input_features: 52
    features_breakdown:
      base_features: 50
      attention_features: 2
    attention_features: ['attention_score', 'attention_class']
    output:
      - 'delta_high (multiplos de factor)'
      - 'delta_low (multiplos de factor)'
    symbols_supported: ['XAUUSD', 'EURUSD', 'BTCUSD', 'GBPUSD', 'USDJPY']
    timeframes: ['5m', '15m']
    training_frequency: 'weekly'
    uses_attention: true
    related_et: 'ET-ML-007'
    # Only a trainer and a script are listed; no model file is recorded.
    files:
      trainer: 'src/training/symbol_timeframe_trainer.py'
      script: 'scripts/train_symbol_timeframe_models.py'
    status: 'implemented'
    implementation_date: '2026-01-06'

  # Level 2 of the hierarchical architecture: per-asset metamodel that combines
  # the 5m and 15m predictions and attention outputs with two context features.
  - id: "ML-007"
    name: "AssetMetamodel"
    description: "Metamodelo por activo que sintetiza predicciones de 5m y 15m (Nivel 2 de arquitectura jerarquica)"
    type: "dual (regression + classification)"
    framework: "XGBoost"
    input_features: 10
    # 4 predictions + 4 attention + 2 context = the 10 input features above.
    features:
      predictions:
        - "pred_high_5m"
        - "pred_low_5m"
        - "pred_high_15m"
        - "pred_low_15m"
      attention:
        - "attention_5m"
        - "attention_15m"
        - "attention_class_5m"
        - "attention_class_15m"
      context:
        - "ATR_ratio"
        - "volume_z"
    output:
      - "delta_high_final"
      - "delta_low_final"
      - "confidence (binary + probability)"
    symbols_trained:
      - "XAUUSD"
      - "EURUSD"
      - "GBPUSD"
      - "USDJPY"
      - "BTCUSD"
    # Explicit empty list: no symbol is waiting to be trained.
    symbols_pending: []
    training_frequency: "weekly"
    uses_oos_predictions: true
    oos_period: "2024-06-01 to 2025-12-31"
    # Per-symbol results. Every symbol in symbols_trained carries
    # status: "trained"; GBPUSD and USDJPY record only the sample count and
    # confidence accuracy.
    metrics:
      XAUUSD:
        samples: 18749
        mae_high: 2.0818
        mae_low: 2.2241
        r2_high: 0.0674
        r2_low: 0.1150
        confidence_accuracy: "90.01%"
        improvement_vs_avg: "+1.9%"
        status: "trained"
      EURUSD:
        samples: 19505
        mae_high: 0.0005
        mae_low: 0.0004
        r2_high: -0.0417
        r2_low: -0.0043
        confidence_accuracy: "86.26%"
        improvement_vs_avg: "+3.0%"
        status: "trained"
      GBPUSD:
        samples: 17412
        confidence_accuracy: "93.0%"
        status: "trained"
      USDJPY:
        samples: 16547
        confidence_accuracy: "93.6%"
        status: "trained"
      BTCUSD:
        samples: 23233
        mae_high: 150.58
        mae_low: 175.84
        r2_high: 0.163
        r2_low: 0.035
        confidence_accuracy: "87.3%"
        improvement_vs_avg: "+5.3%"
        status: "trained"
        # Same figures as feature_compatibility BTCUSD backtest_results.
        backtest:
          strategy: "aggressive_filter"
          win_rate: "46.8%"
          expectancy: "+0.0700"
          profit_factor: 1.17
    related_et: "ET-ML-007"
    # Source locations for the model, its trainer and the training script.
    files:
      model: "src/models/asset_metamodel.py"
      trainer: "src/training/metamodel_trainer.py"
      script: "scripts/train_metamodels.py"
    # One saved-model directory per trained symbol.
    saved_models:
      - "models/metamodels/XAUUSD/"
      - "models/metamodels/EURUSD/"
      - "models/metamodels/GBPUSD/"
      - "models/metamodels/USDJPY/"
      - "models/metamodels/BTCUSD/"
    status: "implemented"
    implementation_date: "2026-01-07"

  # ML-008 to ML-018: each entry records only type, framework, source file and
  # status. Unlike ML-005 to ML-007, these entries use a single `file` key
  # rather than a `files` mapping.
  - id: 'ML-008'
    name: 'RangePredictor'
    description: 'Legacy range prediction model'
    type: 'regression'
    framework: 'XGBoost'
    file: 'src/models/range_predictor.py'
    status: 'implemented'

  - id: 'ML-009'
    name: 'RangePredictorV2'
    description: 'Multi-timeframe range prediction model'
    type: 'regression'
    framework: 'XGBoost'
    file: 'src/models/range_predictor_v2.py'
    status: 'implemented'

  - id: 'ML-010'
    name: 'RangePredictorFactor'
    description: 'Factor-based range prediction model'
    type: 'regression'
    framework: 'XGBoost'
    file: 'src/models/range_predictor_factor.py'
    status: 'implemented'

  - id: 'ML-011'
    name: 'EnhancedRangePredictor'
    description: 'Enhanced range predictor with context'
    type: 'regression'
    framework: 'XGBoost'
    file: 'src/models/enhanced_range_predictor.py'
    status: 'implemented'

  - id: 'ML-012'
    name: 'AMDDetectorML'
    description: 'AMD phases ML detector'
    type: 'classification'
    framework: 'XGBoost'
    file: 'src/models/amd_detector_ml.py'
    status: 'implemented'

  - id: 'ML-013'
    name: 'ICTSMCDetector'
    description: 'ICT/SMC patterns detector'
    type: 'classification'
    framework: 'XGBoost'
    file: 'src/models/ict_smc_detector.py'
    status: 'implemented'

  - id: 'ML-014'
    name: 'MovementMagnitudePredictor'
    description: 'Movement USD prediction model'
    type: 'regression'
    framework: 'XGBoost'
    file: 'src/models/movement_magnitude_predictor.py'
    status: 'implemented'

  - id: 'ML-015'
    name: 'TPSLClassifier'
    description: 'TP/SL probability classifier'
    type: 'classification'
    framework: 'XGBoost'
    file: 'src/models/tp_sl_classifier.py'
    status: 'implemented'

  - id: 'ML-016'
    name: 'SignalGenerator'
    description: 'Trading signals generator'
    type: 'classification'
    framework: 'XGBoost'
    file: 'src/models/signal_generator.py'
    status: 'implemented'

  - id: 'ML-017'
    name: 'DualHorizonEnsemble'
    description: 'Multi-horizon ensemble model'
    type: 'ensemble'
    framework: 'XGBoost'
    file: 'src/models/dual_horizon_ensemble.py'
    status: 'implemented'

  # The only entry in this group whose framework is PyTorch.
  - id: 'ML-018'
    name: 'NeuralGatingMetamodel'
    description: 'Neural gating metamodel'
    type: 'ensemble'
    framework: 'PyTorch'
    file: 'src/models/neural_gating_metamodel.py'
    status: 'implemented'

# ============================================
# FEATURES ENGINEERING
# ============================================
# Feature catalogue, grouped by category. IDs are prefixed per category:
# FT (technical), FM (market structure), FS (sentiment).
features:
  # Technical-indicator features.
  technical:
    - id: 'FT-001'
      name: 'rsi_14'
      description: 'Relative Strength Index 14 períodos'
      type: 'float'
      range: '0-100'

    - id: 'FT-002'
      name: 'macd_signal'
      description: 'MACD Signal Line'
      type: 'float'

    - id: 'FT-003'
      name: 'macd_histogram'
      description: 'MACD Histogram'
      type: 'float'

    - id: 'FT-004'
      name: 'bb_position'
      description: 'Posición relativa en Bollinger Bands'
      type: 'float'
      range: '0-1'

    # Discrete feature: uses `values` instead of `range`.
    - id: 'FT-005'
      name: 'sma_20_50_cross'
      description: 'Cruce SMA 20/50'
      type: 'int'
      values: '-1/0/1'

    - id: 'FT-006'
      name: 'atr_14'
      description: 'Average True Range 14 períodos'
      type: 'float'

    # NOTE(review): FA-001 under features.attention reuses the name
    # volume_ratio with a median-based definition - confirm the two are meant
    # to be distinct features.
    - id: 'FT-007'
      name: 'volume_ratio'
      description: 'Ratio volumen actual vs promedio'
      type: 'float'

    - id: 'FT-008'
      name: 'price_momentum'
      description: 'Momentum de precio (ROC)'
      type: 'float'

  # Support/resistance distance and trend-strength features.
  market_structure:
    - id: 'FM-001'
      name: 'support_distance'
      description: 'Distancia al soporte más cercano'
      type: 'float'

    - id: 'FM-002'
      name: 'resistance_distance'
      description: 'Distancia a la resistencia más cercana'
      type: 'float'

    - id: 'FM-003'
      name: 'trend_strength'
      description: 'Fuerza de la tendencia (ADX)'
      type: 'float'
      range: '0-100'

  # News, social and fear/greed sentiment features.
  sentiment:
    - id: 'FS-001'
      name: 'news_sentiment'
      description: 'Sentimiento de noticias recientes'
      type: 'float'
      range: '-1 to 1'

    - id: 'FS-002'
      name: 'social_sentiment'
      description: 'Sentimiento de redes sociales'
      type: 'float'
      range: '-1 to 1'

    - id: 'FS-003'
      name: 'fear_greed_index'
      description: 'Indice de miedo y codicia (crypto)'
      type: 'int'
      range: '0-100'

  # Attention features. FA-001 to FA-008 are inputs of ML-005; FA-009 and
  # FA-010 are outputs of ML-005 that feed ML-006 and ML-007.
  # `used_by` lists every model whose entry under `models` names the feature.
  # NOTE(review): ML-005 also lists "ATR" as an input, which has no entry
  # here - confirm whether it should be catalogued.
  attention:
    - id: "FA-001"
      name: "volume_ratio"
      description: "Ratio de volumen actual vs mediana movil"
      type: "float"
      calculation: "volume / rolling_median(volume, 20)"
      used_by: ["ML-005"]

    # Also listed as a context feature of ML-007.
    - id: "FA-002"
      name: "volume_z"
      description: "Z-score del volumen"
      type: "float"
      calculation: "(volume - rolling_mean) / rolling_std"
      window: 20
      used_by: ["ML-005", "ML-007"]

    # Also listed as a context feature of ML-007.
    - id: "FA-003"
      name: "ATR_ratio"
      description: "Ratio de ATR vs mediana movil - FEATURE MAS IMPORTANTE"
      type: "float"
      calculation: "ATR / rolling_median(ATR, 50)"
      importance: "34-50%"
      used_by: ["ML-005", "ML-007"]

    - id: "FA-004"
      name: "CMF"
      description: "Chaikin Money Flow - flujo de dinero"
      type: "float"
      range: "-1 to 1"
      used_by: ["ML-005"]

    - id: "FA-005"
      name: "MFI"
      description: "Money Flow Index"
      type: "float"
      range: "0-100"
      used_by: ["ML-005"]

    - id: "FA-006"
      name: "OBV_delta"
      description: "Cambio en On-Balance Volume normalizado"
      type: "float"
      calculation: "diff(OBV) / rolling_std(OBV, 20)"
      used_by: ["ML-005"]

    - id: "FA-007"
      name: "BB_width"
      description: "Ancho de Bollinger Bands normalizado"
      type: "float"
      calculation: "(BB_upper - BB_lower) / close"
      used_by: ["ML-005"]

    - id: "FA-008"
      name: "displacement"
      description: "Desplazamiento de precio normalizado por ATR"
      type: "float"
      calculation: "(close - open) / ATR"
      used_by: ["ML-005"]

    # Regression output of ML-005, consumed downstream.
    - id: "FA-009"
      name: "attention_score"
      description: "Score de atencion generado por modelo ML-005"
      type: "float"
      range: "0-3"
      output_of: "ML-005"
      used_by: ["ML-006", "ML-007"]

    # Classification output of ML-005, consumed downstream.
    - id: "FA-010"
      name: "attention_class"
      description: "Clasificacion de flujo generada por modelo ML-005"
      type: "int"
      values: "0=low_flow, 1=medium_flow, 2=high_flow"
      output_of: "ML-005"
      used_by: ["ML-006", "ML-007"]

# ============================================
# SERVICIOS ML
# ============================================
# ML services. Each entry links to its technical specification via related_et.
services:
  # Main prediction API, exposing single-symbol and batch endpoints.
  - id: 'SVC-ML-001'
    name: 'MLPredictionService'
    description: 'Servicio principal de predicciones'
    framework: 'FastAPI'
    endpoints:
      - path: '/predict/{symbol}'
        method: 'GET'
        description: 'Obtener predicción para símbolo'
      - path: '/predict/batch'
        method: 'POST'
        description: 'Predicciones en batch'
    related_et: 'ET-ML-001'

  # Feature computation and caching.
  - id: 'SVC-ML-002'
    name: 'FeatureEngineering'
    description: 'Cálculo y cache de features'
    framework: 'Python'
    dependencies: ['pandas', 'numpy', 'ta-lib']
    related_et: 'ET-ML-002'

  # Model training and refresh on a weekly schedule.
  # NOTE(review): framework is PyTorch while most implemented models in this
  # inventory list XGBoost - confirm.
  - id: 'SVC-ML-003'
    name: 'ModelTrainer'
    description: 'Entrenamiento y actualización de modelos'
    framework: 'PyTorch'
    schedule: 'weekly'
    related_et: 'ET-ML-003'

  # Model registry and versioning.
  - id: 'SVC-ML-004'
    name: 'ModelRegistry'
    description: 'Registro y versionado de modelos'
    framework: 'MLflow'
    storage: 'S3'
    related_et: 'ET-ML-004'

  # Three-level hierarchical prediction service.
  - id: 'SVC-ML-005'
    name: 'HierarchicalPredictorService'
    description: 'Servicio de predicción jerárquica de 3 niveles'
    framework: 'Python'
    file: 'src/services/hierarchical_predictor.py'
    related_et: 'ET-ML-007'

# ============================================
# PIPELINES
# ============================================
# Pipelines. `steps` is an ordered list of stage names.
pipelines:
  # Real-time prediction; the only pipeline with a latency target instead of
  # a schedule.
  - id: 'PIP-001'
    name: 'RealTimePrediction'
    description: 'Pipeline de predicción en tiempo real'
    steps:
      - 'fetch_market_data'
      - 'calculate_features'
      - 'normalize_features'
      - 'run_inference'
      - 'post_process'
      - 'cache_result'
    latency_target: '< 500ms'

  # Daily retraining.
  # NOTE(review): SVC-ML-003 and the models' training_frequency both say
  # "weekly" - confirm which cadence is current.
  - id: 'PIP-002'
    name: 'DailyRetrain'
    description: 'Pipeline de reentrenamiento diario'
    steps:
      - 'fetch_training_data'
      - 'feature_engineering'
      - 'train_model'
      - 'evaluate_model'
      - 'register_if_improved'
    schedule: 'daily'

  # Batch prediction, scheduled every 4 hours.
  - id: 'PIP-003'
    name: 'BatchPrediction'
    description: 'Pipeline de predicción en batch'
    steps:
      - 'fetch_symbols_list'
      - 'parallel_feature_calc'
      - 'batch_inference'
      - 'store_results'
    schedule: 'every_4h'

# ============================================
# CONFIGURACIÓN
# ============================================
# Default settings for inference, training and feature computation.
config:
  inference:
    # Cache TTL, in seconds.
    cache_ttl: 60
    batch_size: 100
    # Timeout, in milliseconds.
    timeout: 5000

  # NOTE(review): it is not stated whether validation_split is taken from the
  # training portion or from the whole dataset - confirm.
  training:
    train_test_split: 0.8
    validation_split: 0.1
    epochs: 100
    early_stopping_patience: 10

  features:
    # Lookback window lengths (unit not stated here).
    lookback_periods: {short: 20, medium: 50, long: 200}
    normalization: 'min_max'

# ============================================
# MÉTRICAS Y MONITOREO
# ============================================
# Monitoring targets. Each entry pairs a metric name with its target threshold.
metrics:
  # Model quality targets.
  model_performance:
    - {name: 'accuracy', target: '> 0.65'}
    - {name: 'precision', target: '> 0.60'}
    - {name: 'recall', target: '> 0.60'}
    - {name: 'f1_score', target: '> 0.60'}

  # Service-level targets.
  service_health:
    - {name: 'latency_p99', target: '< 1000ms'}
    - {name: 'availability', target: '> 99.5%'}
    - {name: 'error_rate', target: '< 1%'}

# ============================================
# INTEGRACIÓN CON TRADINGAGENT
# ============================================
# Migration plan for the existing TradingAgent ML engine. Every component
# below is still marked as planned.
tradingagent_integration:
  description: 'Migración del ML Engine existente de TradingAgent'
  source_repo: 'tradingagent'
  # Each entry maps a source file to its target directory.
  components_to_migrate:
    - name: 'PredictionEngine'
      source: 'tradingagent/ml/prediction_engine.py'
      target: 'apps/ml-engine/src/prediction/'
      status: 'planned'

    - name: 'FeatureCalculator'
      source: 'tradingagent/ml/features.py'
      target: 'apps/ml-engine/src/features/'
      status: 'planned'

    - name: 'ModelLoader'
      source: 'tradingagent/ml/model_loader.py'
      target: 'apps/ml-engine/src/models/'
      status: 'planned'

  # Existing API endpoints that must keep working after the migration.
  api_compatibility:
    description: 'Mantener compatibilidad con API existente'
    endpoints_to_preserve:
      - '/api/v1/predict'
      - '/api/v1/signals'
      - '/api/v1/features'

# ============================================
# NOTAS DE COMPATIBILIDAD DE FEATURES
# ============================================
# Notes on models trained with different feature counts (50 without attention
# features, 52 with attention_score and attention_class).
feature_compatibility:
  description: 'Documentación de compatibilidad entre modelos con diferentes números de features'
  last_updated: '2026-01-07'

  # Per-symbol training configuration; keys are trading symbols.
  models_feature_count:
    GBPUSD:
      feature_count: 50
      uses_attention: false
      note: 'Entrenado con use_attention_features=False'
      status: 'trained'
      training_date: '2026-01-07'
    EURUSD:
      feature_count: 52
      uses_attention: true
      note: 'Entrenado con attention_score y attention_class'
      status: 'trained'
      training_date: '2026-01-06'
    XAUUSD:
      feature_count: 52
      uses_attention: true
      note: 'Entrenado con attention_score y attention_class'
      status: 'trained'
      training_date: '2026-01-06'
    USDJPY:
      feature_count: 50
      uses_attention: false
      note: 'Attention models trained, base models without attention features'
      status: 'trained'
      training_date: '2026-01-07'
      # NOTE(review): this period is in 2024 while the BTCUSD backtest covers
      # the same months of 2025 - confirm the year.
      backtest_results:
        period: '2024-09-01 to 2024-12-31'
        win_rate: '39.2%'
        expectancy: '-0.0544'
        confidence_accuracy: '93.6%'
    BTCUSD:
      feature_count: 50
      uses_attention: false
      note: 'ACTUALIZADO - Datos de Polygon API (2024-2025)'
      status: 'trained'
      training_date: '2026-01-07'
      data_source:
        provider: 'Polygon.io API'
        available_range: '2015-03-22 to 2025-12-31'
        new_data_range: '2024-01-07 to 2025-12-31'
        new_records: 215699
        total_records: 367500
      model_metrics:
        attention_5m:
          r2: 0.223
          accuracy: '62.3%'
        attention_15m:
          r2: 0.169
          accuracy: '59.9%'
        # The ML-007 entry records this improvement under the key
        # improvement_vs_avg, with an explicit "+" sign.
        metamodel:
          confidence_accuracy: '87.3%'
          improvement_over_avg: '5.3%'
      backtest_results:
        period: '2025-09-01 to 2025-12-31'
        best_strategy: 'aggressive_filter'
        trades: 2524
        win_rate: '46.8%'
        expectancy: '+0.0700'
        profit_factor: 1.17
        total_profit_r: '+176.71'
        status: 'PROFITABLE'

  # How the pipeline reconciles 50- and 52-feature models. The `files` entries
  # are path:line-range references and can drift as the code changes.
  pipeline_handling:
    description: 'El pipeline maneja automáticamente la diferencia de features'
    mechanism: '_prepare_features_for_base_model() excluye attention_score y attention_class'
    files:
      - 'src/pipelines/hierarchical_pipeline.py:402-408'
      - 'src/training/metamodel_trainer.py:343-349'

  # Log of resolved issues related to feature-count mismatches.
  known_issues_resolved:
    - id: 'FIX-001'
      date: '2026-01-07'
      issue: 'Feature shape mismatch, expected: 50, got 52'
      cause: 'Caché de Python contenía código sin el fix de exclusión'
      resolution: 'Limpieza de __pycache__ y *.pyc'
      status: 'RESOLVED'

# ============================================
# REFERENCIAS
# ============================================
# Repository paths of related documentation, grouped by kind.
references:
  requirements:
    - 'docs/02-definicion-modulos/OQI-006-ml-signals/requerimientos/'
  specifications:
    - 'docs/02-definicion-modulos/OQI-006-ml-signals/especificaciones/'
  traceability:
    - 'docs/02-definicion-modulos/OQI-006-ml-signals/implementacion/TRACEABILITY.yml'
  # Link includes a fragment anchor (#fase-8) into the plan document.
  fix_documentation:
    - 'docs/99-analisis/PLAN-IMPLEMENTACION-FASES.md#fase-8'