trading-platform/docs/90-transversal/inventarios/ML_INVENTORY.yml
Adrian Flores Cortes ffee1900f9 docs: Update inventories for PostgreSQL data migration
- DATABASE_INVENTORY: Added data_status section with 469K+ bars loaded
- ML_INVENTORY: Documented MySQL deprecation and PostgreSQL migration
- 6 tickers loaded with 365 days of 5-minute data from Polygon API

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-25 05:51:39 -06:00

757 lines
21 KiB
YAML

# ML_INVENTORY.yml - Inventario de Componentes ML Engine
# Trading Platform
# Última actualización: 2026-01-25
# File-level metadata: inventory version, owning epic and the changelog.
metadata:
  version: "2.2.0"
  last_updated: "2026-01-25"
  epic: "OQI-006"
  description: "Inventario de modelos, features y servicios del ML Engine"
  # Changelog entries are listed newest first. Each entry carries the
  # inventory version, the date of the change and a list of changes.
  changelog:
    - version: "2.2.0"
      date: "2026-01-25"
      changes:
        - "Database migration: MySQL deprecated, now using PostgreSQL exclusively"
        - "Added src/data/database.py module for PostgreSQL access"
        - "Created .env with PostgreSQL credentials"
        - "Updated config/database.yaml to mark MySQL as deprecated"
        - "Data loaded from Polygon API into local PostgreSQL (6 tickers, 365 days)"
    - version: "2.1.0"
      date: "2026-01-07"
      changes:
        - "Added models ML-008 to ML-018 (previously undocumented)"
        - "Added SVC-ML-005 HierarchicalPredictorService"
        - "Alignment validation completed"
    - version: "2.0.0"
      date: "2026-01-07"
      changes:
        - "Added AttentionScoreModel (ML-005)"
        - "Added SymbolTimeframeModel with attention (ML-006)"
        - "Added AssetMetamodel (ML-007 - planned)"
        # NOTE(review): the attention feature list in this file runs to
        # FA-010; confirm whether this entry should read FA-001 to FA-010.
        - "Added attention features (FA-001 to FA-009)"
    - version: "1.0.0"
      date: "2025-12-05"
      changes:
        - "Initial inventory creation"
# ============================================
# MODELOS DE MACHINE LEARNING
# ============================================
# Catalogue of ML models. Every entry has an id, name, description, type,
# framework and status; implemented entries also point at their source files.
models:
  # --- ML-001 to ML-004: entries whose status is "planned" ---
  - id: "ML-001"
    name: "PricePredictor"
    description: "Modelo de predicción de dirección de precio"
    type: "classification"
    framework: "PyTorch"
    input_features: 45
    output: "bullish/bearish/neutral"
    confidence_range: "0.0-1.0"
    horizons:
      - "1h"
      - "4h"
      - "1d"
    symbols_supported:
      - "stocks_us"
      - "crypto_major"
    training_frequency: "weekly"
    accuracy_target: "65%"
    related_rf: "RF-ML-001"
    status: "planned"
  - id: "ML-002"
    name: "TrendDetector"
    description: "Detector de tendencias y cambios de tendencia"
    type: "classification"
    framework: "PyTorch"
    input_features: 30
    output: "uptrend/downtrend/ranging"
    horizons:
      - "4h"
      - "1d"
      - "1w"
    related_rf: "RF-ML-002"
    status: "planned"
  - id: "ML-003"
    name: "VolatilityPredictor"
    description: "Predictor de volatilidad futura"
    type: "regression"
    framework: "PyTorch"
    input_features: 25
    output: "volatility_percent"
    related_rf: "RF-ML-003"
    status: "planned"
  - id: "ML-004"
    name: "SentimentAnalyzer"
    description: "Análisis de sentimiento de noticias"
    type: "classification"
    framework: "Transformers"
    model_base: "FinBERT"
    output: "positive/negative/neutral"
    related_rf: "RF-ML-004"
    status: "planned"

  # --- ML-005 to ML-007: levels 0, 1 and 2 of the hierarchical architecture
  # (per each entry's description) ---
  - id: "ML-005"
    name: "AttentionScoreModel"
    description: "Modelo de atencion que aprende CUANDO prestar atencion al mercado (Nivel 0 de arquitectura jerarquica)"
    type: "dual (regression + classification)"
    framework: "XGBoost"
    input_features: 9
    # NOTE(review): "ATR" is listed here but has no FA-xxx entry under
    # features.attention in this file; confirm whether one is missing.
    features:
      - "volume_ratio"
      - "volume_z"
      - "ATR"
      - "ATR_ratio"
      - "CMF"
      - "MFI"
      - "OBV_delta"
      - "BB_width"
      - "displacement"
    output:
      regression: "attention_score (0-3)"
      classification: "flow_class (0=low, 1=medium, 2=high)"
    # NOTE(review): placed at model level as the training target; confirm it
    # was not originally nested under "output".
    target: "move_multiplier = future_range / rolling_median(range)"
    symbols_supported:
      - "XAUUSD"
      - "EURUSD"
      - "BTCUSD"
      - "GBPUSD"
      - "USDJPY"
    timeframes:
      - "5m"
      - "15m"
    training_frequency: "weekly"
    metrics:
      r2_regression: "0.12-0.22"
      classification_accuracy: "54-61%"
    related_et: "ET-ML-007"
    files:
      model: "src/models/attention_score_model.py"
      trainer: "src/training/attention_trainer.py"
      script: "scripts/train_attention_model.py"
    status: "implemented"
    implementation_date: "2026-01-06"
  - id: "ML-006"
    name: "SymbolTimeframeModel"
    description: "Modelo base de prediccion de rango con attention features (Nivel 1 de arquitectura jerarquica)"
    type: "regression"
    framework: "XGBoost"
    # NOTE(review): feature_compatibility.models_feature_count lists GBPUSD,
    # USDJPY and BTCUSD with 50 features and uses_attention: false; the 52 /
    # uses_attention: true values below match only EURUSD and XAUUSD.
    input_features: 52
    features_breakdown:
      base_features: 50
      attention_features: 2
    attention_features:
      - "attention_score"
      - "attention_class"
    output:
      - "delta_high (multiplos de factor)"
      - "delta_low (multiplos de factor)"
    symbols_supported:
      - "XAUUSD"
      - "EURUSD"
      - "BTCUSD"
      - "GBPUSD"
      - "USDJPY"
    timeframes:
      - "5m"
      - "15m"
    training_frequency: "weekly"
    uses_attention: true
    related_et: "ET-ML-007"
    files:
      trainer: "src/training/symbol_timeframe_trainer.py"
      script: "scripts/train_symbol_timeframe_models.py"
    status: "implemented"
    implementation_date: "2026-01-06"
  - id: "ML-007"
    name: "AssetMetamodel"
    description: "Metamodelo por activo que sintetiza predicciones de 5m y 15m (Nivel 2 de arquitectura jerarquica)"
    type: "dual (regression + classification)"
    framework: "XGBoost"
    input_features: 10
    # The three groups below add up to the 10 input features (4 + 4 + 2).
    features:
      predictions:
        - "pred_high_5m"
        - "pred_low_5m"
        - "pred_high_15m"
        - "pred_low_15m"
      attention:
        - "attention_5m"
        - "attention_15m"
        - "attention_class_5m"
        - "attention_class_15m"
      context:
        - "ATR_ratio"
        - "volume_z"
    output:
      - "delta_high_final"
      - "delta_low_final"
      - "confidence (binary + probability)"
    symbols_trained:
      - "XAUUSD"
      - "EURUSD"
      - "GBPUSD"
      - "USDJPY"
      - "BTCUSD"
    symbols_pending: []
    training_frequency: "weekly"
    uses_oos_predictions: true
    oos_period: "2024-06-01 to 2025-12-31"
    # Per-symbol results; not every symbol reports the same set of fields.
    metrics:
      XAUUSD:
        samples: 18749
        mae_high: 2.0818
        mae_low: 2.2241
        r2_high: 0.0674
        r2_low: 0.1150
        confidence_accuracy: "90.01%"
        improvement_vs_avg: "+1.9%"
      EURUSD:
        samples: 19505
        mae_high: 0.0005
        mae_low: 0.0004
        r2_high: -0.0417
        r2_low: -0.0043
        confidence_accuracy: "86.26%"
        improvement_vs_avg: "+3.0%"
      GBPUSD:
        samples: 17412
        confidence_accuracy: "93.0%"
        status: "trained"
      USDJPY:
        samples: 16547
        confidence_accuracy: "93.6%"
        status: "trained"
      BTCUSD:
        samples: 23233
        mae_high: 150.58
        mae_low: 175.84
        r2_high: 0.163
        r2_low: 0.035
        confidence_accuracy: "87.3%"
        improvement_vs_avg: "+5.3%"
        status: "trained"
        # NOTE(review): nested under BTCUSD because these figures equal the
        # BTCUSD backtest_results in feature_compatibility; confirm placement.
        backtest:
          strategy: "aggressive_filter"
          win_rate: "46.8%"
          expectancy: "+0.0700"
          profit_factor: 1.17
    related_et: "ET-ML-007"
    files:
      model: "src/models/asset_metamodel.py"
      trainer: "src/training/metamodel_trainer.py"
      script: "scripts/train_metamodels.py"
    # NOTE(review): kept at model level; confirm it was not nested in "files".
    saved_models:
      - "models/metamodels/XAUUSD/"
      - "models/metamodels/EURUSD/"
      - "models/metamodels/GBPUSD/"
      - "models/metamodels/USDJPY/"
      - "models/metamodels/BTCUSD/"
    status: "implemented"
    implementation_date: "2026-01-07"

  # --- ML-008 to ML-018: compact entries pointing at a single source file ---
  - id: "ML-008"
    name: "RangePredictor"
    description: "Legacy range prediction model"
    type: "regression"
    framework: "XGBoost"
    file: "src/models/range_predictor.py"
    status: "implemented"
  - id: "ML-009"
    name: "RangePredictorV2"
    description: "Multi-timeframe range prediction model"
    type: "regression"
    framework: "XGBoost"
    file: "src/models/range_predictor_v2.py"
    status: "implemented"
  - id: "ML-010"
    name: "RangePredictorFactor"
    description: "Factor-based range prediction model"
    type: "regression"
    framework: "XGBoost"
    file: "src/models/range_predictor_factor.py"
    status: "implemented"
  - id: "ML-011"
    name: "EnhancedRangePredictor"
    description: "Enhanced range predictor with context"
    type: "regression"
    framework: "XGBoost"
    file: "src/models/enhanced_range_predictor.py"
    status: "implemented"
  - id: "ML-012"
    name: "AMDDetectorML"
    description: "AMD phases ML detector"
    type: "classification"
    framework: "XGBoost"
    file: "src/models/amd_detector_ml.py"
    status: "implemented"
  - id: "ML-013"
    name: "ICTSMCDetector"
    description: "ICT/SMC patterns detector"
    type: "classification"
    framework: "XGBoost"
    file: "src/models/ict_smc_detector.py"
    status: "implemented"
  - id: "ML-014"
    name: "MovementMagnitudePredictor"
    description: "Movement USD prediction model"
    type: "regression"
    framework: "XGBoost"
    file: "src/models/movement_magnitude_predictor.py"
    status: "implemented"
  - id: "ML-015"
    name: "TPSLClassifier"
    description: "TP/SL probability classifier"
    type: "classification"
    framework: "XGBoost"
    file: "src/models/tp_sl_classifier.py"
    status: "implemented"
  - id: "ML-016"
    name: "SignalGenerator"
    description: "Trading signals generator"
    type: "classification"
    framework: "XGBoost"
    file: "src/models/signal_generator.py"
    status: "implemented"
  - id: "ML-017"
    name: "DualHorizonEnsemble"
    description: "Multi-horizon ensemble model"
    type: "ensemble"
    framework: "XGBoost"
    file: "src/models/dual_horizon_ensemble.py"
    status: "implemented"
  - id: "ML-018"
    name: "NeuralGatingMetamodel"
    description: "Neural gating metamodel"
    type: "ensemble"
    framework: "PyTorch"
    file: "src/models/neural_gating_metamodel.py"
    status: "implemented"
# ============================================
# FEATURES ENGINEERING
# ============================================
# Feature catalogue, grouped by family. ID prefixes follow the group:
# FT = technical, FM = market_structure, FS = sentiment, FA = attention.
features:
  technical:
    - id: "FT-001"
      name: "rsi_14"
      description: "Relative Strength Index 14 períodos"
      type: "float"
      range: "0-100"
    - id: "FT-002"
      name: "macd_signal"
      description: "MACD Signal Line"
      type: "float"
    - id: "FT-003"
      name: "macd_histogram"
      description: "MACD Histogram"
      type: "float"
    - id: "FT-004"
      name: "bb_position"
      description: "Posición relativa en Bollinger Bands"
      type: "float"
      range: "0-1"
    - id: "FT-005"
      name: "sma_20_50_cross"
      description: "Cruce SMA 20/50"
      type: "int"
      values: "-1/0/1"
    - id: "FT-006"
      name: "atr_14"
      description: "Average True Range 14 períodos"
      type: "float"
    # NOTE(review): FA-001 below reuses the name "volume_ratio" but is defined
    # against a rolling median, while this one is described against an
    # average; confirm the two are meant to be distinct features.
    - id: "FT-007"
      name: "volume_ratio"
      description: "Ratio volumen actual vs promedio"
      type: "float"
    - id: "FT-008"
      name: "price_momentum"
      description: "Momentum de precio (ROC)"
      type: "float"
  market_structure:
    - id: "FM-001"
      name: "support_distance"
      description: "Distancia al soporte más cercano"
      type: "float"
    - id: "FM-002"
      name: "resistance_distance"
      description: "Distancia a la resistencia más cercana"
      type: "float"
    - id: "FM-003"
      name: "trend_strength"
      description: "Fuerza de la tendencia (ADX)"
      type: "float"
      range: "0-100"
  sentiment:
    - id: "FS-001"
      name: "news_sentiment"
      description: "Sentimiento de noticias recientes"
      type: "float"
      range: "-1 to 1"
    - id: "FS-002"
      name: "social_sentiment"
      description: "Sentimiento de redes sociales"
      type: "float"
      range: "-1 to 1"
    - id: "FS-003"
      name: "fear_greed_index"
      description: "Indice de miedo y codicia (crypto)"
      type: "int"
      range: "0-100"
  # FA-001 to FA-008 are inputs of ML-005 (used_by); FA-009 and FA-010 are
  # outputs of ML-005 (output_of) consumed by ML-006 and ML-007.
  attention:
    - id: "FA-001"
      name: "volume_ratio"
      description: "Ratio de volumen actual vs mediana movil"
      type: "float"
      calculation: "volume / rolling_median(volume, 20)"
      used_by: ["ML-005"]
    - id: "FA-002"
      name: "volume_z"
      description: "Z-score del volumen"
      type: "float"
      calculation: "(volume - rolling_mean) / rolling_std"
      window: 20
      used_by: ["ML-005"]
    - id: "FA-003"
      name: "ATR_ratio"
      description: "Ratio de ATR vs mediana movil - FEATURE MAS IMPORTANTE"
      type: "float"
      calculation: "ATR / rolling_median(ATR, 50)"
      importance: "34-50%"
      used_by: ["ML-005"]
    - id: "FA-004"
      name: "CMF"
      description: "Chaikin Money Flow - flujo de dinero"
      type: "float"
      range: "-1 to 1"
      used_by: ["ML-005"]
    - id: "FA-005"
      name: "MFI"
      description: "Money Flow Index"
      type: "float"
      range: "0-100"
      used_by: ["ML-005"]
    - id: "FA-006"
      name: "OBV_delta"
      description: "Cambio en On-Balance Volume normalizado"
      type: "float"
      calculation: "diff(OBV) / rolling_std(OBV, 20)"
      used_by: ["ML-005"]
    - id: "FA-007"
      name: "BB_width"
      description: "Ancho de Bollinger Bands normalizado"
      type: "float"
      calculation: "(BB_upper - BB_lower) / close"
      used_by: ["ML-005"]
    - id: "FA-008"
      name: "displacement"
      description: "Desplazamiento de precio normalizado por ATR"
      type: "float"
      calculation: "(close - open) / ATR"
      used_by: ["ML-005"]
    - id: "FA-009"
      name: "attention_score"
      description: "Score de atencion generado por modelo ML-005"
      type: "float"
      range: "0-3"
      output_of: "ML-005"
      used_by: ["ML-006", "ML-007"]
    - id: "FA-010"
      name: "attention_class"
      description: "Clasificacion de flujo generada por modelo ML-005"
      type: "int"
      values: "0=low_flow, 1=medium_flow, 2=high_flow"
      output_of: "ML-005"
      used_by: ["ML-006", "ML-007"]
# ============================================
# SERVICIOS ML
# ============================================
# ML services. Each entry names its framework and the technical
# specification (related_et) it traces to.
services:
  - id: "SVC-ML-001"
    name: "MLPredictionService"
    description: "Servicio principal de predicciones"
    framework: "FastAPI"
    # HTTP endpoints exposed by this service.
    endpoints:
      - path: "/predict/{symbol}"
        method: "GET"
        description: "Obtener predicción para símbolo"
      - path: "/predict/batch"
        method: "POST"
        description: "Predicciones en batch"
    related_et: "ET-ML-001"
  - id: "SVC-ML-002"
    name: "FeatureEngineering"
    description: "Cálculo y cache de features"
    framework: "Python"
    dependencies:
      - "pandas"
      - "numpy"
      - "ta-lib"
    related_et: "ET-ML-002"
  - id: "SVC-ML-003"
    name: "ModelTrainer"
    description: "Entrenamiento y actualización de modelos"
    framework: "PyTorch"
    schedule: "weekly"
    related_et: "ET-ML-003"
  - id: "SVC-ML-004"
    name: "ModelRegistry"
    description: "Registro y versionado de modelos"
    framework: "MLflow"
    storage: "S3"
    related_et: "ET-ML-004"
  - id: "SVC-ML-005"
    name: "HierarchicalPredictorService"
    description: "Servicio de predicción jerárquica de 3 niveles"
    framework: "Python"
    file: "src/services/hierarchical_predictor.py"
    related_et: "ET-ML-007"
# ============================================
# PIPELINES
# ============================================
# Processing pipelines. "steps" lists the stages in the order given; each
# pipeline carries either a latency target or a schedule.
pipelines:
  - id: "PIP-001"
    name: "RealTimePrediction"
    description: "Pipeline de predicción en tiempo real"
    steps:
      - "fetch_market_data"
      - "calculate_features"
      - "normalize_features"
      - "run_inference"
      - "post_process"
      - "cache_result"
    latency_target: "< 500ms"
  # NOTE(review): this pipeline is scheduled "daily", while the models in this
  # file declare training_frequency "weekly" and SVC-ML-003 (ModelTrainer)
  # declares schedule "weekly"; confirm which cadence is intended.
  - id: "PIP-002"
    name: "DailyRetrain"
    description: "Pipeline de reentrenamiento diario"
    steps:
      - "fetch_training_data"
      - "feature_engineering"
      - "train_model"
      - "evaluate_model"
      - "register_if_improved"
    schedule: "daily"
  - id: "PIP-003"
    name: "BatchPrediction"
    description: "Pipeline de predicción en batch"
    steps:
      - "fetch_symbols_list"
      - "parallel_feature_calc"
      - "batch_inference"
      - "store_results"
    schedule: "every_4h"
# ============================================
# CONFIGURACIÓN
# ============================================
# Engine settings, split into inference, training and feature computation.
config:
  inference:
    cache_ttl: 60  # seconds
    batch_size: 100
    timeout: 5000  # ms
  training:
    train_test_split: 0.8
    validation_split: 0.1
    epochs: 100
    early_stopping_patience: 10
  features:
    # Window lengths keyed by horizon name.
    lookback_periods:
      short: 20
      medium: 50
      long: 200
    normalization: "min_max"
# ============================================
# MÉTRICAS Y MONITOREO
# ============================================
# Monitoring targets: one list for model quality, one for service health.
# Targets are kept as quoted strings because they include comparison
# operators and units.
metrics:
  model_performance:
    - name: "accuracy"
      target: "> 0.65"
    - name: "precision"
      target: "> 0.60"
    - name: "recall"
      target: "> 0.60"
    - name: "f1_score"
      target: "> 0.60"
  service_health:
    - name: "latency_p99"
      target: "< 1000ms"
    - name: "availability"
      target: "> 99.5%"
    - name: "error_rate"
      target: "< 1%"
# ============================================
# INTEGRACIÓN CON TRADINGAGENT
# ============================================
# Migration plan for the existing TradingAgent ML Engine: the components to
# move (source path -> target path) and the API endpoints to preserve.
tradingagent_integration:
  description: "Migración del ML Engine existente de TradingAgent"
  source_repo: "tradingagent"
  components_to_migrate:
    - name: "PredictionEngine"
      source: "tradingagent/ml/prediction_engine.py"
      target: "apps/ml-engine/src/prediction/"
      status: "planned"
    - name: "FeatureCalculator"
      source: "tradingagent/ml/features.py"
      target: "apps/ml-engine/src/features/"
      status: "planned"
    - name: "ModelLoader"
      source: "tradingagent/ml/model_loader.py"
      target: "apps/ml-engine/src/models/"
      status: "planned"
  api_compatibility:
    description: "Mantener compatibilidad con API existente"
    endpoints_to_preserve:
      - "/api/v1/predict"
      - "/api/v1/signals"
      - "/api/v1/features"
# ============================================
# NOTAS DE COMPATIBILIDAD DE FEATURES
# ============================================
# Feature-count compatibility notes: which symbols' base models were trained
# with 50 features (no attention) versus 52 (with attention_score and
# attention_class), how the pipeline bridges the gap, and resolved issues.
feature_compatibility:
  description: "Documentación de compatibilidad entre modelos con diferentes números de features"
  last_updated: "2026-01-07"
  # Keyed by symbol.
  models_feature_count:
    GBPUSD:
      feature_count: 50
      uses_attention: false
      note: "Entrenado con use_attention_features=False"
      status: "trained"
      training_date: "2026-01-07"
    EURUSD:
      feature_count: 52
      uses_attention: true
      note: "Entrenado con attention_score y attention_class"
      status: "trained"
      training_date: "2026-01-06"
    XAUUSD:
      feature_count: 52
      uses_attention: true
      note: "Entrenado con attention_score y attention_class"
      status: "trained"
      training_date: "2026-01-06"
    USDJPY:
      feature_count: 50
      uses_attention: false
      note: "Attention models trained, base models without attention features"
      status: "trained"
      training_date: "2026-01-07"
      backtest_results:
        period: "2024-09-01 to 2024-12-31"
        win_rate: "39.2%"
        expectancy: "-0.0544"
        confidence_accuracy: "93.6%"
    BTCUSD:
      feature_count: 50
      uses_attention: false
      note: "ACTUALIZADO - Datos de Polygon API (2024-2025)"
      status: "trained"
      training_date: "2026-01-07"
      data_source:
        provider: "Polygon.io API"
        available_range: "2015-03-22 to 2025-12-31"
        new_data_range: "2024-01-07 to 2025-12-31"
        new_records: 215699
        total_records: 367500
      model_metrics:
        attention_5m:
          r2: 0.223
          accuracy: "62.3%"
        attention_15m:
          r2: 0.169
          accuracy: "59.9%"
        metamodel:
          confidence_accuracy: "87.3%"
          improvement_over_avg: "5.3%"
      backtest_results:
        period: "2025-09-01 to 2025-12-31"
        best_strategy: "aggressive_filter"
        trades: 2524
        win_rate: "46.8%"
        expectancy: "+0.0700"
        profit_factor: 1.17
        total_profit_r: "+176.71"
        # Backtest verdict; kept inside backtest_results so it does not
        # collide with the training "status" key of the symbol above.
        status: "PROFITABLE"
  pipeline_handling:
    description: "El pipeline maneja automáticamente la diferencia de features"
    mechanism: "_prepare_features_for_base_model() excluye attention_score y attention_class"
    # Entries use the form path:first_line-last_line.
    files:
      - "src/pipelines/hierarchical_pipeline.py:402-408"
      - "src/training/metamodel_trainer.py:343-349"
  known_issues_resolved:
    - id: "FIX-001"
      date: "2026-01-07"
      issue: "Feature shape mismatch, expected: 50, got 52"
      cause: "Caché de Python contenía código sin el fix de exclusión"
      resolution: "Limpieza de __pycache__ y *.pyc"
      status: "RESOLVED"
# ============================================
# REFERENCIAS
# ============================================
# Pointers to related documentation, as repository-relative paths grouped by
# document kind.
references:
  requirements:
    - "docs/02-definicion-modulos/OQI-006-ml-signals/requerimientos/"
  specifications:
    - "docs/02-definicion-modulos/OQI-006-ml-signals/especificaciones/"
  traceability:
    - "docs/02-definicion-modulos/OQI-006-ml-signals/implementacion/TRACEABILITY.yml"
  fix_documentation:
    - "docs/99-analisis/PLAN-IMPLEMENTACION-FASES.md#fase-8"