-- ============================================================================
-- OrbiQuant IA - ML (Machine Learning) Schema
-- ============================================================================
-- File: 06_ml_schema.sql
-- Description: ML models, predictions, features and metrics
-- Date: 2025-12-05
--
-- This script is written to be re-runnable: tables and indexes use
-- IF NOT EXISTS, enum types are created inside guarded DO blocks, and
-- triggers are dropped before being (re)created.
-- ============================================================================

-- "ml" stays first so unqualified names keep resolving to this schema.
-- "public" is appended so the unqualified uuid_generate_v4() calls below can
-- resolve. NOTE(review): assumes the uuid-ossp extension is installed in
-- "public" (its usual location) -- confirm against the extensions script.
-- Every object created here is schema-qualified, so a missing "ml" schema
-- raises an error instead of silently creating objects elsewhere.
SET search_path TO ml, public;

-- ============================================================================
-- ENUMERATED TYPES
-- ============================================================================

-- CREATE TYPE has no IF NOT EXISTS form; swallow duplicate_object so a re-run
-- does not abort the script.
DO $$
BEGIN
    CREATE TYPE ml.model_type_enum AS ENUM (
        'range_predictor',    -- Range prediction (delta high / delta low)
        'tpsl_classifier',    -- TP vs SL classification
        'signal_generator',   -- Signal generator
        'regime_classifier',  -- Market regime classification
        'amd_detector',       -- AMD phase detector
        'volatility_model',   -- Volatility model
        'ensemble'            -- Ensemble meta-model
    );
EXCEPTION
    WHEN duplicate_object THEN NULL;
END
$$;

DO $$
BEGIN
    CREATE TYPE ml.model_status_enum AS ENUM (
        'training',
        'validating',
        'ready',
        'deployed',
        'deprecated',
        'failed'
    );
EXCEPTION
    WHEN duplicate_object THEN NULL;
END
$$;

-- ============================================================================
-- TABLE: models
-- Description: Registry of ML models
-- ============================================================================
CREATE TABLE IF NOT EXISTS ml.models (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),

    -- Identification
    name VARCHAR(100) NOT NULL,
    slug VARCHAR(100) NOT NULL,
    description TEXT,

    -- Type and version
    model_type ml.model_type_enum NOT NULL,
    version VARCHAR(20) NOT NULL,
    is_latest BOOLEAN DEFAULT FALSE,

    -- Symbols and timeframes
    symbols TEXT[],                       -- NULL = all symbols
    timeframes trading.timeframe_enum[],

    -- Architecture
    algorithm VARCHAR(50),                -- 'xgboost', 'gru', 'transformer', 'ensemble'
    architecture_config JSONB,            -- Architecture configuration

    -- Hyperparameters
    hyperparameters JSONB,

    -- Features
    feature_columns TEXT[],
    feature_count INT,

    -- Artifact
    artifact_path TEXT,                   -- Path to the serialized model
    artifact_size_mb DECIMAL(10,2),

    -- Status
    status ml.model_status_enum DEFAULT 'training',
    deployed_at TIMESTAMPTZ,

    -- Metadata
    training_duration_seconds INT,
    total_samples INT,

    created_by UUID REFERENCES public.users(id),
    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    UNIQUE(slug, version)
);

CREATE INDEX IF NOT EXISTS idx_models_slug ON ml.models(slug);
CREATE INDEX IF NOT EXISTS idx_models_type ON ml.models(model_type);
CREATE INDEX IF NOT EXISTS idx_models_status ON ml.models(status);
-- Partial index: only rows flagged as latest are indexed.
CREATE INDEX IF NOT EXISTS idx_models_latest ON ml.models(is_latest) WHERE is_latest = TRUE;

-- ============================================================================
-- TABLE: training_runs
-- Description: Training executions
-- ============================================================================
CREATE TABLE IF NOT EXISTS ml.training_runs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    -- A run outlives its model: the reference is nulled when the model is deleted.
    model_id UUID REFERENCES ml.models(id) ON DELETE SET NULL,

    -- Configuration
    run_name VARCHAR(100),
    config JSONB NOT NULL,

    -- Data
    training_data_start DATE,
    training_data_end DATE,
    validation_data_start DATE,
    validation_data_end DATE,
    total_samples INT,
    training_samples INT,
    validation_samples INT,

    -- Walk-forward
    walk_forward_splits INT,
    walk_forward_config JSONB,

    -- Status
    status VARCHAR(20) DEFAULT 'running', -- 'running', 'completed', 'failed', 'cancelled'
    started_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    completed_at TIMESTAMPTZ,
    error_message TEXT,

    -- Resources
    gpu_used BOOLEAN DEFAULT FALSE,
    memory_peak_mb INT,
    duration_seconds INT,

    -- Logs
    logs_path TEXT,

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_training_runs_model ON ml.training_runs(model_id);
CREATE INDEX IF NOT EXISTS idx_training_runs_status ON ml.training_runs(status);

-- ============================================================================
-- TABLE: model_metrics
-- Description: Model performance metrics
-- ============================================================================
CREATE TABLE IF NOT EXISTS ml.model_metrics (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    -- Metrics are removed together with their model.
    model_id UUID NOT NULL REFERENCES ml.models(id) ON DELETE CASCADE,
    training_run_id UUID REFERENCES ml.training_runs(id),

    -- Metric kind
    metric_set VARCHAR(50) NOT NULL,      -- 'training', 'validation', 'test', 'production'
    split_index INT,                      -- For walk-forward

    -- Regression metrics
    mae DECIMAL(10,6),
    rmse DECIMAL(10,6),
    mape DECIMAL(10,6),
    r2_score DECIMAL(10,6),

    -- Classification metrics
    accuracy DECIMAL(5,4),
    precision_score DECIMAL(5,4),
    recall_score DECIMAL(5,4),
    f1_score DECIMAL(5,4),
    roc_auc DECIMAL(5,4),

    -- Per-class metrics
    confusion_matrix JSONB,
    classification_report JSONB,

    -- Trading metrics
    win_rate DECIMAL(5,4),
    profit_factor DECIMAL(6,2),
    sharpe_ratio DECIMAL(6,2),
    sortino_ratio DECIMAL(6,2),
    max_drawdown DECIMAL(5,4),

    -- Feature importance
    feature_importance JSONB,

    -- Timestamps
    calculated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_model_metrics_model ON ml.model_metrics(model_id);
CREATE INDEX IF NOT EXISTS idx_model_metrics_set ON ml.model_metrics(metric_set);

-- ============================================================================
-- TABLE: predictions
-- Description: Generated predictions
-- ============================================================================
CREATE TABLE IF NOT EXISTS ml.predictions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    model_id UUID NOT NULL REFERENCES ml.models(id),

    -- Context
    symbol VARCHAR(20) NOT NULL,
    timeframe trading.timeframe_enum NOT NULL,
    prediction_timestamp TIMESTAMPTZ NOT NULL,

    -- Input
    candle_timestamp TIMESTAMPTZ NOT NULL, -- Timestamp of the input candle
    input_features JSONB,                  -- Features used (optional, for debugging)

    -- Range prediction
    predicted_delta_high DECIMAL(20,8),
    predicted_delta_low DECIMAL(20,8),
    predicted_delta_high_1h DECIMAL(20,8),
    predicted_delta_low_1h DECIMAL(20,8),

    -- Bin classification (ATR-based)
    predicted_high_bin INT,
    predicted_low_bin INT,
    bin_probabilities JSONB,

    -- TP/SL prediction
    prob_tp_first DECIMAL(5,4),
    rr_config VARCHAR(20),                 -- 'rr_2_1', 'rr_3_1'

    -- Confidence
    confidence_score DECIMAL(5,4),
    model_uncertainty DECIMAL(5,4),

    -- Predicted market context
    predicted_amd_phase trading.amd_phase_enum,
    predicted_volatility trading.volatility_regime_enum,

    -- Actual outcome (filled in later)
    actual_delta_high DECIMAL(20,8),
    actual_delta_low DECIMAL(20,8),
    actual_tp_sl_outcome VARCHAR(20),      -- 'tp_hit', 'sl_hit', 'neither'
    outcome_recorded_at TIMESTAMPTZ,

    -- Computed error
    error_high DECIMAL(20,8),
    error_low DECIMAL(20,8),

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_predictions_model ON ml.predictions(model_id);
CREATE INDEX IF NOT EXISTS idx_predictions_symbol ON ml.predictions(symbol);
CREATE INDEX IF NOT EXISTS idx_predictions_timestamp ON ml.predictions(prediction_timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_predictions_candle ON ml.predictions(candle_timestamp);

-- Date-based partitioning for better performance
-- (In production, consider partitioning by month)

-- ============================================================================
-- TABLE: prediction_accuracy_daily
-- Description: Prediction accuracy aggregated per day
-- ============================================================================
CREATE TABLE IF NOT EXISTS ml.prediction_accuracy_daily (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    model_id UUID NOT NULL REFERENCES ml.models(id) ON DELETE CASCADE,
    symbol VARCHAR(20) NOT NULL,
    date DATE NOT NULL,

    -- Counts
    total_predictions INT DEFAULT 0,
    predictions_evaluated INT DEFAULT 0,

    -- Range metrics
    mae_high DECIMAL(10,6),
    mae_low DECIMAL(10,6),
    mape_high DECIMAL(10,6),
    mape_low DECIMAL(10,6),

    -- TP/SL metrics
    tp_sl_predictions INT DEFAULT 0,
    tp_correct INT DEFAULT 0,
    sl_correct INT DEFAULT 0,
    accuracy_tp_sl DECIMAL(5,4),

    -- Bin metrics
    bin_accuracy DECIMAL(5,4),

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    -- One aggregate row per model, symbol and day.
    UNIQUE(model_id, symbol, date)
);

CREATE INDEX IF NOT EXISTS idx_pred_accuracy_model ON ml.prediction_accuracy_daily(model_id);
CREATE INDEX IF NOT EXISTS idx_pred_accuracy_date ON ml.prediction_accuracy_daily(date DESC);

-- ============================================================================
-- TABLE: feature_store
-- Description: Pre-computed features for fast inference
-- ============================================================================
CREATE TABLE IF NOT EXISTS ml.feature_store (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    symbol VARCHAR(20) NOT NULL,
    timeframe trading.timeframe_enum NOT NULL,
    candle_timestamp TIMESTAMPTZ NOT NULL,

    -- Base OHLCV
    open DECIMAL(20,8) NOT NULL,
    high DECIMAL(20,8) NOT NULL,
    low DECIMAL(20,8) NOT NULL,
    close DECIMAL(20,8) NOT NULL,
    volume DECIMAL(20,4),

    -- Computed features (the model's 21)
    features JSONB NOT NULL,

    -- Technical indicators
    indicators JSONB,

    -- Validation
    is_valid BOOLEAN DEFAULT TRUE,
    validation_errors TEXT[],

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    -- One feature row per symbol, timeframe and candle.
    UNIQUE(symbol, timeframe, candle_timestamp)
);

CREATE INDEX IF NOT EXISTS idx_feature_store_symbol ON ml.feature_store(symbol, timeframe);
CREATE INDEX IF NOT EXISTS idx_feature_store_timestamp ON ml.feature_store(candle_timestamp DESC);

-- ============================================================================
-- TABLE: model_drift_alerts
-- Description: Model drift alerts
-- ============================================================================
CREATE TABLE IF NOT EXISTS ml.model_drift_alerts (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    model_id UUID NOT NULL REFERENCES ml.models(id),

    -- Drift kind
    drift_type VARCHAR(50) NOT NULL,      -- 'feature_drift', 'prediction_drift', 'performance_drift'

    -- Details
    metric_name VARCHAR(100),
    expected_value DECIMAL(10,6),
    actual_value DECIMAL(10,6),
    deviation_percent DECIMAL(10,4),

    -- Severity
    severity VARCHAR(20),                 -- 'low', 'medium', 'high', 'critical'

    -- Status
    status VARCHAR(20) DEFAULT 'active',  -- 'active', 'acknowledged', 'resolved'
    acknowledged_by UUID REFERENCES public.users(id),
    acknowledged_at TIMESTAMPTZ,
    resolved_at TIMESTAMPTZ,

    -- Action taken
    action_taken TEXT,

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_drift_alerts_model ON ml.model_drift_alerts(model_id);
CREATE INDEX IF NOT EXISTS idx_drift_alerts_status ON ml.model_drift_alerts(status);

-- ============================================================================
-- TABLE: ab_tests
-- Description: Model A/B tests
-- ============================================================================
CREATE TABLE IF NOT EXISTS ml.ab_tests (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    name VARCHAR(100) NOT NULL,
    description TEXT,

    -- Models
    control_model_id UUID NOT NULL REFERENCES ml.models(id),
    treatment_model_id UUID NOT NULL REFERENCES ml.models(id),

    -- Configuration
    traffic_split DECIMAL(3,2) DEFAULT 0.50, -- Portion of traffic sent to the treatment
    target_metric VARCHAR(50),               -- Primary metric to optimize

    -- Status
    status VARCHAR(20) DEFAULT 'draft',   -- 'draft', 'running', 'paused', 'completed', 'cancelled'
    started_at TIMESTAMPTZ,
    ended_at TIMESTAMPTZ,

    -- Results
    control_samples INT DEFAULT 0,
    treatment_samples INT DEFAULT 0,
    control_metric_value DECIMAL(10,6),
    treatment_metric_value DECIMAL(10,6),
    statistical_significance DECIMAL(5,4),
    winner VARCHAR(20),                   -- 'control', 'treatment', 'inconclusive'

    created_by UUID REFERENCES public.users(id),
    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_ab_tests_status ON ml.ab_tests(status);

-- ============================================================================
-- TRIGGERS
-- ============================================================================

-- Both triggers call public.update_updated_at_column(), which is defined
-- outside this file. Each trigger is dropped first so the script can be re-run.
DROP TRIGGER IF EXISTS update_models_updated_at ON ml.models;
CREATE TRIGGER update_models_updated_at
    BEFORE UPDATE ON ml.models
    FOR EACH ROW
    EXECUTE FUNCTION public.update_updated_at_column();

DROP TRIGGER IF EXISTS update_ab_tests_updated_at ON ml.ab_tests;
CREATE TRIGGER update_ab_tests_updated_at
    BEFORE UPDATE ON ml.ab_tests
    FOR EACH ROW
    EXECUTE FUNCTION public.update_updated_at_column();
-- ============================================================================
-- FUNCTION: Mark model as latest
-- ============================================================================
-- Trigger function for ml.models. When the written row has is_latest = TRUE,
-- it clears is_latest on every other row that shares the same slug, so the
-- written row is the only one flagged as latest for that slug.
-- Returns NEW unchanged.
CREATE OR REPLACE FUNCTION ml.set_model_as_latest()
RETURNS TRIGGER AS $$
BEGIN
    -- Guard kept even though the trigger's WHEN clause applies the same test,
    -- so the function stays safe if attached without that clause.
    IF NEW.is_latest = TRUE THEN
        UPDATE ml.models
        SET is_latest = FALSE
        WHERE slug = NEW.slug
          AND id <> NEW.id
          -- Touch only rows currently flagged, so unflagged rows are not rewritten.
          AND is_latest = TRUE;
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- CREATE TRIGGER has no IF NOT EXISTS form; drop first so the script can be
-- re-run without failing on an already-existing trigger.
DROP TRIGGER IF EXISTS ensure_single_latest_model ON ml.models;

-- The rows un-flagged by the function have NEW.is_latest = FALSE, so the WHEN
-- clause keeps this trigger from firing again for them.
CREATE TRIGGER ensure_single_latest_model
    AFTER INSERT OR UPDATE ON ml.models
    FOR EACH ROW
    WHEN (NEW.is_latest = TRUE)
    EXECUTE FUNCTION ml.set_model_as_latest();