trading-platform-database-v2/schemas/06_ml_schema.sql
rckrdmrd 45e77e9a9c feat: Initial commit - Database schemas and scripts
DDL schemas for Trading Platform:
- User management
- Authentication
- Payments
- Education
- ML predictions
- Trading data

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 04:30:23 -06:00

427 lines
13 KiB
PL/PgSQL

-- ============================================================================
-- OrbiQuant IA - Esquema ML (Machine Learning)
-- ============================================================================
-- Archivo: 06_ml_schema.sql
-- Descripción: Modelos ML, predicciones, features y métricas
-- Fecha: 2025-12-05
-- ============================================================================
-- Resolve every unqualified name in this script against the ml schema only;
-- objects created without a schema prefix are therefore created in ml.
-- NOTE(review): the table defaults in this file call uuid_generate_v4()
-- unqualified while other cross-schema objects are written as public.*;
-- confirm the function is reachable with this search_path.
SET search_path = ml;
-- ============================================================================
-- ENUMERATED TYPES
-- ============================================================================
-- model_type_enum: the kinds of ML model that can be registered.
-- CREATE TYPE has no IF NOT EXISTS form, so the statement is wrapped in a DO
-- block that swallows duplicate_object. Re-running the script then leaves an
-- already existing type untouched (its labels are NOT reconciled).
DO $$
BEGIN
    CREATE TYPE model_type_enum AS ENUM (
        'range_predictor',   -- Range prediction (ΔHigh/ΔLow)
        'tpsl_classifier',   -- TP vs SL classification
        'signal_generator',  -- Signal generator
        'regime_classifier', -- Market-regime classification
        'amd_detector',      -- AMD phase detector
        'volatility_model',  -- Volatility model
        'ensemble'           -- Ensemble meta-model
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists: nothing to do
END
$$;
-- model_status_enum: status values for a registered model (type of
-- models.status, whose default is 'training').
-- CREATE TYPE has no IF NOT EXISTS form, so the statement is wrapped in a DO
-- block that swallows duplicate_object. Re-running the script then leaves an
-- already existing type untouched (its labels are NOT reconciled).
DO $$
BEGIN
    CREATE TYPE model_status_enum AS ENUM (
        'training',
        'validating',
        'ready',
        'deployed',
        'deprecated',
        'failed'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already exists: nothing to do
END
$$;
-- ============================================================================
-- TABLE: models
-- Description: Registry of ML models
-- ============================================================================
-- One row per (slug, version) pair; the pair is enforced unique.
CREATE TABLE IF NOT EXISTS models (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),

    -- Identification
    name VARCHAR(100) NOT NULL,
    slug VARCHAR(100) NOT NULL,
    description TEXT,

    -- Type and version
    model_type model_type_enum NOT NULL,
    version VARCHAR(20) NOT NULL,
    is_latest BOOLEAN DEFAULT FALSE,

    -- Symbols and timeframes
    symbols TEXT[],                        -- NULL = all
    timeframes trading.timeframe_enum[],

    -- Architecture
    algorithm VARCHAR(50),                 -- 'xgboost', 'gru', 'transformer', 'ensemble'
    architecture_config JSONB,             -- Architecture configuration

    -- Hyperparameters
    hyperparameters JSONB,

    -- Features
    feature_columns TEXT[],
    feature_count INT,

    -- Artifact
    artifact_path TEXT,                    -- Path to the serialized model
    artifact_size_mb DECIMAL(10,2),

    -- Status
    status model_status_enum DEFAULT 'training',
    deployed_at TIMESTAMPTZ,

    -- Metadata
    training_duration_seconds INT,
    total_samples INT,
    created_by UUID REFERENCES public.users(id),
    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    UNIQUE(slug, version)
);

-- IF NOT EXISTS makes the index statements as re-runnable as the table above;
-- a bare CREATE INDEX aborts the script on a second run.
-- NOTE(review): idx_models_slug shares its leading column with the index that
-- backs UNIQUE(slug, version); confirm it is still needed before dropping it.
CREATE INDEX IF NOT EXISTS idx_models_slug ON models(slug);
CREATE INDEX IF NOT EXISTS idx_models_type ON models(model_type);
CREATE INDEX IF NOT EXISTS idx_models_status ON models(status);
-- Partial index: only rows flagged as latest are indexed.
CREATE INDEX IF NOT EXISTS idx_models_latest ON models(is_latest) WHERE is_latest = TRUE;
-- ============================================================================
-- TABLE: training_runs
-- Description: Training executions
-- ============================================================================
CREATE TABLE IF NOT EXISTS training_runs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    -- SET NULL: the run row survives deletion of the model it refers to.
    model_id UUID REFERENCES models(id) ON DELETE SET NULL,

    -- Configuration
    run_name VARCHAR(100),
    config JSONB NOT NULL,

    -- Data
    training_data_start DATE,
    training_data_end DATE,
    validation_data_start DATE,
    validation_data_end DATE,
    total_samples INT,
    training_samples INT,
    validation_samples INT,

    -- Walk-forward
    walk_forward_splits INT,
    walk_forward_config JSONB,

    -- Status
    -- Expected values (not enforced by a constraint):
    -- 'running', 'completed', 'failed', 'cancelled'
    status VARCHAR(20) DEFAULT 'running',
    started_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    completed_at TIMESTAMPTZ,
    error_message TEXT,

    -- Resources
    gpu_used BOOLEAN DEFAULT FALSE,
    memory_peak_mb INT,
    duration_seconds INT,

    -- Logs
    logs_path TEXT,

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

-- IF NOT EXISTS makes the index statements as re-runnable as the table above;
-- a bare CREATE INDEX aborts the script on a second run.
CREATE INDEX IF NOT EXISTS idx_training_runs_model ON training_runs(model_id);
CREATE INDEX IF NOT EXISTS idx_training_runs_status ON training_runs(status);
-- ============================================================================
-- TABLE: model_metrics
-- Description: Model performance metrics
-- ============================================================================
CREATE TABLE IF NOT EXISTS model_metrics (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    -- CASCADE: metric rows are deleted together with their model.
    model_id UUID NOT NULL REFERENCES models(id) ON DELETE CASCADE,
    -- No ON DELETE action: a training run referenced here cannot be deleted.
    training_run_id UUID REFERENCES training_runs(id),

    -- Metric set
    -- Expected values (not enforced by a constraint):
    -- 'training', 'validation', 'test', 'production'
    metric_set VARCHAR(50) NOT NULL,
    split_index INT,                       -- For walk-forward

    -- Regression metrics
    mae DECIMAL(10,6),
    rmse DECIMAL(10,6),
    mape DECIMAL(10,6),
    r2_score DECIMAL(10,6),

    -- Classification metrics
    accuracy DECIMAL(5,4),
    precision_score DECIMAL(5,4),
    recall_score DECIMAL(5,4),
    f1_score DECIMAL(5,4),
    roc_auc DECIMAL(5,4),

    -- Per-class metrics
    confusion_matrix JSONB,
    classification_report JSONB,

    -- Trading metrics
    win_rate DECIMAL(5,4),
    profit_factor DECIMAL(6,2),
    sharpe_ratio DECIMAL(6,2),
    sortino_ratio DECIMAL(6,2),
    max_drawdown DECIMAL(5,4),

    -- Feature importance
    feature_importance JSONB,

    -- Timestamps
    calculated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

-- IF NOT EXISTS makes the index statements as re-runnable as the table above;
-- a bare CREATE INDEX aborts the script on a second run.
CREATE INDEX IF NOT EXISTS idx_model_metrics_model ON model_metrics(model_id);
CREATE INDEX IF NOT EXISTS idx_model_metrics_set ON model_metrics(metric_set);
-- ============================================================================
-- TABLE: predictions
-- Description: Generated predictions
-- ============================================================================
CREATE TABLE IF NOT EXISTS predictions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    -- No ON DELETE action: a model that has predictions cannot be deleted.
    model_id UUID NOT NULL REFERENCES models(id),

    -- Context
    symbol VARCHAR(20) NOT NULL,
    timeframe trading.timeframe_enum NOT NULL,
    prediction_timestamp TIMESTAMPTZ NOT NULL,

    -- Input
    candle_timestamp TIMESTAMPTZ NOT NULL, -- Timestamp of the input candle
    input_features JSONB,                  -- Features used (optional, for debugging)

    -- Range prediction
    predicted_delta_high DECIMAL(20,8),
    predicted_delta_low DECIMAL(20,8),
    predicted_delta_high_1h DECIMAL(20,8),
    predicted_delta_low_1h DECIMAL(20,8),

    -- Bin classification (ATR-based)
    predicted_high_bin INT,
    predicted_low_bin INT,
    bin_probabilities JSONB,

    -- TP/SL prediction
    prob_tp_first DECIMAL(5,4),
    rr_config VARCHAR(20),                 -- 'rr_2_1', 'rr_3_1'

    -- Confidence
    confidence_score DECIMAL(5,4),
    model_uncertainty DECIMAL(5,4),

    -- Predicted market context
    predicted_amd_phase trading.amd_phase_enum,
    predicted_volatility trading.volatility_regime_enum,

    -- Actual outcome (filled in later)
    actual_delta_high DECIMAL(20,8),
    actual_delta_low DECIMAL(20,8),
    actual_tp_sl_outcome VARCHAR(20),      -- 'tp_hit', 'sl_hit', 'neither'
    outcome_recorded_at TIMESTAMPTZ,

    -- Computed error
    error_high DECIMAL(20,8),
    error_low DECIMAL(20,8),

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

-- IF NOT EXISTS makes the index statements as re-runnable as the table above;
-- a bare CREATE INDEX aborts the script on a second run.
CREATE INDEX IF NOT EXISTS idx_predictions_model ON predictions(model_id);
CREATE INDEX IF NOT EXISTS idx_predictions_symbol ON predictions(symbol);
CREATE INDEX IF NOT EXISTS idx_predictions_timestamp ON predictions(prediction_timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_predictions_candle ON predictions(candle_timestamp);

-- Date-based partitioning for better performance
-- (in production, consider partitioning by month)
-- ============================================================================
-- TABLE: prediction_accuracy_daily
-- Description: Prediction accuracy aggregated per day
-- ============================================================================
-- At most one row per (model_id, symbol, date).
CREATE TABLE IF NOT EXISTS prediction_accuracy_daily (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    -- CASCADE: daily rows are deleted together with their model.
    model_id UUID NOT NULL REFERENCES models(id) ON DELETE CASCADE,
    symbol VARCHAR(20) NOT NULL,
    date DATE NOT NULL,

    -- Counts
    total_predictions INT DEFAULT 0,
    predictions_evaluated INT DEFAULT 0,

    -- Range metrics
    mae_high DECIMAL(10,6),
    mae_low DECIMAL(10,6),
    mape_high DECIMAL(10,6),
    mape_low DECIMAL(10,6),

    -- TP/SL metrics
    tp_sl_predictions INT DEFAULT 0,
    tp_correct INT DEFAULT 0,
    sl_correct INT DEFAULT 0,
    accuracy_tp_sl DECIMAL(5,4),

    -- Bin metrics
    bin_accuracy DECIMAL(5,4),

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    UNIQUE(model_id, symbol, date)
);

-- IF NOT EXISTS makes the index statements as re-runnable as the table above;
-- a bare CREATE INDEX aborts the script on a second run.
-- NOTE(review): idx_pred_accuracy_model shares its leading column with the
-- index that backs UNIQUE(model_id, symbol, date); confirm it is still needed.
CREATE INDEX IF NOT EXISTS idx_pred_accuracy_model ON prediction_accuracy_daily(model_id);
CREATE INDEX IF NOT EXISTS idx_pred_accuracy_date ON prediction_accuracy_daily(date DESC);
-- ============================================================================
-- TABLE: feature_store
-- Description: Pre-computed features for fast inference
-- ============================================================================
-- At most one row per (symbol, timeframe, candle_timestamp).
CREATE TABLE IF NOT EXISTS feature_store (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    symbol VARCHAR(20) NOT NULL,
    timeframe trading.timeframe_enum NOT NULL,
    candle_timestamp TIMESTAMPTZ NOT NULL,

    -- Base OHLCV
    open DECIMAL(20,8) NOT NULL,
    high DECIMAL(20,8) NOT NULL,
    low DECIMAL(20,8) NOT NULL,
    close DECIMAL(20,8) NOT NULL,
    volume DECIMAL(20,4),

    -- Computed features (the model's 21)
    features JSONB NOT NULL,

    -- Technical indicators
    indicators JSONB,

    -- Validation
    is_valid BOOLEAN DEFAULT TRUE,
    validation_errors TEXT[],

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    UNIQUE(symbol, timeframe, candle_timestamp)
);

-- IF NOT EXISTS makes the index statements as re-runnable as the table above;
-- a bare CREATE INDEX aborts the script on a second run.
-- NOTE(review): idx_feature_store_symbol covers the two leading columns of the
-- index that backs the UNIQUE constraint; confirm it is still needed.
CREATE INDEX IF NOT EXISTS idx_feature_store_symbol ON feature_store(symbol, timeframe);
CREATE INDEX IF NOT EXISTS idx_feature_store_timestamp ON feature_store(candle_timestamp DESC);
-- ============================================================================
-- TABLE: model_drift_alerts
-- Description: Model drift alerts
-- ============================================================================
CREATE TABLE IF NOT EXISTS model_drift_alerts (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    -- No ON DELETE action: a model that has alerts cannot be deleted.
    model_id UUID NOT NULL REFERENCES models(id),

    -- Drift type
    -- Expected values (not enforced by a constraint):
    -- 'feature_drift', 'prediction_drift', 'performance_drift'
    drift_type VARCHAR(50) NOT NULL,

    -- Details
    metric_name VARCHAR(100),
    expected_value DECIMAL(10,6),
    actual_value DECIMAL(10,6),
    deviation_percent DECIMAL(10,4),

    -- Severity
    severity VARCHAR(20),                  -- 'low', 'medium', 'high', 'critical'

    -- Status
    status VARCHAR(20) DEFAULT 'active',   -- 'active', 'acknowledged', 'resolved'
    acknowledged_by UUID REFERENCES public.users(id),
    acknowledged_at TIMESTAMPTZ,
    resolved_at TIMESTAMPTZ,

    -- Action taken
    action_taken TEXT,

    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

-- IF NOT EXISTS makes the index statements as re-runnable as the table above;
-- a bare CREATE INDEX aborts the script on a second run.
CREATE INDEX IF NOT EXISTS idx_drift_alerts_model ON model_drift_alerts(model_id);
CREATE INDEX IF NOT EXISTS idx_drift_alerts_status ON model_drift_alerts(status);
-- ============================================================================
-- TABLE: ab_tests
-- Description: Model A/B tests
-- ============================================================================
CREATE TABLE IF NOT EXISTS ab_tests (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    name VARCHAR(100) NOT NULL,
    description TEXT,

    -- Models
    -- NOTE(review): nothing prevents control and treatment from being the
    -- same model; confirm whether that should be rejected.
    control_model_id UUID NOT NULL REFERENCES models(id),
    treatment_model_id UUID NOT NULL REFERENCES models(id),

    -- Configuration
    -- Share of traffic sent to the treatment model; presumably a fraction
    -- (default 0.50) rather than a percentage -- confirm with callers.
    traffic_split DECIMAL(3,2) DEFAULT 0.50,
    target_metric VARCHAR(50),             -- Primary metric to optimize

    -- Status
    -- Expected values (not enforced by a constraint):
    -- 'draft', 'running', 'paused', 'completed', 'cancelled'
    status VARCHAR(20) DEFAULT 'draft',
    started_at TIMESTAMPTZ,
    ended_at TIMESTAMPTZ,

    -- Results
    control_samples INT DEFAULT 0,
    treatment_samples INT DEFAULT 0,
    control_metric_value DECIMAL(10,6),
    treatment_metric_value DECIMAL(10,6),
    statistical_significance DECIMAL(5,4),
    winner VARCHAR(20),                    -- 'control', 'treatment', 'inconclusive'

    created_by UUID REFERENCES public.users(id),
    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

-- IF NOT EXISTS makes the index statement as re-runnable as the table above;
-- a bare CREATE INDEX aborts the script on a second run.
CREATE INDEX IF NOT EXISTS idx_ab_tests_status ON ab_tests(status);
-- ============================================================================
-- TRIGGERS
-- ============================================================================
-- Calls public.update_updated_at_column() before every row update on models.
-- That function is defined outside this file; presumably it refreshes
-- updated_at -- verify against its definition.
-- The trigger is dropped first so the script can be re-run: a bare
-- CREATE TRIGGER fails when the trigger already exists.
DROP TRIGGER IF EXISTS update_models_updated_at ON models;
CREATE TRIGGER update_models_updated_at
    BEFORE UPDATE ON models
    FOR EACH ROW
    EXECUTE FUNCTION public.update_updated_at_column();
-- Calls public.update_updated_at_column() before every row update on ab_tests.
-- That function is defined outside this file; presumably it refreshes
-- updated_at -- verify against its definition.
-- The trigger is dropped first so the script can be re-run: a bare
-- CREATE TRIGGER fails when the trigger already exists.
DROP TRIGGER IF EXISTS update_ab_tests_updated_at ON ab_tests;
CREATE TRIGGER update_ab_tests_updated_at
    BEFORE UPDATE ON ab_tests
    FOR EACH ROW
    EXECUTE FUNCTION public.update_updated_at_column();
-- ============================================================================
-- FUNCTION: Mark model as latest
-- ============================================================================
-- Trigger function. When the row being written has is_latest = TRUE, clears
-- is_latest on every other ml.models row that shares its slug and is currently
-- flagged as latest. Always returns NEW.
CREATE OR REPLACE FUNCTION set_model_as_latest()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $body$
BEGIN
    -- Nothing to demote unless this row is flagged as latest
    -- (a NULL flag counts as "not latest").
    IF NEW.is_latest IS NOT TRUE THEN
        RETURN NEW;
    END IF;

    -- Demote the sibling versions; the row being written is excluded by id.
    UPDATE ml.models AS sibling
    SET is_latest = FALSE
    WHERE sibling.is_latest
      AND sibling.slug = NEW.slug
      AND sibling.id <> NEW.id;

    RETURN NEW;
END;
$body$;
-- After a models row is inserted or updated with is_latest = TRUE, runs
-- set_model_as_latest() for that row. The WHEN clause skips rows whose flag is
-- FALSE or NULL.
-- The trigger is dropped first so the script can be re-run: the function uses
-- CREATE OR REPLACE, but a bare CREATE TRIGGER fails when the trigger exists.
DROP TRIGGER IF EXISTS ensure_single_latest_model ON models;
CREATE TRIGGER ensure_single_latest_model
    AFTER INSERT OR UPDATE ON models
    FOR EACH ROW
    WHEN (NEW.is_latest = TRUE)
    EXECUTE FUNCTION set_model_as_latest();