-- DDL schemas for Trading Platform:
--   - User management
--   - Authentication
--   - Payments
--   - Education
--   - ML predictions
--   - Trading data
-- Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
-- 427 lines, 13 KiB, PL/PgSQL
-- ============================================================================
-- OrbiQuant IA - ML (Machine Learning) Schema
-- ============================================================================
-- File: 06_ml_schema.sql
-- Description: ML models, predictions, features and metrics
-- Date: 2025-12-05
-- ============================================================================
|
|
|
|
-- Unqualified objects in this script are created in (and resolved from) "ml"
-- first. "public" is appended so that the unqualified uuid_generate_v4() calls
-- used as column defaults below can be resolved.
-- NOTE(review): assumes the uuid-ossp extension is installed in "public" — confirm.
SET search_path TO ml, public;
|
|
|
|
-- ============================================================================
-- ENUMERATED TYPES
-- ============================================================================

-- Kind of ML model stored in the registry.
-- Wrapped in a DO block so re-running the script does not abort when the type
-- already exists (CREATE TYPE has no IF NOT EXISTS form).
DO $$
BEGIN
    CREATE TYPE model_type_enum AS ENUM (
        'range_predictor',    -- Range prediction (ΔHigh/ΔLow)
        'tpsl_classifier',    -- TP vs SL classification
        'signal_generator',   -- Signal generator
        'regime_classifier',  -- Market regime classification
        'amd_detector',       -- AMD phase detector
        'volatility_model',   -- Volatility model
        'ensemble'            -- Ensemble meta-model
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already present: keep the existing definition
END
$$;
|
|
|
|
-- Status values a registered model can have.
-- Wrapped in a DO block so re-running the script does not abort when the type
-- already exists (CREATE TYPE has no IF NOT EXISTS form).
DO $$
BEGIN
    CREATE TYPE model_status_enum AS ENUM (
        'training',
        'validating',
        'ready',
        'deployed',
        'deprecated',
        'failed'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already present: keep the existing definition
END
$$;
|
|
|
|
-- ============================================================================
-- TABLE: models
-- Description: Registry of ML models
-- ============================================================================
CREATE TABLE IF NOT EXISTS models (
    id                        UUID DEFAULT uuid_generate_v4(),

    -- Identification
    name                      VARCHAR(100) NOT NULL,
    slug                      VARCHAR(100) NOT NULL,
    description               TEXT,

    -- Type and version
    model_type                model_type_enum NOT NULL,
    version                   VARCHAR(20) NOT NULL,
    is_latest                 BOOLEAN DEFAULT FALSE,

    -- Symbols and timeframes
    symbols                   TEXT[],                    -- NULL = all
    timeframes                trading.timeframe_enum[],

    -- Architecture
    algorithm                 VARCHAR(50),               -- 'xgboost', 'gru', 'transformer', 'ensemble'
    architecture_config       JSONB,                     -- Architecture configuration

    -- Hyperparameters
    hyperparameters           JSONB,

    -- Features
    feature_columns           TEXT[],
    feature_count             INTEGER,

    -- Artifact
    artifact_path             TEXT,                      -- Path to the serialized model
    artifact_size_mb          NUMERIC(10,2),

    -- Status
    status                    model_status_enum DEFAULT 'training',
    deployed_at               TIMESTAMPTZ,

    -- Metadata
    training_duration_seconds INTEGER,
    total_samples             INTEGER,
    created_by                UUID,

    created_at                TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    updated_at                TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    FOREIGN KEY (created_by) REFERENCES public.users (id),
    -- A given slug may be registered only once per version
    UNIQUE (slug, version)
);
|
|
|
|
-- Lookup indexes for models. IF NOT EXISTS keeps the script re-runnable,
-- matching the CREATE TABLE IF NOT EXISTS statements.
-- NOTE(review): idx_models_slug is a leading-column prefix of the index backing
-- UNIQUE(slug, version) and may be redundant — confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_models_slug ON models (slug);
CREATE INDEX IF NOT EXISTS idx_models_type ON models (model_type);
CREATE INDEX IF NOT EXISTS idx_models_status ON models (status);
-- Partial index: only rows currently flagged as latest are indexed
CREATE INDEX IF NOT EXISTS idx_models_latest ON models (is_latest) WHERE is_latest = TRUE;
|
|
|
|
-- ============================================================================
-- TABLE: training_runs
-- Description: Training executions
-- ============================================================================
CREATE TABLE IF NOT EXISTS training_runs (
    id                    UUID DEFAULT uuid_generate_v4(),
    model_id              UUID,                          -- nulled when the model row is deleted

    -- Configuration
    run_name              VARCHAR(100),
    config                JSONB NOT NULL,

    -- Data
    training_data_start   DATE,
    training_data_end     DATE,
    validation_data_start DATE,
    validation_data_end   DATE,
    total_samples         INTEGER,
    training_samples      INTEGER,
    validation_samples    INTEGER,

    -- Walk-forward
    walk_forward_splits   INTEGER,
    walk_forward_config   JSONB,

    -- Status
    status                VARCHAR(20) DEFAULT 'running', -- 'running', 'completed', 'failed', 'cancelled'
    started_at            TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    completed_at          TIMESTAMPTZ,
    error_message         TEXT,

    -- Resources
    gpu_used              BOOLEAN DEFAULT FALSE,
    memory_peak_mb        INTEGER,
    duration_seconds      INTEGER,

    -- Logs
    logs_path             TEXT,

    created_at            TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    FOREIGN KEY (model_id) REFERENCES models (id) ON DELETE SET NULL
);
|
|
|
|
-- Lookup indexes for training_runs. IF NOT EXISTS keeps the script
-- re-runnable, matching the CREATE TABLE IF NOT EXISTS statements.
CREATE INDEX IF NOT EXISTS idx_training_runs_model ON training_runs (model_id);
CREATE INDEX IF NOT EXISTS idx_training_runs_status ON training_runs (status);
|
|
|
|
-- ============================================================================
-- TABLE: model_metrics
-- Description: Model performance metrics
-- ============================================================================
CREATE TABLE IF NOT EXISTS model_metrics (
    id                    UUID DEFAULT uuid_generate_v4(),
    model_id              UUID NOT NULL,                 -- rows are removed together with their model
    training_run_id       UUID,

    -- Metric set
    metric_set            VARCHAR(50) NOT NULL,          -- 'training', 'validation', 'test', 'production'
    split_index           INTEGER,                       -- For walk-forward

    -- Regression metrics
    mae                   NUMERIC(10,6),
    rmse                  NUMERIC(10,6),
    mape                  NUMERIC(10,6),
    r2_score              NUMERIC(10,6),

    -- Classification metrics
    accuracy              NUMERIC(5,4),
    precision_score       NUMERIC(5,4),
    recall_score          NUMERIC(5,4),
    f1_score              NUMERIC(5,4),
    roc_auc               NUMERIC(5,4),

    -- Per-class metrics
    confusion_matrix      JSONB,
    classification_report JSONB,

    -- Trading metrics
    win_rate              NUMERIC(5,4),
    profit_factor         NUMERIC(6,2),
    sharpe_ratio          NUMERIC(6,2),
    sortino_ratio         NUMERIC(6,2),
    max_drawdown          NUMERIC(5,4),

    -- Feature importance
    feature_importance    JSONB,

    -- Timestamp
    calculated_at         TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    created_at            TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    FOREIGN KEY (model_id) REFERENCES models (id) ON DELETE CASCADE,
    FOREIGN KEY (training_run_id) REFERENCES training_runs (id)
);
|
|
|
|
-- Lookup indexes for model_metrics. IF NOT EXISTS keeps the script
-- re-runnable, matching the CREATE TABLE IF NOT EXISTS statements.
CREATE INDEX IF NOT EXISTS idx_model_metrics_model ON model_metrics (model_id);
CREATE INDEX IF NOT EXISTS idx_model_metrics_set ON model_metrics (metric_set);
|
|
|
|
-- ============================================================================
-- TABLE: predictions
-- Description: Generated predictions
-- ============================================================================
CREATE TABLE IF NOT EXISTS predictions (
    id                      UUID DEFAULT uuid_generate_v4(),
    model_id                UUID NOT NULL,

    -- Context
    symbol                  VARCHAR(20) NOT NULL,
    timeframe               trading.timeframe_enum NOT NULL,
    prediction_timestamp    TIMESTAMPTZ NOT NULL,

    -- Input
    candle_timestamp        TIMESTAMPTZ NOT NULL,        -- Timestamp of the input candle
    input_features          JSONB,                       -- Features used (optional, for debugging)

    -- Range prediction
    predicted_delta_high    NUMERIC(20,8),
    predicted_delta_low     NUMERIC(20,8),
    predicted_delta_high_1h NUMERIC(20,8),
    predicted_delta_low_1h  NUMERIC(20,8),

    -- Bin classification (ATR-based)
    predicted_high_bin      INTEGER,
    predicted_low_bin       INTEGER,
    bin_probabilities       JSONB,

    -- TP/SL prediction
    prob_tp_first           NUMERIC(5,4),
    rr_config               VARCHAR(20),                 -- 'rr_2_1', 'rr_3_1'

    -- Confidence
    confidence_score        NUMERIC(5,4),
    model_uncertainty       NUMERIC(5,4),

    -- Predicted market context
    predicted_amd_phase     trading.amd_phase_enum,
    predicted_volatility    trading.volatility_regime_enum,

    -- Actual outcome (filled in later)
    actual_delta_high       NUMERIC(20,8),
    actual_delta_low        NUMERIC(20,8),
    actual_tp_sl_outcome    VARCHAR(20),                 -- 'tp_hit', 'sl_hit', 'neither'
    outcome_recorded_at     TIMESTAMPTZ,

    -- Computed error
    error_high              NUMERIC(20,8),
    error_low               NUMERIC(20,8),

    created_at              TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    FOREIGN KEY (model_id) REFERENCES models (id)
);
|
|
|
|
-- Lookup indexes for predictions. IF NOT EXISTS keeps the script
-- re-runnable, matching the CREATE TABLE IF NOT EXISTS statements.
CREATE INDEX IF NOT EXISTS idx_predictions_model ON predictions (model_id);
CREATE INDEX IF NOT EXISTS idx_predictions_symbol ON predictions (symbol);
-- Descending order so the most recent predictions come first in index scans
CREATE INDEX IF NOT EXISTS idx_predictions_timestamp ON predictions (prediction_timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_predictions_candle ON predictions (candle_timestamp);
|
|
|
|
-- Date-based partitioning of predictions would improve performance
-- (in production, consider partitioning by month).
|
|
|
|
-- ============================================================================
-- TABLE: prediction_accuracy_daily
-- Description: Prediction accuracy aggregated per day
-- ============================================================================
CREATE TABLE IF NOT EXISTS prediction_accuracy_daily (
    id                    UUID DEFAULT uuid_generate_v4(),
    model_id              UUID NOT NULL,                 -- rows are removed together with their model
    symbol                VARCHAR(20) NOT NULL,
    date                  DATE NOT NULL,

    -- Counts
    total_predictions     INTEGER DEFAULT 0,
    predictions_evaluated INTEGER DEFAULT 0,

    -- Range metrics
    mae_high              NUMERIC(10,6),
    mae_low               NUMERIC(10,6),
    mape_high             NUMERIC(10,6),
    mape_low              NUMERIC(10,6),

    -- TP/SL metrics
    tp_sl_predictions     INTEGER DEFAULT 0,
    tp_correct            INTEGER DEFAULT 0,
    sl_correct            INTEGER DEFAULT 0,
    accuracy_tp_sl        NUMERIC(5,4),

    -- Bin metrics
    bin_accuracy          NUMERIC(5,4),

    created_at            TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    FOREIGN KEY (model_id) REFERENCES models (id) ON DELETE CASCADE,
    -- One aggregate row per model, symbol and day
    UNIQUE (model_id, symbol, date)
);
|
|
|
|
-- Lookup indexes for prediction_accuracy_daily. IF NOT EXISTS keeps the script
-- re-runnable, matching the CREATE TABLE IF NOT EXISTS statements.
-- NOTE(review): idx_pred_accuracy_model is a leading-column prefix of the index
-- backing UNIQUE(model_id, symbol, date) and may be redundant — confirm.
CREATE INDEX IF NOT EXISTS idx_pred_accuracy_model ON prediction_accuracy_daily (model_id);
CREATE INDEX IF NOT EXISTS idx_pred_accuracy_date ON prediction_accuracy_daily (date DESC);
|
|
|
|
-- ============================================================================
-- TABLE: feature_store
-- Description: Pre-computed features for fast inference
-- ============================================================================
CREATE TABLE IF NOT EXISTS feature_store (
    id                UUID DEFAULT uuid_generate_v4(),

    symbol            VARCHAR(20) NOT NULL,
    timeframe         trading.timeframe_enum NOT NULL,
    candle_timestamp  TIMESTAMPTZ NOT NULL,

    -- Base OHLCV
    open              NUMERIC(20,8) NOT NULL,
    high              NUMERIC(20,8) NOT NULL,
    low               NUMERIC(20,8) NOT NULL,
    close             NUMERIC(20,8) NOT NULL,
    volume            NUMERIC(20,4),

    -- Computed features (the model's 21)
    features          JSONB NOT NULL,

    -- Technical indicators
    indicators        JSONB,

    -- Validation
    is_valid          BOOLEAN DEFAULT TRUE,
    validation_errors TEXT[],

    created_at        TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    -- One feature row per symbol, timeframe and candle
    UNIQUE (symbol, timeframe, candle_timestamp)
);
|
|
|
|
-- Lookup indexes for feature_store. IF NOT EXISTS keeps the script
-- re-runnable, matching the CREATE TABLE IF NOT EXISTS statements.
-- NOTE(review): idx_feature_store_symbol is a leading-column prefix of the index
-- backing UNIQUE(symbol, timeframe, candle_timestamp) and may be redundant — confirm.
CREATE INDEX IF NOT EXISTS idx_feature_store_symbol ON feature_store (symbol, timeframe);
CREATE INDEX IF NOT EXISTS idx_feature_store_timestamp ON feature_store (candle_timestamp DESC);
|
|
|
|
-- ============================================================================
-- TABLE: model_drift_alerts
-- Description: Model drift alerts
-- ============================================================================
CREATE TABLE IF NOT EXISTS model_drift_alerts (
    id                UUID DEFAULT uuid_generate_v4(),
    model_id          UUID NOT NULL,

    -- Drift type
    drift_type        VARCHAR(50) NOT NULL,              -- 'feature_drift', 'prediction_drift', 'performance_drift'

    -- Details
    metric_name       VARCHAR(100),
    expected_value    NUMERIC(10,6),
    actual_value      NUMERIC(10,6),
    deviation_percent NUMERIC(10,4),

    -- Severity
    severity          VARCHAR(20),                       -- 'low', 'medium', 'high', 'critical'

    -- Status
    status            VARCHAR(20) DEFAULT 'active',      -- 'active', 'acknowledged', 'resolved'
    acknowledged_by   UUID,
    acknowledged_at   TIMESTAMPTZ,
    resolved_at       TIMESTAMPTZ,

    -- Action taken
    action_taken      TEXT,

    created_at        TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    FOREIGN KEY (model_id) REFERENCES models (id),
    FOREIGN KEY (acknowledged_by) REFERENCES public.users (id)
);
|
|
|
|
-- Lookup indexes for model_drift_alerts. IF NOT EXISTS keeps the script
-- re-runnable, matching the CREATE TABLE IF NOT EXISTS statements.
CREATE INDEX IF NOT EXISTS idx_drift_alerts_model ON model_drift_alerts (model_id);
CREATE INDEX IF NOT EXISTS idx_drift_alerts_status ON model_drift_alerts (status);
|
|
|
|
-- ============================================================================
-- TABLE: ab_tests
-- Description: A/B tests between models
-- ============================================================================
CREATE TABLE IF NOT EXISTS ab_tests (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),

    name VARCHAR(100) NOT NULL,
    description TEXT,

    -- Models under comparison
    control_model_id UUID NOT NULL REFERENCES models(id),
    treatment_model_id UUID NOT NULL REFERENCES models(id),

    -- Configuration
    traffic_split DECIMAL(3,2) DEFAULT 0.50,   -- Fraction of traffic routed to the treatment (0.00 - 1.00)
    target_metric VARCHAR(50),                 -- Primary metric to optimize

    -- Status
    status VARCHAR(20) DEFAULT 'draft',        -- 'draft', 'running', 'paused', 'completed', 'cancelled'
    started_at TIMESTAMPTZ,
    ended_at TIMESTAMPTZ,

    -- Results
    control_samples INT DEFAULT 0,
    treatment_samples INT DEFAULT 0,
    control_metric_value DECIMAL(10,6),
    treatment_metric_value DECIMAL(10,6),
    statistical_significance DECIMAL(5,4),
    winner VARCHAR(20),                        -- 'control', 'treatment', 'inconclusive'

    created_by UUID REFERENCES public.users(id),
    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,

    -- DECIMAL(3,2) alone would accept values such as -1.00 or 9.99; a traffic
    -- fraction only makes sense within [0, 1]. NULL still passes the check.
    CONSTRAINT ab_tests_traffic_split_check
        CHECK (traffic_split BETWEEN 0 AND 1),

    -- Comparing a model against itself is not a meaningful experiment
    CONSTRAINT ab_tests_distinct_models_check
        CHECK (control_model_id <> treatment_model_id)
);
|
|
|
|
-- Lookup index for ab_tests by status. IF NOT EXISTS keeps the script
-- re-runnable, matching the CREATE TABLE IF NOT EXISTS statements.
CREATE INDEX IF NOT EXISTS idx_ab_tests_status ON ab_tests (status);
|
|
|
|
-- ============================================================================
|
|
-- TRIGGERS
|
|
-- ============================================================================
|
|
|
|
-- Runs public.update_updated_at_column() before every row update on models
-- (presumably to refresh updated_at; the function is defined outside this file).
-- The trigger is dropped first so the script can be re-run without a
-- "trigger already exists" error.
DROP TRIGGER IF EXISTS update_models_updated_at ON models;
CREATE TRIGGER update_models_updated_at
    BEFORE UPDATE ON models
    FOR EACH ROW
    EXECUTE FUNCTION public.update_updated_at_column();
|
|
|
|
-- Runs public.update_updated_at_column() before every row update on ab_tests
-- (presumably to refresh updated_at; the function is defined outside this file).
-- The trigger is dropped first so the script can be re-run without a
-- "trigger already exists" error.
DROP TRIGGER IF EXISTS update_ab_tests_updated_at ON ab_tests;
CREATE TRIGGER update_ab_tests_updated_at
    BEFORE UPDATE ON ab_tests
    FOR EACH ROW
    EXECUTE FUNCTION public.update_updated_at_column();
|
|
|
|
-- ============================================================================
-- FUNCTION: Mark model as latest
-- ============================================================================
-- Trigger function. When the affected row has is_latest = TRUE, clears the
-- flag on every other ml.models row that shares its slug and is still flagged.
-- Always returns NEW.
CREATE OR REPLACE FUNCTION set_model_as_latest()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $body$
BEGIN
    IF NEW.is_latest THEN
        UPDATE ml.models AS m
           SET is_latest = FALSE
         WHERE m.is_latest
           AND m.slug = NEW.slug
           AND m.id <> NEW.id;
    END IF;

    RETURN NEW;
END;
$body$;
|
|
|
|
-- After a models row is inserted or updated with is_latest = TRUE, run
-- set_model_as_latest() for that row. Rows where the flag is not TRUE never
-- fire the trigger (WHEN clause).
-- The trigger is dropped first so the script can be re-run without a
-- "trigger already exists" error.
DROP TRIGGER IF EXISTS ensure_single_latest_model ON models;
CREATE TRIGGER ensure_single_latest_model
    AFTER INSERT OR UPDATE ON models
    FOR EACH ROW
    WHEN (NEW.is_latest = TRUE)
    EXECUTE FUNCTION set_model_as_latest();
|