trading-platform-database/ddl/schemas/ml/tables/02-model_versions.sql

103 lines
3.4 KiB
SQL

-- =====================================================
-- ML SCHEMA - MODEL VERSIONS TABLE
-- =====================================================
-- Description: Versioned ML model artifacts and metadata
-- Schema: ml
-- Author: Database Agent
-- Date: 2025-12-06
-- =====================================================
CREATE TABLE ml.model_versions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Modelo padre
model_id UUID NOT NULL REFERENCES ml.models(id) ON DELETE CASCADE,
-- Versión
version VARCHAR(50) NOT NULL, -- Semantic versioning: 1.0.0, 1.1.0, 2.0.0
-- Artefacto
artifact_path VARCHAR(500) NOT NULL, -- S3 path, local path, registry URL
artifact_size_bytes BIGINT,
checksum VARCHAR(64), -- SHA-256
-- Métricas de entrenamiento
training_metrics JSONB, -- {accuracy, precision, recall, f1, loss}
validation_metrics JSONB, -- {accuracy, precision, recall, f1, loss}
test_metrics JSONB, -- {accuracy, precision, recall, f1, loss}
-- Features
feature_set JSONB NOT NULL, -- Lista de features usadas
feature_importance JSONB, -- Importancia de cada feature
-- Hiperparámetros
hyperparameters JSONB, -- Parámetros del modelo
-- Dataset info
training_dataset_size INTEGER,
training_dataset_path VARCHAR(500),
data_version VARCHAR(50),
-- Estado
is_production BOOLEAN DEFAULT false,
deployed_at TIMESTAMPTZ,
deployment_metadata JSONB,
-- Metadata de entrenamiento
training_started_at TIMESTAMPTZ,
training_completed_at TIMESTAMPTZ,
training_duration_seconds INTEGER,
trained_by VARCHAR(100),
training_environment JSONB, -- Python version, library versions, etc.
-- Performance en producción
production_predictions INTEGER DEFAULT 0,
production_accuracy DECIMAL(5,4),
-- Notas
release_notes TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-- Constraints
CONSTRAINT unique_model_version UNIQUE(model_id, version)
);
-- Índices
CREATE INDEX idx_model_versions_model ON ml.model_versions(model_id);
CREATE INDEX idx_model_versions_production ON ml.model_versions(is_production)
WHERE is_production = true;
CREATE INDEX idx_model_versions_version ON ml.model_versions(version);
CREATE INDEX idx_model_versions_deployed ON ml.model_versions(deployed_at DESC);
-- Comentarios
COMMENT ON TABLE ml.model_versions IS 'Versioned artifacts and metadata for ML models';
COMMENT ON COLUMN ml.model_versions.version IS 'Semantic version (major.minor.patch)';
COMMENT ON COLUMN ml.model_versions.artifact_path IS 'Location of serialized model file';
COMMENT ON COLUMN ml.model_versions.checksum IS 'SHA-256 hash for artifact integrity verification';
COMMENT ON COLUMN ml.model_versions.feature_set IS 'Array of feature names used by this version';
COMMENT ON COLUMN ml.model_versions.hyperparameters IS 'Model hyperparameters for reproducibility';
COMMENT ON COLUMN ml.model_versions.is_production IS 'Whether this version is currently deployed in production';
-- Ejemplo de training_metrics JSONB:
COMMENT ON COLUMN ml.model_versions.training_metrics IS
'Example: {
"accuracy": 0.8542,
"precision": 0.8234,
"recall": 0.7891,
"f1_score": 0.8058,
"loss": 0.3421,
"auc_roc": 0.9123
}';
-- Ejemplo de feature_set JSONB:
COMMENT ON COLUMN ml.model_versions.feature_set IS
'Example: [
"rsi_14",
"macd_signal",
"volume_sma_20",
"price_change_1h",
"sentiment_score"
]';