103 lines
3.4 KiB
SQL
103 lines
3.4 KiB
SQL
-- =====================================================
|
|
-- ML SCHEMA - MODEL VERSIONS TABLE
|
|
-- =====================================================
|
|
-- Description: Versioned ML model artifacts and metadata
|
|
-- Schema: ml
|
|
-- Author: Database Agent
|
|
-- Date: 2025-12-06
|
|
-- =====================================================
|
|
|
|
CREATE TABLE ml.model_versions (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
|
|
-- Modelo padre
|
|
model_id UUID NOT NULL REFERENCES ml.models(id) ON DELETE CASCADE,
|
|
|
|
-- Versión
|
|
version VARCHAR(50) NOT NULL, -- Semantic versioning: 1.0.0, 1.1.0, 2.0.0
|
|
|
|
-- Artefacto
|
|
artifact_path VARCHAR(500) NOT NULL, -- S3 path, local path, registry URL
|
|
artifact_size_bytes BIGINT,
|
|
checksum VARCHAR(64), -- SHA-256
|
|
|
|
-- Métricas de entrenamiento
|
|
training_metrics JSONB, -- {accuracy, precision, recall, f1, loss}
|
|
validation_metrics JSONB, -- {accuracy, precision, recall, f1, loss}
|
|
test_metrics JSONB, -- {accuracy, precision, recall, f1, loss}
|
|
|
|
-- Features
|
|
feature_set JSONB NOT NULL, -- Lista de features usadas
|
|
feature_importance JSONB, -- Importancia de cada feature
|
|
|
|
-- Hiperparámetros
|
|
hyperparameters JSONB, -- Parámetros del modelo
|
|
|
|
-- Dataset info
|
|
training_dataset_size INTEGER,
|
|
training_dataset_path VARCHAR(500),
|
|
data_version VARCHAR(50),
|
|
|
|
-- Estado
|
|
is_production BOOLEAN DEFAULT false,
|
|
deployed_at TIMESTAMPTZ,
|
|
deployment_metadata JSONB,
|
|
|
|
-- Metadata de entrenamiento
|
|
training_started_at TIMESTAMPTZ,
|
|
training_completed_at TIMESTAMPTZ,
|
|
training_duration_seconds INTEGER,
|
|
trained_by VARCHAR(100),
|
|
training_environment JSONB, -- Python version, library versions, etc.
|
|
|
|
-- Performance en producción
|
|
production_predictions INTEGER DEFAULT 0,
|
|
production_accuracy DECIMAL(5,4),
|
|
|
|
-- Notas
|
|
release_notes TEXT,
|
|
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
|
|
-- Constraints
|
|
CONSTRAINT unique_model_version UNIQUE(model_id, version)
|
|
);
|
|
|
|
-- Índices
|
|
CREATE INDEX idx_model_versions_model ON ml.model_versions(model_id);
|
|
CREATE INDEX idx_model_versions_production ON ml.model_versions(is_production)
|
|
WHERE is_production = true;
|
|
CREATE INDEX idx_model_versions_version ON ml.model_versions(version);
|
|
CREATE INDEX idx_model_versions_deployed ON ml.model_versions(deployed_at DESC);
|
|
|
|
-- Comentarios
|
|
COMMENT ON TABLE ml.model_versions IS 'Versioned artifacts and metadata for ML models';
|
|
COMMENT ON COLUMN ml.model_versions.version IS 'Semantic version (major.minor.patch)';
|
|
COMMENT ON COLUMN ml.model_versions.artifact_path IS 'Location of serialized model file';
|
|
COMMENT ON COLUMN ml.model_versions.checksum IS 'SHA-256 hash for artifact integrity verification';
|
|
COMMENT ON COLUMN ml.model_versions.feature_set IS 'Array of feature names used by this version';
|
|
COMMENT ON COLUMN ml.model_versions.hyperparameters IS 'Model hyperparameters for reproducibility';
|
|
COMMENT ON COLUMN ml.model_versions.is_production IS 'Whether this version is currently deployed in production';
|
|
|
|
-- Ejemplo de training_metrics JSONB:
|
|
COMMENT ON COLUMN ml.model_versions.training_metrics IS
|
|
'Example: {
|
|
"accuracy": 0.8542,
|
|
"precision": 0.8234,
|
|
"recall": 0.7891,
|
|
"f1_score": 0.8058,
|
|
"loss": 0.3421,
|
|
"auc_roc": 0.9123
|
|
}';
|
|
|
|
-- Ejemplo de feature_set JSONB:
|
|
COMMENT ON COLUMN ml.model_versions.feature_set IS
|
|
'Example: [
|
|
"rsi_14",
|
|
"macd_signal",
|
|
"volume_sma_20",
|
|
"price_change_1h",
|
|
"sentiment_score"
|
|
]';
|