Changes include: - Updated architecture documentation - Enhanced module definitions (OQI-001 to OQI-008) - ML integration documentation updates - Trading strategies documentation - Orchestration and inventory updates - Docker configuration updates 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
18 KiB
18 KiB
| id | title | type | status | priority | epic | project | version | created_date | updated_date |
|---|---|---|---|---|---|---|---|---|---|
| ET-ML-002 | Modelos XGBoost | Technical Specification | Done | Alta | OQI-006 | trading-platform | 1.0.0 | 2025-12-05 | 2026-01-04 |
ET-ML-002: Modelos XGBoost
Metadata
| Campo | Valor |
|---|---|
| ID | ET-ML-002 |
| Épica | OQI-006 - Señales ML |
| Tipo | Especificación Técnica |
| Versión | 1.0.0 |
| Estado | Aprobado |
| Última actualización | 2026-01-04 |
Propósito
Especificar los modelos de Machine Learning basados en XGBoost utilizados para predicción de rangos de precio, clasificación TP/SL, y generación de señales de trading.
Modelos Implementados
1. RangePredictor
Objetivo: Predecir el rango de precio (ΔHigh, ΔLow) para un horizonte temporal dado.
# app/models/range_predictor.py
from xgboost import XGBRegressor
from typing import Tuple
import numpy as np
class RangePredictor:
    """
    Predict the price range (delta high, delta low) for a given time horizon.

    Two independent XGBoost regressors are trained on the same features:
    one for the upside move (high) and one for the downside move (low).
    Deltas are handled as percentages of the current price.
    """

    def __init__(self, horizon: int):
        """
        Args:
            horizon: Horizon identifier; stored on the instance and not
                otherwise interpreted by this class.
        """
        self.horizon = horizon
        self.model_high = None
        self.model_low = None
        self.feature_names = []

    def get_params(self) -> dict:
        """Return the XGBoost hyperparameters shared by both regressors."""
        return {
            'n_estimators': 500,
            'max_depth': 6,
            'learning_rate': 0.05,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'min_child_weight': 3,
            'gamma': 0.1,
            'reg_alpha': 0.1,
            'reg_lambda': 1.0,
            'objective': 'reg:squarederror',
            'tree_method': 'hist',
            'random_state': 42
        }

    def fit(self, X: np.ndarray, y_high: np.ndarray, y_low: np.ndarray):
        """
        Train the high and low regressors on the same feature matrix.

        Args:
            X: Feature matrix (2-D; ``X.shape[1]`` is read as the number
                of features).
            y_high: Regression target for the high model.
            y_low: Regression target for the low model.
        """
        params = self.get_params()
        self.model_high = XGBRegressor(**params)
        self.model_high.fit(X, y_high)
        self.model_low = XGBRegressor(**params)
        self.model_low.fit(X, y_low)
        # No real names are available for a bare ndarray, so features are
        # identified by their column index.
        self.feature_names = list(range(X.shape[1]))

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict the raw deltas for every row of ``X``.

        Returns:
            Tuple of (delta_high, delta_low) as percentages.
        """
        delta_high = self.model_high.predict(X)
        delta_low = self.model_low.predict(X)
        return delta_high, delta_low

    def predict_range(self, X: np.ndarray, current_price: float) -> dict:
        """
        Predict the absolute price range around ``current_price``.

        Only the first row of ``X`` is used.

        Returns:
            Dict with current_price, predicted_high, predicted_low,
            delta_high_percent, delta_low_percent and range_percent.
            All derived values are built-in floats.
        """
        delta_high, delta_low = self.predict(X)
        # Convert to built-in floats BEFORE doing arithmetic. Multiplying the
        # price by a NumPy scalar would return a NumPy scalar (float32
        # precision under NumPy 2 promotion rules), which loses price
        # precision and is not JSON-serializable.
        high_pct = float(delta_high[0])
        low_pct = float(delta_low[0])
        return {
            'current_price': current_price,
            'predicted_high': current_price * (1 + high_pct / 100),
            # abs() makes the low bound sit below the price regardless of
            # the sign the low model emits.
            'predicted_low': current_price * (1 - abs(low_pct) / 100),
            'delta_high_percent': high_pct,
            'delta_low_percent': low_pct,
            'range_percent': high_pct + abs(low_pct)
        }

    def save(self, path: str):
        """Save both models as JSON files inside the directory ``path``."""
        self.model_high.save_model(f"{path}/model_high.json")
        self.model_low.save_model(f"{path}/model_low.json")

    def load(self, path: str):
        """
        Load both models from the directory ``path``.

        Note: ``feature_names`` is not persisted by ``save`` and is
        therefore left untouched here.
        """
        self.model_high = XGBRegressor()
        self.model_high.load_model(f"{path}/model_high.json")
        self.model_low = XGBRegressor()
        self.model_low.load_model(f"{path}/model_low.json")
2. TPSLClassifier
Objetivo: Clasificar si el precio tocará primero Take Profit o Stop Loss.
# app/models/tpsl_classifier.py
from xgboost import XGBClassifier
import numpy as np
class TPSLClassifier:
    """
    Binary classifier answering "which level is touched first?".

    Class 1 means Take Profit is hit first, class 0 means Stop Loss is
    hit first.
    """

    LABELS = {0: 'stop_loss', 1: 'take_profit'}

    def __init__(self, tp_percent: float = 1.0, sl_percent: float = 1.0):
        """Store the TP/SL distances; the model is created by fit() or load()."""
        self.model = None
        self.tp_percent = tp_percent
        self.sl_percent = sl_percent

    def get_params(self) -> dict:
        """Return the base XGBoost hyperparameters for the binary classifier."""
        return dict(
            n_estimators=300,
            max_depth=5,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            min_child_weight=5,
            scale_pos_weight=1.0,  # placeholder; fit() replaces it
            objective='binary:logistic',
            eval_metric='auc',
            tree_method='hist',
            random_state=42,
        )

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train the classifier, weighting positives by the negative/positive ratio."""
        hyperparams = self.get_params()
        positives = np.sum(y == 1)
        negatives = np.sum(y == 0)
        # Without any positive sample the ratio is undefined; stay neutral.
        if positives > 0:
            hyperparams['scale_pos_weight'] = negatives / positives
        else:
            hyperparams['scale_pos_weight'] = 1.0
        self.model = XGBClassifier(**hyperparams)
        self.model.fit(X, y)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the predicted class (0 or 1) for each row."""
        return self.model.predict(X)

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return, for each row, the probability that TP is hit first."""
        all_probabilities = self.model.predict_proba(X)
        return all_probabilities[:, 1]

    def predict_with_confidence(self, X: np.ndarray) -> dict:
        """
        Classify the first row of ``X`` and report how sure the model is.

        Returns:
            Dict with prediction, label, probability_tp, probability_sl
            and confidence (the probability of the predicted class).
        """
        tp_probability = self.predict_proba(X)[0]
        sl_probability = 1 - tp_probability
        if tp_probability >= 0.5:
            predicted, certainty = 1, tp_probability
        else:
            predicted, certainty = 0, sl_probability
        return {
            'prediction': predicted,
            'label': self.LABELS[predicted],
            'probability_tp': float(tp_probability),
            'probability_sl': float(sl_probability),
            'confidence': float(certainty),
        }

    def save(self, path: str):
        """Write the model as JSON inside the directory ``path``."""
        target = f"{path}/tpsl_model.json"
        self.model.save_model(target)

    def load(self, path: str):
        """Read the model from the JSON file inside the directory ``path``."""
        source = f"{path}/tpsl_model.json"
        self.model = XGBClassifier()
        self.model.load_model(source)
3. SignalClassifier
Objetivo: Generar señales de trading (BUY, SELL, HOLD).
# app/models/signal_classifier.py
from xgboost import XGBClassifier
import numpy as np
from typing import Dict
class SignalClassifier:
    """
    Three-way classifier that turns features into a trading signal.

    Class ids: 0=HOLD, 1=BUY, 2=SELL.
    """

    LABELS = {0: 'hold', 1: 'buy', 2: 'sell'}
    LABEL_TO_ID = {'hold': 0, 'buy': 1, 'sell': 2}

    def __init__(self, min_confidence: float = 0.6):
        """Store the probability a BUY/SELL must reach to be emitted."""
        self.model = None
        self.min_confidence = min_confidence

    def get_params(self) -> dict:
        """Return the XGBoost hyperparameters for the multi-class model."""
        return dict(
            n_estimators=400,
            max_depth=6,
            learning_rate=0.08,
            subsample=0.85,
            colsample_bytree=0.85,
            min_child_weight=4,
            objective='multi:softprob',
            num_class=3,
            eval_metric='mlogloss',
            tree_method='hist',
            random_state=42,
        )

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Create a fresh classifier and train it on (X, y)."""
        classifier = XGBClassifier(**self.get_params())
        self.model = classifier
        self.model.fit(X, y)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the predicted class id for each row."""
        return self.model.predict(X)

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return the per-class probabilities for each row."""
        return self.model.predict_proba(X)

    def predict_signal(self, X: np.ndarray) -> Dict:
        """
        Produce a trading signal for the first row of ``X``.

        A BUY or SELL whose probability is below ``min_confidence`` is
        downgraded to HOLD, and the reason is reported. A predicted HOLD
        is returned as-is, whatever its probability.
        """
        class_probs = self.predict_proba(X)[0]
        best_class = np.argmax(class_probs)
        best_prob = np.max(class_probs)
        breakdown = {
            'hold': float(class_probs[0]),
            'buy': float(class_probs[1]),
            'sell': float(class_probs[2]),
        }

        is_directional = best_class != 0
        if is_directional and best_prob < self.min_confidence:
            # Not confident enough to act: fall back to HOLD and report the
            # HOLD probability as the confidence.
            return {
                'signal': 'hold',
                'signal_id': 0,
                'confidence': breakdown['hold'],
                'probabilities': breakdown,
                'reason': f'Low confidence ({best_prob:.2%} < {self.min_confidence:.2%})',
            }

        return {
            'signal': self.LABELS[best_class],
            'signal_id': int(best_class),
            'confidence': float(best_prob),
            'probabilities': breakdown,
            'reason': None,
        }

    def save(self, path: str):
        """Write the model as JSON inside the directory ``path``."""
        target = f"{path}/signal_model.json"
        self.model.save_model(target)

    def load(self, path: str):
        """Read the model from the JSON file inside the directory ``path``."""
        source = f"{path}/signal_model.json"
        self.model = XGBClassifier()
        self.model.load_model(source)
Ensemble Manager
# app/models/ensemble.py
from datetime import datetime
from typing import Dict, Optional

import numpy as np

from .range_predictor import RangePredictor
from .tpsl_classifier import TPSLClassifier
from .signal_classifier import SignalClassifier
class EnsembleManager:
    """
    Manage all models and combine their predictions into one
    comprehensive trading signal.
    """

    def __init__(self, model_path: str):
        """
        Args:
            model_path: Root directory under which every model is stored.
        """
        self.model_path = model_path
        self.range_predictors: Dict[int, RangePredictor] = {}
        self.tpsl_classifier: Optional[TPSLClassifier] = None
        self.signal_classifier: Optional[SignalClassifier] = None
        # Labelled "30min, 90min, 3h, 6h" in the spec, which implies
        # 5-minute bars — TODO confirm against the data pipeline.
        self.horizons = [6, 18, 36, 72]

    async def load_all(self):
        """
        Load all models from disk.

        One RangePredictor is loaded per configured horizon, followed by
        the TP/SL and signal classifiers. The coroutine contains no
        awaits: loading runs synchronously once it is awaited.
        """
        for horizon in self.horizons:
            self.range_predictors[horizon] = RangePredictor(horizon)
            self.range_predictors[horizon].load(
                f"{self.model_path}/range_predictor/h{horizon}"
            )
        self.tpsl_classifier = TPSLClassifier()
        self.tpsl_classifier.load(f"{self.model_path}/tpsl_classifier")
        self.signal_classifier = SignalClassifier()
        self.signal_classifier.load(f"{self.model_path}/signal_classifier")

    def predict_complete(
        self,
        features: np.ndarray,
        current_price: float,
        horizon: int = 18,
        symbol: str = 'BTCUSDT'
    ) -> Dict:
        """
        Generate a complete prediction combining all models.

        Args:
            features: Feature matrix forwarded unchanged to every model.
            current_price: Price used to turn deltas into absolute levels.
            horizon: Key of the range predictor to use.
            symbol: Instrument the prediction refers to; echoed in the
                result. Defaults to the previously hard-coded 'BTCUSDT'.

        Returns:
            Dict with timestamp, symbol, horizon, horizon_label,
            price_range, tpsl, signal and recommendation.

        Raises:
            KeyError: If no range predictor is loaded for ``horizon``.
        """
        # Range prediction
        range_pred = self.range_predictors[horizon].predict_range(
            features, current_price
        )
        # TP/SL classification
        tpsl_pred = self.tpsl_classifier.predict_with_confidence(features)
        # Signal generation
        signal_pred = self.signal_classifier.predict_signal(features)
        # Combine into final recommendation
        return {
            'timestamp': datetime.utcnow().isoformat(),
            'symbol': symbol,
            'horizon': horizon,
            'horizon_label': self._horizon_label(horizon),
            'price_range': range_pred,
            'tpsl': tpsl_pred,
            'signal': signal_pred,
            'recommendation': self._generate_recommendation(
                range_pred, tpsl_pred, signal_pred
            )
        }

    def _horizon_label(self, horizon: int) -> str:
        """Map a horizon to its trading-style label ('custom' if unknown)."""
        labels = {
            6: 'scalping',
            18: 'intraday',
            36: 'swing',
            72: 'position'
        }
        return labels.get(horizon, 'custom')

    def _generate_recommendation(
        self,
        range_pred: Dict,
        tpsl_pred: Dict,
        signal_pred: Dict
    ) -> Dict:
        """
        Generate an actionable recommendation.

        For HOLD a short dict (action, reason, risk_reward=None) is
        returned. For BUY/SELL the expected reward and risk are taken
        from the predicted range: the favourable side is the reward and
        the adverse side is the risk.
        """
        signal = signal_pred['signal']
        confidence = signal_pred['confidence']
        if signal == 'hold':
            return {
                'action': 'HOLD',
                'reason': 'No clear signal',
                'risk_reward': None
            }
        # Calculate risk/reward based on range
        if signal == 'buy':
            reward = range_pred['delta_high_percent']
            risk = abs(range_pred['delta_low_percent'])
        else:  # sell
            reward = abs(range_pred['delta_low_percent'])
            risk = range_pred['delta_high_percent']
        # A non-positive risk cannot be used as a divisor; the ratio is
        # reported as 0 in that case.
        rr_ratio = reward / risk if risk > 0 else 0
        return {
            'action': signal.upper(),
            'confidence': f"{confidence:.1%}",
            'expected_reward': f"{reward:.2f}%",
            'expected_risk': f"{risk:.2f}%",
            'risk_reward': f"1:{rr_ratio:.1f}",
            'tpsl_prediction': tpsl_pred['label'],
            'quality': 'high' if confidence > 0.75 and rr_ratio > 1.5 else 'medium'
        }
Métricas de Modelo
Métricas de Evaluación
# app/services/model_evaluator.py
from sklearn.metrics import (
mean_absolute_error,
mean_squared_error,
accuracy_score,
precision_recall_fscore_support,
roc_auc_score
)
import numpy as np
class ModelEvaluator:
    """Evaluate model performance"""

    @staticmethod
    def evaluate_range_predictor(y_true: np.ndarray, y_pred: np.ndarray) -> dict:
        """
        Evaluate a regression model.

        Returns:
            Dict with mae, mse, rmse and mape (in percent). MAPE is
            computed only over samples whose true value is non-zero,
            because the relative error is undefined at zero; it is NaN
            when every true value is zero.
        """
        mae = mean_absolute_error(y_true, y_pred)
        mse = mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(mse)

        true_values = np.asarray(y_true, dtype=float)
        predicted_values = np.asarray(y_pred, dtype=float)
        nonzero = true_values != 0
        if nonzero.any():
            relative_error = (
                (true_values[nonzero] - predicted_values[nonzero])
                / true_values[nonzero]
            )
            mape = np.mean(np.abs(relative_error)) * 100
        else:
            mape = float('nan')

        return {
            'mae': float(mae),
            'mse': float(mse),
            'rmse': float(rmse),
            'mape': float(mape)
        }

    @staticmethod
    def evaluate_classifier(y_true: np.ndarray, y_pred: np.ndarray, y_proba: np.ndarray = None) -> dict:
        """
        Evaluate a classification model.

        Args:
            y_true: True labels.
            y_pred: Predicted labels.
            y_proba: Optional predicted scores/probabilities; when given,
                a one-vs-rest ROC AUC is attempted.

        Returns:
            Dict with accuracy and weighted precision, recall and
            f1_score. The 'auc' key is present only when y_proba was
            given and the AUC could be computed.
        """
        accuracy = accuracy_score(y_true, y_pred)
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_true, y_pred, average='weighted'
        )
        result = {
            'accuracy': float(accuracy),
            'precision': float(precision),
            'recall': float(recall),
            'f1_score': float(f1)
        }
        if y_proba is not None:
            try:
                auc = roc_auc_score(y_true, y_proba, multi_class='ovr')
            except ValueError:
                # AUC is best-effort: roc_auc_score rejects inputs it cannot
                # score (e.g. a single class in y_true). Leave 'auc' out
                # rather than fail the whole evaluation.
                pass
            else:
                result['auc'] = float(auc)
        return result
Hyperparameter Tuning
# scripts/tune_hyperparameters.py
from optuna import create_study
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
import numpy as np
def objective(trial, X, y):
    """
    Score one Optuna trial.

    Samples a set of XGBoost hyperparameters from ``trial`` and returns
    the mean 5-fold cross-validated accuracy of a classifier built with
    them.
    """
    search_space = dict(
        n_estimators=trial.suggest_int('n_estimators', 100, 500),
        max_depth=trial.suggest_int('max_depth', 3, 10),
        learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        subsample=trial.suggest_float('subsample', 0.6, 1.0),
        colsample_bytree=trial.suggest_float('colsample_bytree', 0.6, 1.0),
        min_child_weight=trial.suggest_int('min_child_weight', 1, 10),
        gamma=trial.suggest_float('gamma', 0, 1),
        reg_alpha=trial.suggest_float('reg_alpha', 0, 1),
        reg_lambda=trial.suggest_float('reg_lambda', 0.5, 2),
    )
    candidate = XGBClassifier(random_state=42, **search_space)
    fold_scores = cross_val_score(
        candidate, X, y, cv=5, scoring='accuracy', n_jobs=-1
    )
    return np.mean(fold_scores)
def tune_model(X, y, n_trials: int = 100):
    """
    Search for the best hyperparameters with Optuna.

    Runs ``n_trials`` trials of ``objective`` on (X, y), maximizing its
    return value, and returns the best parameter set found.
    """
    def _score_trial(trial):
        # Bind the dataset so Optuna only has to supply the trial.
        return objective(trial, X, y)

    search = create_study(direction='maximize')
    search.optimize(_score_trial, n_trials=n_trials)
    return search.best_params
Model Versioning
# app/services/model_version.py
from pathlib import Path
import json
from datetime import datetime
class ModelVersion:
    """Manage model versions stored as <base_path>/<model_name>/<version_id>/."""

    def __init__(self, base_path: str):
        """
        Args:
            base_path: Root directory that holds one sub-directory per model.
        """
        self.base_path = Path(base_path)

    def save_version(
        self,
        model_name: str,
        metrics: dict,
        params: dict
    ) -> str:
        """
        Save model version metadata.

        Creates the version directory (and any missing parents) and
        writes ``metadata.json`` into it.

        Returns:
            The generated version id, a UTC timestamp formatted as
            ``YYYYMMDD_HHMMSS``.
        """
        # Take the clock reading once so version_id and created_at always
        # describe the same instant.
        now = datetime.utcnow()
        version_id = now.strftime('%Y%m%d_%H%M%S')
        metadata = {
            'version_id': version_id,
            'model_name': model_name,
            'created_at': now.isoformat(),
            'metrics': metrics,
            'hyperparameters': params
        }
        version_path = self.base_path / model_name / version_id
        version_path.mkdir(parents=True, exist_ok=True)
        with open(version_path / 'metadata.json', 'w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=2)
        return version_id

    def get_latest_version(self, model_name: str) -> "str | None":
        """
        Get the latest model version.

        Version ids are timestamps, so the lexicographically greatest
        directory name is the most recent one. Stray files inside the
        model directory are ignored.

        Returns:
            The latest version id, or None when the model has no versions.
        """
        model_path = self.base_path / model_name
        if not model_path.is_dir():
            return None
        version_ids = [
            entry.name for entry in model_path.iterdir() if entry.is_dir()
        ]
        return max(version_ids, default=None)

    def get_version_metrics(self, model_name: str, version_id: str) -> "dict | None":
        """
        Get the stored metadata for a specific version.

        Returns:
            The full content of ``metadata.json`` (metrics are under the
            'metrics' key), or None when the file does not exist.
        """
        metadata_path = self.base_path / model_name / version_id / 'metadata.json'
        if not metadata_path.exists():
            return None
        with open(metadata_path, encoding='utf-8') as f:
            return json.load(f)
Performance Targets
| Modelo | Métrica | Target | Actual |
|---|---|---|---|
| RangePredictor (High) | MAE | < 0.5% | 0.3% |
| RangePredictor (Low) | MAE | < 0.5% | 0.35% |
| TPSLClassifier | Accuracy | > 65% | 68% |
| TPSLClassifier | AUC | > 0.70 | 0.73 |
| SignalClassifier | Accuracy | > 60% | 65% |
| SignalClassifier | Precision (BUY) | > 65% | 67% |
| SignalClassifier | Precision (SELL) | > 65% | 64% (por debajo del target) |
Referencias
Autor: Requirements-Analyst Fecha: 2025-12-05