---
|
|
id: "ET-ML-002"
|
|
title: "Modelos XGBoost"
|
|
type: "Technical Specification"
|
|
status: "Done"
|
|
priority: "Alta"
|
|
epic: "OQI-006"
|
|
project: "trading-platform"
|
|
version: "1.0.0"
|
|
created_date: "2025-12-05"
|
|
updated_date: "2026-01-04"
|
|
---
|
|
|
|
# ET-ML-002: Modelos XGBoost
|
|
|
|
## Metadata
|
|
|
|
| Campo | Valor |
|
|
|-------|-------|
|
|
| **ID** | ET-ML-002 |
|
|
| **Épica** | OQI-006 - Señales ML |
|
|
| **Tipo** | Especificación Técnica |
|
|
| **Versión** | 1.0.0 |
|
|
| **Estado** | Aprobado |
|
|
| **Última actualización** | 2026-01-04 |
|
|
|
|
---
|
|
|
|
## Propósito
|
|
|
|
Especificar los modelos de Machine Learning basados en XGBoost utilizados para predicción de rangos de precio, clasificación TP/SL, y generación de señales de trading.
|
|
|
|
---
|
|
|
|
## Modelos Implementados
|
|
|
|
### 1. RangePredictor
|
|
|
|
**Objetivo:** Predecir el rango de precio (ΔHigh, ΔLow) para un horizonte temporal dado.
|
|
|
|
```python
|
|
# app/models/range_predictor.py
|
|
from xgboost import XGBRegressor
|
|
from typing import Tuple
|
|
import numpy as np
|
|
|
|
class RangePredictor:
    """
    Predicts price range (delta high, delta low) for a given time horizon.

    Two independent XGBoost regressors are trained on the same feature
    matrix: one for the upward move (delta high) and one for the downward
    move (delta low), both expressed as percentages of the current price.
    """

    def __init__(self, horizon: int):
        # Prediction horizon in candles (e.g. 18 x 5min = 90 minutes).
        self.horizon = horizon
        self.model_high = None
        self.model_low = None
        self.feature_names = []

    def get_params(self) -> dict:
        """XGBoost hyperparameters optimized for price prediction"""
        return {
            'n_estimators': 500,
            'max_depth': 6,
            'learning_rate': 0.05,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'min_child_weight': 3,
            'gamma': 0.1,
            'reg_alpha': 0.1,
            'reg_lambda': 1.0,
            'objective': 'reg:squarederror',
            'tree_method': 'hist',
            'random_state': 42
        }

    def _check_fitted(self):
        """Raise a clear error if prediction is attempted before fit()/load()."""
        if self.model_high is None or self.model_low is None:
            raise RuntimeError(
                "RangePredictor is not fitted; call fit() or load() first"
            )

    def fit(self, X: np.ndarray, y_high: np.ndarray, y_low: np.ndarray):
        """Train both models on the same feature matrix.

        Args:
            X: Feature matrix, shape (n_samples, n_features).
            y_high: Target delta-high values (percent).
            y_low: Target delta-low values (percent).
        """
        params = self.get_params()

        self.model_high = XGBRegressor(**params)
        self.model_high.fit(X, y_high)

        self.model_low = XGBRegressor(**params)
        self.model_low.fit(X, y_low)

        # Only feature indices are known here; names come from the caller's
        # feature pipeline if needed.
        self.feature_names = list(range(X.shape[1]))

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict price range.

        Returns:
            Tuple of (delta_high, delta_low) as percentages

        Raises:
            RuntimeError: if the model has not been fitted or loaded.
        """
        self._check_fitted()
        delta_high = self.model_high.predict(X)
        delta_low = self.model_low.predict(X)
        return delta_high, delta_low

    def predict_range(self, X: np.ndarray, current_price: float) -> dict:
        """
        Predict absolute price range.

        Args:
            X: Feature matrix for a single observation (first row is used).
            current_price: Latest traded price to anchor the range.

        Returns:
            Dict with predicted_high, predicted_low, current_price plus the
            raw percentage deltas and total range width.

        Raises:
            RuntimeError: if the model has not been fitted or loaded.
        """
        delta_high, delta_low = self.predict(X)

        return {
            'current_price': current_price,
            'predicted_high': current_price * (1 + delta_high[0] / 100),
            # abs() so the low bound is below the price regardless of the
            # sign convention used for the delta-low target.
            'predicted_low': current_price * (1 - abs(delta_low[0]) / 100),
            'delta_high_percent': float(delta_high[0]),
            'delta_low_percent': float(delta_low[0]),
            'range_percent': float(delta_high[0] + abs(delta_low[0]))
        }

    def save(self, path: str):
        """Save both models as XGBoost-native JSON under `path`."""
        self.model_high.save_model(f"{path}/model_high.json")
        self.model_low.save_model(f"{path}/model_low.json")

    def load(self, path: str):
        """Load both models previously written by save()."""
        self.model_high = XGBRegressor()
        self.model_high.load_model(f"{path}/model_high.json")

        self.model_low = XGBRegressor()
        self.model_low.load_model(f"{path}/model_low.json")
|
|
```
|
|
|
|
### 2. TPSLClassifier
|
|
|
|
**Objetivo:** Clasificar si el precio tocará primero Take Profit o Stop Loss.
|
|
|
|
```python
|
|
# app/models/tpsl_classifier.py
|
|
from xgboost import XGBClassifier
|
|
import numpy as np
|
|
|
|
class TPSLClassifier:
    """
    Binary classifier: will price hit Take Profit or Stop Loss first?

    Label convention: 1 = take profit is hit first, 0 = stop loss first.
    """

    LABELS = {0: 'stop_loss', 1: 'take_profit'}

    def __init__(self, tp_percent: float = 1.0, sl_percent: float = 1.0):
        # TP/SL distances (percent) used when labelling the training data.
        self.tp_percent = tp_percent
        self.sl_percent = sl_percent
        self.model = None

    def get_params(self) -> dict:
        """XGBoost hyperparameters for classification"""
        return {
            'n_estimators': 300,
            'max_depth': 5,
            'learning_rate': 0.1,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'min_child_weight': 5,
            'scale_pos_weight': 1.0,  # Adjust for class imbalance
            'objective': 'binary:logistic',
            'eval_metric': 'auc',
            'tree_method': 'hist',
            'random_state': 42
        }

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train the classifier, rebalancing classes via scale_pos_weight."""
        hyper = self.get_params()

        # Weight positives by the neg/pos ratio so an imbalanced dataset
        # does not bias the model toward the majority class.
        positives = np.sum(y == 1)
        negatives = np.sum(y == 0)
        hyper['scale_pos_weight'] = negatives / positives if positives > 0 else 1.0

        classifier = XGBClassifier(**hyper)
        classifier.fit(X, y)
        self.model = classifier

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict class (0 or 1)"""
        return self.model.predict(X)

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Predict probability of TP first"""
        return self.model.predict_proba(X)[:, 1]

    def predict_with_confidence(self, X: np.ndarray) -> dict:
        """
        Predict with confidence score.

        Returns:
            Dict with prediction, label, and confidence
        """
        p_tp = self.predict_proba(X)[0]
        label_id = 1 if p_tp >= 0.5 else 0
        # Confidence is the probability of the chosen class.
        score = p_tp if label_id == 1 else 1 - p_tp

        return {
            'prediction': label_id,
            'label': self.LABELS[label_id],
            'probability_tp': float(p_tp),
            'probability_sl': float(1 - p_tp),
            'confidence': float(score)
        }

    def save(self, path: str):
        """Save model"""
        self.model.save_model(f"{path}/tpsl_model.json")

    def load(self, path: str):
        """Load model"""
        self.model = XGBClassifier()
        self.model.load_model(f"{path}/tpsl_model.json")
|
|
```
|
|
|
|
### 3. SignalClassifier
|
|
|
|
**Objetivo:** Generar señales de trading (BUY, SELL, HOLD).
|
|
|
|
```python
|
|
# app/models/signal_classifier.py
|
|
from xgboost import XGBClassifier
|
|
import numpy as np
|
|
from typing import Dict
|
|
|
|
class SignalClassifier:
    """
    Multi-class classifier for trading signals.
    Classes: 0=HOLD, 1=BUY, 2=SELL

    A BUY/SELL signal is only emitted when the winning class probability
    clears ``min_confidence``; otherwise the safer HOLD is returned.
    """

    LABELS = {0: 'hold', 1: 'buy', 2: 'sell'}
    LABEL_TO_ID = {'hold': 0, 'buy': 1, 'sell': 2}

    def __init__(self, min_confidence: float = 0.6):
        # Minimum winning-class probability required to emit BUY/SELL.
        self.min_confidence = min_confidence
        self.model = None

    def get_params(self) -> dict:
        """XGBoost hyperparameters for multi-class"""
        return {
            'n_estimators': 400,
            'max_depth': 6,
            'learning_rate': 0.08,
            'subsample': 0.85,
            'colsample_bytree': 0.85,
            'min_child_weight': 4,
            'objective': 'multi:softprob',
            'num_class': 3,
            'eval_metric': 'mlogloss',
            'tree_method': 'hist',
            'random_state': 42
        }

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train multi-class classifier"""
        params = self.get_params()
        self.model = XGBClassifier(**params)
        self.model.fit(X, y)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict signal class"""
        return self.model.predict(X)

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Predict probability for each class"""
        return self.model.predict_proba(X)

    @staticmethod
    def _proba_dict(probas) -> Dict:
        """Map the raw class-probability vector onto named classes."""
        return {
            'hold': float(probas[0]),
            'buy': float(probas[1]),
            'sell': float(probas[2])
        }

    def predict_signal(self, X: np.ndarray) -> Dict:
        """
        Generate trading signal with confidence.

        Returns signal only if confidence exceeds threshold,
        otherwise returns HOLD.
        """
        probas = self.predict_proba(X)[0]

        max_proba = np.max(probas)
        predicted_class = np.argmax(probas)

        # Confidence gate: a weak BUY/SELL is downgraded to HOLD.
        if max_proba < self.min_confidence and predicted_class != 0:
            return {
                'signal': 'hold',
                'signal_id': 0,
                'confidence': float(probas[0]),
                'probabilities': self._proba_dict(probas),
                'reason': f'Low confidence ({max_proba:.2%} < {self.min_confidence:.2%})'
            }

        return {
            'signal': self.LABELS[predicted_class],
            'signal_id': int(predicted_class),
            'confidence': float(max_proba),
            'probabilities': self._proba_dict(probas),
            'reason': None
        }

    def save(self, path: str):
        """Save model"""
        self.model.save_model(f"{path}/signal_model.json")

    def load(self, path: str):
        """Load model"""
        self.model = XGBClassifier()
        self.model.load_model(f"{path}/signal_model.json")
|
|
```
|
|
|
|
---
|
|
|
|
## Ensemble Manager
|
|
|
|
```python
|
|
# app/models/ensemble.py
|
|
from typing import Dict, Optional
from datetime import datetime, timezone

import numpy as np

from .range_predictor import RangePredictor
from .tpsl_classifier import TPSLClassifier
from .signal_classifier import SignalClassifier


class EnsembleManager:
    """
    Manages all models and combines their predictions
    for comprehensive trading signals.
    """

    def __init__(self, model_path: str):
        self.model_path = model_path
        self.range_predictors: Dict[int, RangePredictor] = {}
        self.tpsl_classifier: Optional[TPSLClassifier] = None
        self.signal_classifier: Optional[SignalClassifier] = None

        self.horizons = [6, 18, 36, 72]  # 30min, 90min, 3h, 6h

    async def load_all(self):
        """Load all models from disk"""
        for horizon in self.horizons:
            predictor = RangePredictor(horizon)
            predictor.load(f"{self.model_path}/range_predictor/h{horizon}")
            self.range_predictors[horizon] = predictor

        self.tpsl_classifier = TPSLClassifier()
        self.tpsl_classifier.load(f"{self.model_path}/tpsl_classifier")

        self.signal_classifier = SignalClassifier()
        self.signal_classifier.load(f"{self.model_path}/signal_classifier")

    def predict_complete(
        self,
        features: np.ndarray,
        current_price: float,
        horizon: int = 18,
        symbol: str = 'BTCUSDT'
    ) -> Dict:
        """
        Generate complete prediction combining all models.

        Args:
            features: Feature vector for the models.
            current_price: Latest traded price.
            horizon: Prediction horizon in candles (must be in self.horizons).
            symbol: Trading pair the prediction refers to.

        Returns:
            Comprehensive prediction with range, TP/SL, and signal

        Raises:
            ValueError: if no range predictor is loaded for `horizon`.
        """
        if horizon not in self.range_predictors:
            raise ValueError(
                f"No range predictor loaded for horizon {horizon}; "
                f"available: {sorted(self.range_predictors)}"
            )

        # Range prediction
        range_pred = self.range_predictors[horizon].predict_range(
            features, current_price
        )

        # TP/SL classification
        tpsl_pred = self.tpsl_classifier.predict_with_confidence(features)

        # Signal generation
        signal_pred = self.signal_classifier.predict_signal(features)

        # Combine into final recommendation
        return {
            # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated).
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'symbol': symbol,
            'horizon': horizon,
            'horizon_label': self._horizon_label(horizon),

            'price_range': range_pred,
            'tpsl': tpsl_pred,
            'signal': signal_pred,

            'recommendation': self._generate_recommendation(
                range_pred, tpsl_pred, signal_pred
            )
        }

    def _horizon_label(self, horizon: int) -> str:
        """Map a horizon (in candles) to its trading-style label."""
        labels = {
            6: 'scalping',
            18: 'intraday',
            36: 'swing',
            72: 'position'
        }
        return labels.get(horizon, 'custom')

    def _generate_recommendation(
        self,
        range_pred: Dict,
        tpsl_pred: Dict,
        signal_pred: Dict
    ) -> Dict:
        """Generate actionable recommendation"""

        signal = signal_pred['signal']
        confidence = signal_pred['confidence']

        if signal == 'hold':
            return {
                'action': 'HOLD',
                'reason': 'No clear signal',
                'risk_reward': None
            }

        # Risk/reward from the predicted range: reward is the expected move
        # in the trade's direction, risk the expected move against it.
        if signal == 'buy':
            reward = range_pred['delta_high_percent']
            risk = abs(range_pred['delta_low_percent'])
        else:  # sell
            reward = abs(range_pred['delta_low_percent'])
            risk = range_pred['delta_high_percent']

        rr_ratio = reward / risk if risk > 0 else 0

        return {
            'action': signal.upper(),
            'confidence': f"{confidence:.1%}",
            'expected_reward': f"{reward:.2f}%",
            'expected_risk': f"{risk:.2f}%",
            'risk_reward': f"1:{rr_ratio:.1f}",
            'tpsl_prediction': tpsl_pred['label'],
            'quality': 'high' if confidence > 0.75 and rr_ratio > 1.5 else 'medium'
        }
|
|
```
|
|
|
|
---
|
|
|
|
## Métricas de Modelo
|
|
|
|
### Métricas de Evaluación
|
|
|
|
```python
|
|
# app/services/model_evaluator.py
|
|
from sklearn.metrics import (
|
|
mean_absolute_error,
|
|
mean_squared_error,
|
|
accuracy_score,
|
|
precision_recall_fscore_support,
|
|
roc_auc_score
|
|
)
|
|
import numpy as np
|
|
|
|
class ModelEvaluator:
    """Evaluate model performance (regression and classification metrics)."""

    @staticmethod
    def evaluate_range_predictor(y_true: np.ndarray, y_pred: np.ndarray) -> dict:
        """Evaluate regression model.

        Returns:
            Dict with mae, mse, rmse, and mape (percent). MAPE is NaN when
            every target is zero.
        """
        mae = mean_absolute_error(y_true, y_pred)
        mse = mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(mse)

        # Guard MAPE against division by zero: only average over targets
        # that are nonzero.
        nonzero = y_true != 0
        if np.any(nonzero):
            mape = np.mean(
                np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])
            ) * 100
        else:
            mape = float('nan')

        return {
            'mae': float(mae),
            'mse': float(mse),
            'rmse': float(rmse),
            'mape': float(mape)
        }

    @staticmethod
    def evaluate_classifier(y_true: np.ndarray, y_pred: np.ndarray, y_proba: np.ndarray = None) -> dict:
        """Evaluate classification model.

        Returns:
            Dict with accuracy, weighted precision/recall/f1, and AUC when
            probabilities are provided and AUC is computable.
        """
        accuracy = accuracy_score(y_true, y_pred)
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_true, y_pred, average='weighted'
        )

        result = {
            'accuracy': float(accuracy),
            'precision': float(precision),
            'recall': float(recall),
            'f1_score': float(f1)
        }

        if y_proba is not None:
            # AUC is undefined e.g. when only one class is present; skip it
            # rather than failing the whole evaluation. Only ValueError is
            # swallowed — anything else is a real bug and should surface.
            try:
                auc = roc_auc_score(y_true, y_proba, multi_class='ovr')
                result['auc'] = float(auc)
            except ValueError:
                pass

        return result
|
|
```
|
|
|
|
---
|
|
|
|
## Hyperparameter Tuning
|
|
|
|
```python
|
|
# scripts/tune_hyperparameters.py
|
|
from optuna import create_study
|
|
from xgboost import XGBClassifier
|
|
from sklearn.model_selection import cross_val_score
|
|
import numpy as np
|
|
|
|
def objective(trial, X, y):
    """Optuna objective: mean 5-fold CV accuracy for a sampled parameter set."""

    # Sample one candidate hyperparameter configuration from the search space.
    search_space = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0, 1),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 1),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.5, 2),
    }

    candidate = XGBClassifier(**search_space, random_state=42)

    # 5-fold cross-validated accuracy, folds evaluated in parallel.
    fold_scores = cross_val_score(
        candidate, X, y, cv=5, scoring='accuracy', n_jobs=-1
    )

    return np.mean(fold_scores)
|
|
|
|
def tune_model(X, y, n_trials: int = 100):
    """Run hyperparameter optimization and return the best parameter set."""
    study = create_study(direction='maximize')
    study.optimize(lambda trial: objective(trial, X, y), n_trials=n_trials)
    return study.best_params
|
|
```
|
|
|
|
---
|
|
|
|
## Model Versioning
|
|
|
|
```python
|
|
# app/services/model_version.py
|
|
from pathlib import Path
|
|
import json
|
|
from datetime import datetime
|
|
|
|
class ModelVersion:
    """Manage model versions on disk.

    Versions are timestamp-named directories under ``base_path/model_name``,
    each holding a ``metadata.json`` with metrics and hyperparameters.
    """

    def __init__(self, base_path: str):
        self.base_path = Path(base_path)

    def save_version(
        self,
        model_name: str,
        metrics: dict,
        params: dict
    ) -> str:
        """Save model version metadata.

        Args:
            model_name: Logical model name (directory under base_path).
            metrics: Evaluation metrics to record.
            params: Hyperparameters the model was trained with.

        Returns:
            The new version id (UTC timestamp string).
        """
        version_id = datetime.utcnow().strftime('%Y%m%d_%H%M%S')

        metadata = {
            'version_id': version_id,
            'model_name': model_name,
            'created_at': datetime.utcnow().isoformat(),
            'metrics': metrics,
            'hyperparameters': params
        }

        version_path = self.base_path / model_name / version_id
        version_path.mkdir(parents=True, exist_ok=True)

        with open(version_path / 'metadata.json', 'w') as f:
            json.dump(metadata, f, indent=2)

        return version_id

    def get_latest_version(self, model_name: str):
        """Return the newest version id, or None if the model has no versions."""
        model_path = self.base_path / model_name

        if not model_path.exists():
            return None

        # Only consider version directories: timestamp ids sort
        # lexicographically in chronological order, but stray files
        # (e.g. .DS_Store) must not be mistaken for a version.
        versions = sorted(
            (entry for entry in model_path.iterdir() if entry.is_dir()),
            reverse=True
        )
        return versions[0].name if versions else None

    def get_version_metrics(self, model_name: str, version_id: str) -> dict:
        """Return stored metadata for a specific version, or None if missing."""
        metadata_path = self.base_path / model_name / version_id / 'metadata.json'

        if not metadata_path.exists():
            return None

        with open(metadata_path) as f:
            return json.load(f)
|
|
```
|
|
|
|
---
|
|
|
|
## Performance Targets
|
|
|
|
| Modelo | Métrica | Target | Actual |
|
|
|--------|---------|--------|--------|
|
|
| RangePredictor (High) | MAE | < 0.5% | 0.3% |
|
|
| RangePredictor (Low) | MAE | < 0.5% | 0.35% |
|
|
| TPSLClassifier | Accuracy | > 65% | 68% |
|
|
| TPSLClassifier | AUC | > 0.70 | 0.73 |
|
|
| SignalClassifier | Accuracy | > 60% | 65% |
|
|
| SignalClassifier | Precision (BUY) | > 65% | 67% |
|
|
| SignalClassifier | Precision (SELL) | > 65% | 64% |
|
|
|
|
---
|
|
|
|
## Referencias
|
|
|
|
- [ET-ML-001: Arquitectura](./ET-ML-001-arquitectura.md)
|
|
- [ET-ML-003: Feature Engineering](./ET-ML-003-features.md)
|
|
- [XGBoost Documentation](https://xgboost.readthedocs.io/)
|
|
|
|
---
|
|
|
|
**Autor:** Requirements-Analyst
|
|
**Fecha:** 2025-12-05
|