# trading-platform-ml-engine/config/models.yaml

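# Model configuration: per-model settings (XGBoost, GRU, Transformer), the
# meta-model, the AMD strategy models, and the prediction output layout.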

# XGBoost Settings
# Three groups: `base` is one fixed parameter set, `hyperparameter_search`
# lists candidate values per parameter, and `gpu` holds GPU-related extras.
# NOTE(review): how the loader combines these groups is not visible here.
xgboost:
  # Default parameter set; key names match XGBoost's own parameter names.
  base:
    n_estimators: 200
    max_depth: 5
    learning_rate: 0.05
    subsample: 0.8
    colsample_bytree: 0.8
    gamma: 0.1
    reg_alpha: 0.1
    reg_lambda: 1.0
    min_child_weight: 3
    tree_method: "hist"
    # NOTE(review): "cuda" presumably requires a GPU on the host; no CPU
    # fallback is configured in this file.
    device: "cuda"
    random_state: 42

  # Candidate values for a subset of the `base` keys. Every list contains the
  # corresponding `base` value. The lists multiply out to 4*3*3*3*3 = 324
  # combinations if the search is exhaustive (search strategy not shown here).
  hyperparameter_search:
    n_estimators: [100, 200, 300, 500]
    max_depth: [3, 5, 7]
    learning_rate: [0.01, 0.05, 0.1]
    subsample: [0.7, 0.8, 0.9]
    colsample_bytree: [0.7, 0.8, 0.9]

  # NOTE(review): `predictor: "gpu_predictor"` is the pre-2.0 XGBoost GPU
  # switch, while `device: "cuda"` in `base` is the 2.0+ one. Confirm which
  # XGBoost version is pinned and whether `predictor` is still consumed.
  gpu:
    max_bin: 512
    predictor: "gpu_predictor"

# GRU Settings
# Groups: network `architecture`, `training` loop settings, input `sequence`
# windowing, and `mixed_precision`.
gru:
  architecture:
    hidden_size: 128
    num_layers: 2
    dropout: 0.2
    # NOTE(review): `recurrent_dropout` is a Keras GRU argument and has no
    # counterpart on torch.nn.GRU — confirm the framework honours it.
    recurrent_dropout: 0.1
    use_attention: true
    # attention_units / attention_heads = 128 / 8 = 16, assuming the units
    # are split evenly across heads — TODO confirm.
    attention_heads: 8
    attention_units: 128

  training:
    epochs: 100
    batch_size: 256
    learning_rate: 0.001
    optimizer: "adamw"
    loss: "mse"
    # reduce_lr_patience (5) is shorter than early_stopping_patience (15), so
    # presumably the LR is lowered before training is stopped.
    early_stopping_patience: 15
    reduce_lr_patience: 5
    reduce_lr_factor: 0.5
    # Keep the decimal point: YAML 1.1 loaders such as PyYAML read `1e-7`
    # (no dot) as a string, not a float.
    min_lr: 1.0e-7
    gradient_clip: 1.0

  # NOTE(review): presumably the input window size and the stride between
  # consecutive windows, both in time steps — verify against the data loader.
  sequence:
    length: 32
    step: 1

  mixed_precision:
    enabled: true
    dtype: "bfloat16"

# Transformer Settings
# Groups: encoder/decoder `architecture`, `training` settings, and the input
# `sequence` limit.
transformer:
  architecture:
    # d_model / nhead = 512 / 8 = 64 dimensions per attention head.
    d_model: 512
    nhead: 8
    num_encoder_layers: 4
    num_decoder_layers: 2
    # 2048 = 4 * d_model.
    dim_feedforward: 2048
    dropout: 0.1
    use_flash_attention: true

  # Unlike `gru.training`, no optimizer, loss, or early-stopping keys are set
  # here. NOTE(review): confirm the trainer supplies defaults for them.
  training:
    epochs: 100
    batch_size: 512
    learning_rate: 0.0001
    warmup_steps: 4000
    # batch_size * gradient_accumulation_steps = 512 * 2 = 1024 samples per
    # optimizer step, assuming standard gradient accumulation — TODO confirm.
    gradient_accumulation_steps: 2

  sequence:
    max_length: 128

# Meta-Model Settings
# `type` picks the meta-model; the sub-sections below hold per-type
# parameters, the meta-feature options, and the level toggles.
meta_model:
  # Options: xgboost, linear, ridge, neural
  # Only `xgboost` and `neural` have a parameter section in this file.
  type: "xgboost"

  # Parameters for type "xgboost".
  xgboost:
    n_estimators: 100
    max_depth: 3
    learning_rate: 0.1
    subsample: 0.8
    colsample_bytree: 0.8

  # Parameters for type "neural".
  neural:
    hidden_layers:
      - 64
      - 32
    activation: "relu"
    dropout: 0.2

  # Inputs of the meta-model.
  features:
    use_original: true
    use_statistics: true
    max_original_features: 10

  levels:
    use_level_2: true
    # Level 3 is the meta-metamodel.
    use_level_3: true

# AMD Strategy Models
# One entry per phase: accumulation, manipulation, distribution. Each entry
# names its focus features, a model type, and that model's size.
amd:
  # NOTE(review): this file has no top-level `lstm` section, unlike `gru`
  # and `transformer` — confirm where the remaining LSTM settings come from.
  accumulation:
    focus_features:
      - "volume"
      - "obv"
      - "support_levels"
      - "rsi"
    model_type: "lstm"
    hidden_size: 64

  manipulation:
    focus_features:
      - "volatility"
      - "volume_spikes"
      - "false_breakouts"
    model_type: "gru"
    hidden_size: 128

  # Sized with `d_model` rather than `hidden_size`, matching the key used in
  # the top-level `transformer` section.
  distribution:
    focus_features:
      - "momentum"
      - "divergences"
      - "resistance_levels"
    model_type: "transformer"
    d_model: 256

# Output Configuration
# `horizons` splits forecast steps 1-72 into four contiguous, non-overlapping
# bands; `targets` lists the predicted quantities.
output:
  # `range` is [first_step, last_step]; adjacent bands meet at 6/7, 18/19 and
  # 36/37, so both bounds are presumably inclusive — TODO confirm.
  # The minute figures below imply 5 minutes per step (step * 5).
  # NOTE(review): the bar size is not defined in this file.
  horizons:
    - name: "scalping"
      id: 0
      range: [1, 6]  # 5-30 minutes
    - name: "intraday"
      id: 1
      range: [7, 18]  # 35-90 minutes
    - name: "swing"
      id: 2
      range: [19, 36]  # 95-180 minutes
    - name: "position"
      id: 3
      range: [37, 72]  # 185-360 minutes (~3-6 hours)

  targets:
    - "high"
    - "low"
    - "close"
    - "direction"