# File: local-llm-agent/orchestration/inventarios/MASTER_INVENTORY.yml

# =============================================================================
# MASTER_INVENTORY.yml - Local LLM Agent
# =============================================================================
# System: SIMCO v4.3.0 + NEXUS v4.0
# Purpose: Master inventory of the project
# Date: 2026-01-24
# Version: 1.0.0
# =============================================================================

# Document-level metadata. `version` is the version of this inventory file
# (it matches the header above); date- and number-like values are quoted so
# that parsers keep them as strings.
version: '1.0.0'
proyecto: 'local-llm-agent'
nivel: 'CONSUMER (L2)'
tipo: 'STANDALONE'
subtipo: 'INFRASTRUCTURE'
actualizado: '2026-01-24'
actualizado_por: 'Claude Code (Opus 4.5)'

# =============================================================================
# EXECUTIVE SUMMARY
# =============================================================================
# Short description of the project plus its headline status fields.
resumen:
  descripcion: 'Gateway de inferencia LLM local OpenAI-compatible'
  estado: 'production-ready'
  # Separate from the top-level `version` key (1.0.0); presumably the project
  # release version - confirm.
  version: '0.6.0'
  prioridad: 'P1'
  # NOTE(review): every entry under `fases` reports 100%; confirm what the
  # remaining 5% refers to.
  completitud: '95%'

# =============================================================================
# GLOBAL METRICS
# =============================================================================
# Project-wide totals; several of them aggregate the per-service sections.
metricas:
  # gateway.archivos.total + inference_engine.archivos.total (20 + 22).
  archivos_totales: 42
  lineas_codigo: 3500
  # gateway.tests.total + inference_engine.tests.total (54 + 44).
  tests_totales: 98
  test_coverage: '90%'
  # The two services inventoried in this file: gateway and inference_engine.
  servicios: 2
  # NOTE(review): the module lists in this file enumerate 8 gateway endpoint
  # entries (one of them a wildcard) and 4 inference-engine entries; confirm
  # what this figure counts.
  endpoints: 9
  # Same value as the top-level mcp_tools.total.
  mcp_tools: 4

# =============================================================================
# INVENTORY BY LAYER
# =============================================================================

# -----------------------------------------------------------------------------
# GATEWAY (NestJS)
# -----------------------------------------------------------------------------
# NestJS/TypeScript service under apps/gateway, assigned port 3160.
gateway:
  path: 'apps/gateway'
  framework: 'NestJS 10.x'
  lenguaje: 'TypeScript'
  runtime: 'Node.js 20 LTS'
  puerto: 3160

  # File counts: total = src + tests (15 + 5).
  archivos:
    total: 20
    src: 15
    tests: 5

  # One entry per NestJS module. `archivo` values are presumably relative to
  # `path` above - confirm. `endpoints` lists the routes a module exposes.
  modulos:
    - nombre: 'AppModule'
      tipo: 'root'
      archivo: 'src/app.module.ts'

    - nombre: 'ChatModule'
      tipo: 'feature'
      archivo: 'src/modules/chat/chat.module.ts'
      endpoints:
        - 'POST /v1/chat/completions'

    - nombre: 'ModelsModule'
      tipo: 'feature'
      archivo: 'src/modules/models/models.module.ts'
      endpoints:
        - 'GET /v1/models'

    - nombre: 'LoraModule'
      tipo: 'feature'
      archivo: 'src/modules/lora/lora.module.ts'
      endpoints:
        # Wildcard entry: individual LoRA routes are not enumerated here.
        - 'POST /v1/lora/*'

    - nombre: 'McpToolsModule'
      tipo: 'feature'
      archivo: 'src/modules/mcp-tools/mcp-tools.module.ts'
      endpoints:
        # One route per tool listed in the top-level `mcp_tools` section.
        - 'POST /mcp/tools/classify'
        - 'POST /mcp/tools/extract'
        - 'POST /mcp/tools/rewrite'
        - 'POST /mcp/tools/summarize'

    - nombre: 'HealthModule'
      tipo: 'infrastructure'
      archivo: 'src/modules/health/health.module.ts'
      endpoints:
        - 'GET /health'

  # Test counts: total = unitarios + integracion (40 + 14). Presumably test
  # cases, as opposed to the test files counted under archivos.tests - confirm.
  tests:
    total: 54
    unitarios: 40
    integracion: 14
    estado: 'pasando'

# -----------------------------------------------------------------------------
# INFERENCE ENGINE (Python)
# -----------------------------------------------------------------------------
# FastAPI/Python service under apps/inference-engine, assigned port 3161.
inference_engine:
  path: 'apps/inference-engine'
  framework: 'FastAPI'
  lenguaje: 'Python'
  # Quoted on purpose: an unquoted 3.11 would be parsed as a float.
  # Presumably the Python version - confirm.
  version: '3.11'
  puerto: 3161

  # File counts: total = src + tests (16 + 6).
  archivos:
    total: 22
    src: 16
    tests: 6

  # Entry point, routers and inference backends. `archivo` values are
  # presumably relative to `path` above - confirm.
  modulos:
    - nombre: 'main'
      tipo: 'entrypoint'
      archivo: 'src/main.py'

    - nombre: 'chat_router'
      tipo: 'router'
      archivo: 'src/routers/chat.py'
      endpoints:
        - 'POST /chat'

    - nombre: 'models_router'
      tipo: 'router'
      archivo: 'src/routers/models.py'
      endpoints:
        - 'GET /models'

    - nombre: 'health_router'
      tipo: 'router'
      archivo: 'src/routers/health.py'
      endpoints:
        - 'GET /health'
        - 'GET /metrics'

    # Backend modules list no endpoints of their own.
    - nombre: 'ollama_backend'
      tipo: 'backend'
      archivo: 'src/backends/ollama.py'

    - nombre: 'vllm_backend'
      tipo: 'backend'
      archivo: 'src/backends/vllm.py'

  # Test counts: total = unitarios + integracion (30 + 14).
  tests:
    total: 44
    unitarios: 30
    integracion: 14
    estado: 'pasando'

# -----------------------------------------------------------------------------
# MCP TOOLS
# -----------------------------------------------------------------------------
# Catalogue of MCP tools; `total` equals the number of entries under `tools`.
mcp_tools:
  total: 4

  # Tool names match the /mcp/tools/<nombre> routes listed for McpToolsModule.
  # `tier` takes the values 'small' and 'main' in this file; presumably the
  # model tier used by the tool - confirm.
  tools:
    - nombre: 'classify'
      descripcion: 'Clasificar texto en categorias'
      tier: 'small'
      estado: 'production-ready'

    - nombre: 'extract'
      descripcion: 'Extraer datos estructurados'
      tier: 'small'
      estado: 'production-ready'

    - nombre: 'rewrite'
      descripcion: 'Reescribir texto'
      tier: 'main'
      estado: 'production-ready'

    - nombre: 'summarize'
      descripcion: 'Resumir texto'
      tier: 'main'
      estado: 'production-ready'

# -----------------------------------------------------------------------------
# CONFIGURATION
# -----------------------------------------------------------------------------
# Configuration artifacts of the project and the purpose of each one.
configuracion:
  path: 'config'

  # Each entry pairs a file (or directory, marked by a trailing slash) with
  # a short statement of what it is for.
  archivos:
    - nombre: 'docker-compose.yml'
      proposito: 'Desarrollo con Ollama'

    - nombre: 'docker-compose.prod.yml'
      proposito: 'Produccion con vLLM'

    - nombre: 'prometheus.yml'
      proposito: 'Configuracion de metricas'

    - nombre: 'grafana/'
      proposito: 'Dashboards de Grafana'

  # Environment files: one example template plus one per environment.
  env_files:
    - '.env.example'
    - '.env.development'
    - '.env.production'

# -----------------------------------------------------------------------------
# DOCUMENTATION
# -----------------------------------------------------------------------------
# Documentation files; names are presumably relative to `path` - confirm.
documentacion:
  path: 'docs'

  archivos:
    - 'README.md'
    - 'API.md'
    - 'DEPLOYMENT.md'
    - 'MCP-TOOLS.md'

# =============================================================================
# INFERENCE BACKENDS
# =============================================================================
# The two inference backends, keyed by name. Both declare the same default
# model; they differ in role (`tipo`), host, port and state.
backends:
  # Development backend.
  ollama:
    tipo: 'desarrollo'
    puerto: 11434
    host: 'localhost'
    modelo_default: 'gpt-oss-20b'
    estado: 'activo'

  # Production backend; the only one that declares features and a hardware
  # requirement.
  vllm:
    tipo: 'produccion'
    puerto: 8000
    host: 'wsl'
    modelo_default: 'gpt-oss-20b'
    features:
      - 'Multi-LoRA'
      - 'Continuous Batching'
    requiere: 'GPU NVIDIA'
    estado: 'configurado'

# =============================================================================
# EXTERNAL DEPENDENCIES
# =============================================================================
# External software the project relies on, grouped as runtime, optional
# services and GPU tooling.
dependencias_externas:
  # Inference runtimes; ports match the `backends` section. Neither one is
  # individually mandatory.
  runtime:
    - nombre: 'Ollama'
      version: '>=0.1.0'
      puerto: 11434
      obligatorio: false

    - nombre: 'vLLM'
      version: '>=0.4.0'
      puerto: 8000
      obligatorio: false

  # Optional supporting services. Only PostgreSQL pins a version here.
  opcional:
    - nombre: 'PostgreSQL'
      # Quoted so the value stays a string rather than an integer.
      version: '16'
      puerto: 5432
      database: 'local_llm_dev'

    - nombre: 'Redis'
      puerto: 6379
      db: 9

    - nombre: 'Prometheus'
      puerto: 9090

    - nombre: 'Grafana'
      puerto: 3000

  # GPU stack. No version constraint is recorded for the container toolkit.
  gpu:
    - nombre: 'NVIDIA CUDA'
      version: '>=12.6'

    - nombre: 'NVIDIA Container Toolkit'

# =============================================================================
# PHASE STATUS
# =============================================================================
# Delivery phases keyed fase_1..fase_3; all three are recorded as completed
# at 100%.
fases:
  fase_1:
    nombre: 'MVP - Gateway + Ollama'
    estado: 'completado'
    completitud: '100%'

  fase_2:
    nombre: 'MCP Tools + Rate Limiting'
    estado: 'completado'
    completitud: '100%'

  fase_3:
    nombre: 'Production - vLLM + Multi-LoRA'
    estado: 'completado'
    completitud: '100%'

# =============================================================================
# WORKSPACE REGISTRATION
# =============================================================================
# Pointers to the workspace-level files in which this project is registered.
registro_workspace:
  # Port inventory entry; the ports are gateway.puerto and
  # inference_engine.puerto.
  devenv_ports:
    archivo: 'orchestration/inventarios/DEVENV-PORTS-INVENTORY.yml'
    entrada: 'local-llm-agent'
    puertos:
      - 3160
      - 3161

  # Node identifier of this project in the dependency graph file.
  dependency_graph:
    archivo: 'orchestration/DEPENDENCY-GRAPH.yml'
    nodo: 'infra:local-llm-agent'

# =============================================================================
# NOTES
# =============================================================================
# Free-form remarks about the project; each list item is one note.
notas:
  - 'Proyecto STANDALONE de infraestructura'
  - 'Sirve a todos los proyectos del workspace via API'
  - 'No forma parte de la jerarquia ERP'
  - 'Phase 3 complete - Production ready'
  - 'GPU setup requiere WSL con NVIDIA drivers'