---
# File: local-llm-agent/orchestration/_archive/environment/ENVIRONMENT-INVENTORY.yml

# =============================================================================
# ENVIRONMENT-INVENTORY.yml - Local LLM Agent
# =============================================================================
# Development environment inventory
# Managed by: DevEnv Agent / Claude Code
# Date: 2026-01-20
# Version: 1.0.0
# =============================================================================

# Top-level identification fields. `version` and `updated` repeat the
# Version/Date given in the header comment above; keep them in sync.
version: "1.0.0"
project_name: "local-llm-agent"
updated: "2026-01-20"
created_by: "Claude Code (Opus 4.5)"

# =============================================================================
# PROJECT METADATA
# =============================================================================
# Classification of the project. Values are stored in Spanish as recorded.
metadata:
  nivel: 'INFRASTRUCTURE'  # level
  tipo: 'standalone'  # type
  estado: 'desarrollo'  # status ("desarrollo" = development)
  prioridad: 'P1'  # priority
  descripcion: 'Gateway LLM local OpenAI-compatible'  # description

# =============================================================================
# DEVELOPMENT TOOLS
# =============================================================================
# Tooling grouped by role. Each entry records a version; most also carry a
# `uso` (usage) string naming the component that relies on the tool.
tools:
  # Language runtimes.
  runtime:
    node:
      version: '20.x'
      uso: 'Gateway API (NestJS)'
    python:
      version: '3.11+'
      uso: 'Inference Engine (FastAPI)'

  # Application frameworks.
  frameworks:
    nestjs:
      version: '10.x'
      uso: 'API Gateway'
    fastapi:
      version: '0.104+'
      uso: 'Inference Engine'

  # Build and packaging tools.
  build:
    typescript:
      version: '5.x'
      uso: 'Gateway'
    pip:
      version: 'latest'
      uso: 'Python dependencies'

  # Containers ("contenedores").
  contenedores:
    docker:
      version: 'latest'
      # Quoted so the value stays a string rather than the float 3.8.
      compose_version: '3.8'

# =============================================================================
# SERVICES
# =============================================================================
# The two application services. `puerto` is the listening port and `path` is
# the service directory. The ports equal the GATEWAY_PORT / INFERENCE_PORT
# values listed under `env_variables`.
services:
  gateway:
    nombre: 'API Gateway'
    framework: 'NestJS'
    puerto: 3160
    path: 'apps/gateway'
    health_endpoint: '/health'
    descripcion: 'API OpenAI-compatible + MCP Tools'

  inference_engine:
    nombre: 'Inference Engine'
    framework: 'FastAPI'
    puerto: 3161
    path: 'apps/inference-engine'
    health_endpoint: '/health'
    descripcion: 'Motor de inferencia Python'

# =============================================================================
# INFERENCE BACKENDS
# =============================================================================
# Backends the inference engine can target. Both entries name the same
# default model; `estado` is the backend's status.
inference_backends:
  # Marked as the MVP backend and currently active ("activo").
  ollama:
    tipo: 'mvp'
    puerto: 11434
    host: 'localhost'
    modelo_default: 'gpt-oss-20b'
    estado: 'activo'

  # Marked as the production ("produccion") backend; still a placeholder.
  vllm:
    tipo: 'produccion'
    puerto: 8000
    host: 'wsl'
    modelo_default: 'gpt-oss-20b'
    estado: 'placeholder'

# =============================================================================
# DATABASES (SHARED WORKSPACE INSTANCE)
# =============================================================================
# Both stores are described as a single shared instance
# ("instancia_unica_compartida"), and both `uso` strings mark them as optional.
databases:
  # The `nota` says: PostgreSQL is ONE instance shared by all projects,
  # separated by database name and user; do NOT create additional instances.
  postgresql:
    arquitectura: 'instancia_unica_compartida'
    host: 'localhost'
    port: 5432
    database: 'local_llm_dev'
    user: 'local_llm_dev'
    uso: 'Metricas y cache (opcional)'
    nota: |
      PostgreSQL es una instancia UNICA compartida por todos los proyectos.
      La separacion es por nombre de base de datos y usuario.
      NO crear instancias adicionales.

  # The `nota` says: Redis is ONE instance shared by all projects, separated
  # by database number (0-15); do NOT create additional instances.
  redis:
    arquitectura: 'instancia_unica_compartida'
    host: 'localhost'
    port: 6379
    db: 9
    uso: 'Cache de sesiones (opcional)'
    nota: |
      Redis es una instancia UNICA compartida por todos los proyectos.
      La separacion es por numero de database (0-15).
      NO crear instancias adicionales.

# =============================================================================
# LLM MODEL
# =============================================================================
# The model served by the project. `nombre` equals the `modelo_default` of
# both inference backends and the MODEL_NAME environment variable.
modelo:
  nombre: 'gpt-oss-20b'
  quantizacion: 'Q4_K_M'  # quantization
  vram_mb: 14000
  context_length: 16384
  ubicacion: 'models/base/'  # location

# =============================================================================
# INFERENCE TIERS
# =============================================================================
# Per-tier limits and latency targets (`latencia_target_ms`, milliseconds).
tiers:
  # Usage: classification, simple extraction.
  small:
    max_tokens: 512
    max_context: 4096
    latencia_target_ms: 500
    uso: 'Clasificacion, extraccion simple'

  # Usage: complex tasks. `max_context` equals `modelo.context_length`.
  main:
    max_tokens: 2048
    max_context: 16384
    latencia_target_ms: 2000
    uso: 'Tareas complejas'

# =============================================================================
# REQUIRED ENVIRONMENT VARIABLES
# =============================================================================
# One list per service. Every `valor` (value) is a quoted string, including
# the numeric ports. `requerido` flags the variable as required.
env_variables:
  gateway:
    - nombre: 'GATEWAY_PORT'
      valor: '3160'
      requerido: true
    - nombre: 'NODE_ENV'
      valor: 'development'
      requerido: true
    - nombre: 'INFERENCE_HOST'
      valor: 'localhost'
      requerido: true
    - nombre: 'INFERENCE_PORT'
      valor: '3161'
      requerido: true

  # INFERENCE_PORT is listed here as well, with the same value as above.
  inference_engine:
    - nombre: 'INFERENCE_PORT'
      valor: '3161'
      requerido: true
    - nombre: 'INFERENCE_BACKEND'
      valor: 'ollama'
      requerido: true
    - nombre: 'OLLAMA_HOST'
      valor: 'http://localhost:11434'
      requerido: true
    - nombre: 'MODEL_NAME'
      valor: 'gpt-oss-20b'
      requerido: true

# =============================================================================
# EXTERNAL DEPENDENCIES
# =============================================================================
# Services expected outside this project. `obligatorio` marks whether the
# dependency is mandatory; only Ollama is.
dependencias_externas:
  - nombre: 'Ollama'
    tipo: 'inferencia'
    puerto: 11434
    obligatorio: true
    instalacion: 'https://ollama.ai'

  # Note: shared workspace instance.
  - nombre: 'PostgreSQL'
    tipo: 'database'
    puerto: 5432
    obligatorio: false
    nota: 'Instancia compartida del workspace'

  # Note: shared workspace instance, DB 9 (same number as `databases.redis.db`).
  - nombre: 'Redis'
    tipo: 'cache'
    puerto: 6379
    obligatorio: false
    nota: 'Instancia compartida del workspace, DB 9'

# =============================================================================
# SETUP INSTRUCTIONS
# =============================================================================
# Prerequisites, ordered setup steps (`pasos`), and verification commands.
# Each step pairs a description with the shell command (`comando`) to run.
setup:
  # Prerequisites: Node.js 20.x, Python 3.11+, Ollama running on port 11434,
  # and the gpt-oss-20b model downloaded in Ollama.
  prerequisitos:
    - 'Node.js 20.x instalado'
    - 'Python 3.11+ instalado'
    - 'Ollama instalado y corriendo en puerto 11434'
    - 'Modelo gpt-oss-20b descargado en Ollama'

  pasos:
    # Install Gateway dependencies.
    - descripcion: 'Instalar dependencias Gateway'
      comando: 'cd apps/gateway && npm install'

    # Install Inference Engine dependencies.
    - descripcion: 'Instalar dependencias Inference Engine'
      comando: 'cd apps/inference-engine && pip install -r requirements.txt'

    # Copy the environment variable template.
    - descripcion: 'Copiar variables de entorno'
      comando: 'cp .env.example .env'

    # Start the Gateway.
    - descripcion: 'Iniciar Gateway'
      comando: 'cd apps/gateway && npm run start:dev'

    # Start the Inference Engine on port 3161.
    - descripcion: 'Iniciar Inference Engine'
      comando: 'cd apps/inference-engine && uvicorn src.main:app --reload --port 3161'

  # Health checks against each service's /health endpoint.
  verificacion:
    - descripcion: 'Health check Gateway'
      comando: 'curl http://localhost:3160/health'

    - descripcion: 'Health check Inference Engine'
      comando: 'curl http://localhost:3161/health'

# =============================================================================
# WORKSPACE REGISTRATION
# =============================================================================
# Workspace-level files that reference this project. `archivo` is the file
# path and `entrada` / `nodo` is the entry or node name used inside it.
registro_workspace:
  devenv_ports_inventory:
    archivo: 'orchestration/inventarios/DEVENV-PORTS-INVENTORY.yml'
    version: '3.7.0'
    entrada: 'local-llm-agent'

  devenv_master_inventory:
    archivo: 'orchestration/inventarios/DEVENV-MASTER-INVENTORY.yml'
    entrada: 'local-llm-agent'

  dependency_graph:
    archivo: 'orchestration/DEPENDENCY-GRAPH.yml'
    nodo: 'infra:local-llm-agent'