# local-llm-agent/orchestration/PROJECT-PROFILE.yml

# ===============================================================================
# PROJECT-PROFILE.yml - Local LLM Agent
# ===============================================================================
#
# Sistema: SIMCO v4.3.0 + NEXUS v4.0
# Proposito: Perfil y metadata del proyecto
# Fecha: 2026-01-24
#
# ===============================================================================

# Project identity and classification: display name, machine-readable code,
# short alias, type/subtype, level and current version.
proyecto:
  nombre: "Local LLM Agent"
  # Same slug as the directory name used in every entry under `rutas`.
  codigo: "local-llm-agent"
  # Same prefix as the service ids under `servicios` (LLM-SVC-###).
  alias: "LLM"
  tipo: "STANDALONE"
  subtipo: "INFRASTRUCTURE"
  # NOTE(review): the level scale is not defined in this file - presumably a
  # SIMCO classification; confirm against the SIMCO documentation.
  nivel: "2A"
  # Quoted so the version is always read as a string.
  version: "0.6.0"

# Free-form project summary, written in Spanish. This is a literal block
# scalar (`|`): line breaks and the blank line are kept exactly as written,
# and any `#` placed inside the text would become part of the value, so all
# commentary lives here above the key.
# The feature list restates items detailed elsewhere in this file
# (`mcp_tools`, `stack.backends`, `stack.monitoring`); keep them in sync.
descripcion: |
  Gateway de LLM local para el workspace-v2.
  Permite a los agentes (Claude Code, Trae, Gemini) delegar tareas
  simples para ahorrar contexto y tokens.

  Caracteristicas principales:
  - API Gateway OpenAI-compatible
  - MCP Tools (classify, extract, rewrite, summarize)
  - Multi-backend (Ollama CPU, vLLM GPU)
  - Multi-LoRA adapters por proyecto
  - Monitoring con Prometheus + Grafana
  - Rate limiting por tier

# -----------------------------------------------------------------------------
# STACK TECNOLOGICO
# -----------------------------------------------------------------------------

# Technology stack, grouped by component: gateway, inference engine, model
# backends, monitoring, database and cache.
stack:
  # API gateway. `version` follows `framework` here and the Node.js runtime
  # is listed separately under `runtime`.
  gateway:
    framework: "NestJS"
    version: "10.x"
    lenguaje: "TypeScript"
    runtime: "Node.js 20 LTS"

  # Inference engine.
  # NOTE(review): `version` here follows `lenguaje` and reads as the Python
  # version (3.11), whereas under `gateway` it appears to be the framework
  # version - confirm which meaning consumers of this key expect.
  inference:
    framework: "FastAPI"
    lenguaje: "Python"
    # Quoted on purpose: an unquoted 3.11 would be parsed as a float.
    version: "3.11"

  # One model backend per environment: `desarrollo` (development) is Ollama
  # on CPU, `produccion` (production) is vLLM on GPU.
  backends:
    desarrollo:
      nombre: "Ollama"
      tipo: "CPU"
      puerto: 11434
    produccion:
      nombre: "vLLM"
      tipo: "GPU"
      puerto: 8000
      # Only the production backend declares a feature list.
      features:
        - "Multi-LoRA"
        - "Continuous Batching"

  # Monitoring ports. The same two values are repeated for Prometheus and
  # Grafana under `dependencias.opcional`; change both places together.
  monitoring:
    prometheus:
      puerto: 9090
    grafana:
      puerto: 3000

  # Database. `obligatorio: false` marks it as not required, matching its
  # listing under `dependencias.opcional`.
  database:
    motor: "PostgreSQL"
    # Quoted so the version stays a string rather than an integer.
    version: "16"
    # NOTE(review): presumably the database name for a dev environment
    # (suffix `_dev`) - confirm whether other environments use another name.
    nombre: "local_llm_dev"
    obligatorio: false

  # Cache. Also marked as not required.
  cache:
    motor: "Redis"
    # NOTE(review): presumably the Redis logical database index - confirm.
    db: 9
    obligatorio: false

# -----------------------------------------------------------------------------
# SERVICIOS
# -----------------------------------------------------------------------------

# Services that make up the project. Each entry carries an id, display name,
# port, source path, status and the list of endpoints it exposes.
servicios:
  # Gateway service. Its `path` matches the tail of `rutas.gateway`, so it
  # appears to be relative to the project root (`rutas.proyecto`).
  - id: "LLM-SVC-001"
    nombre: "Gateway API"
    puerto: 3160
    path: "apps/gateway"
    estado: "production-ready"
    # Entries ending in `/*` presumably denote a route prefix rather than a
    # literal path - confirm how consumers interpret the wildcard.
    endpoints:
      - "/v1/chat/completions"
      - "/v1/models"
      - "/v1/lora/*"
      - "/mcp/tools/*"
      - "/health"

  # Inference service. Its `path` matches the tail of `rutas.inference`.
  # It is the only service listing a `/metrics` endpoint.
  - id: "LLM-SVC-002"
    nombre: "Inference Engine"
    puerto: 3161
    path: "apps/inference-engine"
    estado: "production-ready"
    endpoints:
      - "/chat"
      - "/models"
      - "/health"
      - "/metrics"

# -----------------------------------------------------------------------------
# FASES DE DESARROLLO
# -----------------------------------------------------------------------------

# Development phases, keyed as `fase_<n>_<slug>`. Each phase records a
# display name, a status (`estado`), a completion percentage (`completitud`,
# a quoted string including the percent sign) and its deliverables
# (`entregables`).
fases:
  # Phase 1: gateway plus the Ollama backend.
  fase_1_mvp:
    nombre: "MVP - Gateway + Ollama"
    estado: "completado"
    completitud: "100%"
    entregables:
      - "Gateway NestJS"
      - "Inference Engine Python"
      - "Ollama backend"
      - "Docker setup"

  # Phase 2: MCP tools and rate limiting.
  # The "4 herramientas" count matches the four entries under `mcp_tools`,
  # and "98 tests pasando" equals `metricas.tests_gateway` +
  # `metricas.tests_inference` (54 + 44); update these strings together with
  # those sections.
  fase_2_mcp_tools:
    nombre: "MCP Tools + Rate Limiting"
    estado: "completado"
    completitud: "100%"
    entregables:
      - "MCP Tools (4 herramientas)"
      - "Tier Classification"
      - "Rate Limiting"
      - "98 tests pasando"

  # Phase 3: production setup with vLLM, Multi-LoRA and monitoring.
  fase_3_produccion:
    nombre: "Production - vLLM + Multi-LoRA"
    estado: "completado"
    completitud: "100%"
    entregables:
      - "vLLM backend GPU"
      - "Multi-LoRA adapters"
      - "Prometheus metrics"
      - "Grafana dashboard"
      - "Production docker-compose"

# -----------------------------------------------------------------------------
# MCP TOOLS
# -----------------------------------------------------------------------------

# MCP tools offered by the project; the gateway service lists a
# `/mcp/tools/*` endpoint under `servicios`. Each entry has a name, a short
# Spanish description and a tier.
# NOTE(review): only two tier values appear here ("small" and "main");
# presumably they select a model size and/or rate-limit class ("Rate limiting
# por tier" in `descripcion`) - confirm the exact semantics.
mcp_tools:
  # Classify text into categories.
  - nombre: "classify"
    descripcion: "Clasificar texto en categorias"
    tier: "small"

  # Extract structured data.
  - nombre: "extract"
    descripcion: "Extraer datos estructurados"
    tier: "small"

  # Rewrite text.
  - nombre: "rewrite"
    descripcion: "Reescribir texto"
    tier: "main"

  # Summarize text.
  - nombre: "summarize"
    descripcion: "Resumir texto"
    tier: "main"

# -----------------------------------------------------------------------------
# DEPENDENCIAS
# -----------------------------------------------------------------------------

# External dependencies, in three groups: `runtime` (model backends),
# `opcional` (supporting services) and `gpu` (GPU toolchain).
dependencias:
  # Model backends. Both are flagged `obligatorio: false`.
  # NOTE(review): presumably at least one backend must be available for the
  # service to answer requests - confirm; the file does not state it.
  runtime:
    - nombre: "Ollama"
      tipo: "external"
      obligatorio: false
      nota: "Backend CPU para desarrollo"

    - nombre: "vLLM"
      tipo: "external"
      obligatorio: false
      nota: "Backend GPU para produccion"

  # Optional supporting services with their ports. The Prometheus and
  # Grafana ports repeat the values under `stack.monitoring`; change both
  # places together.
  opcional:
    - nombre: "Redis"
      tipo: "cache"
      puerto: 6379

    - nombre: "PostgreSQL"
      tipo: "database"
      puerto: 5432

    - nombre: "Prometheus"
      tipo: "monitoring"
      puerto: 9090

    - nombre: "Grafana"
      tipo: "dashboard"
      puerto: 3000

  # GPU toolchain, needed only for the vLLM backend according to `nota`.
  gpu:
    - nombre: "NVIDIA CUDA"
      # Must stay quoted: a plain scalar may not start with `>`, which YAML
      # reads as the folded block scalar indicator.
      version: ">=12.6"
      obligatorio: false
      nota: "Solo para vLLM"

# -----------------------------------------------------------------------------
# ESTADO
# -----------------------------------------------------------------------------

# Overall project status.
estado:
  # Same value as the `estado` of both entries under `servicios`.
  general: "production-ready"
  madurez: "stable"
  # NOTE(review): this is "95%" although every phase under `fases` reports
  # "100%". Possibly the remainder reflects `en_produccion: false` below -
  # confirm what the outstanding 5% represents or align the figures.
  completitud: "95%"
  # Real boolean: the project is marked as not yet running in production.
  en_produccion: false
  # Also restated in `notas` ("Prioridad P1 segun ROADMAP").
  prioridad: "P1"

# -----------------------------------------------------------------------------
# METRICAS
# -----------------------------------------------------------------------------

# Code and test metrics. Counts are plain integers; `test_coverage` is a
# quoted string that includes the percent sign.
# NOTE(review): these look like a hand-maintained snapshot - confirm whether
# any tooling regenerates them.
metricas:
  archivos_totales: 42
  lineas_codigo: 3500
  test_coverage: "90%"
  # The two test counts add up to 98, the figure quoted in the phase 2
  # deliverable "98 tests pasando" under `fases`.
  tests_gateway: 54
  tests_inference: 44

# -----------------------------------------------------------------------------
# RUTAS
# -----------------------------------------------------------------------------

# Directory paths for the project. Every entry shares the
# `projects/local-llm-agent` prefix given by `proyecto`.
# NOTE(review): the paths are relative; presumably they resolve against the
# workspace root - confirm with the tooling that reads them.
rutas:
  proyecto: "projects/local-llm-agent"
  # `gateway` and `inference` equal `proyecto` joined with the `path` of the
  # corresponding entry under `servicios`.
  gateway: "projects/local-llm-agent/apps/gateway"
  inference: "projects/local-llm-agent/apps/inference-engine"
  config: "projects/local-llm-agent/config"
  docs: "projects/local-llm-agent/docs"
  orchestration: "projects/local-llm-agent/orchestration"

# -----------------------------------------------------------------------------
# EQUIPO
# -----------------------------------------------------------------------------

# Ownership: the owning team and the main agents working on the project.
equipo:
  owner: "ISEM Development"
  # NOTE(review): `descripcion` names Claude Code, Trae and Gemini as agents
  # that use the gateway; only the first two are listed here - confirm that
  # the omission of Gemini is intentional.
  agentes_principales:
    - "Claude Code"
    - "Trae"

# -----------------------------------------------------------------------------
# NOTAS
# -----------------------------------------------------------------------------

# Free-form notes (a mix of Spanish and English strings). Several restate
# values held elsewhere in this file - `proyecto.tipo`/`subtipo`,
# `estado.prioridad` and the phase 3 status under `fases` - so update them
# together with those fields.
notas:
  - "Proyecto STANDALONE de infraestructura"
  - "Sirve a todos los proyectos del workspace via API"
  - "Prioridad P1 segun ROADMAP"
  - "Phase 3 complete - Production ready"
  - "GPU setup requiere WSL con NVIDIA drivers"