# =============================================================================
# MASTER_INVENTORY.yml - Local LLM Agent
# =============================================================================
# System: SIMCO v4.3.0 + NEXUS v4.0
# Purpose: Master inventory of the project
# Date: 2026-01-24
# Version: 1.0.0
# =============================================================================

# Top-level metadata of this inventory document.
# `version` here equals the "Version: 1.0.0" stated in the file header;
# presumably it versions the inventory itself, not the project - confirm.
version: "1.0.0"
proyecto: "local-llm-agent"
nivel: "CONSUMER (L2)"
tipo: "STANDALONE"
subtipo: "INFRASTRUCTURE"
# Quoted on purpose so the date stays a string instead of a YAML timestamp.
actualizado: "2026-01-24"
actualizado_por: "Claude Code (Opus 4.5)"

# =============================================================================
# EXECUTIVE SUMMARY
# =============================================================================
resumen:
  descripcion: "Gateway de inferencia LLM local OpenAI-compatible"
  estado: "production-ready"
  # Differs from the top-level `version` ("1.0.0"); presumably this is the
  # project release version - confirm.
  version: "0.6.0"
  prioridad: "P1"
  # NOTE(review): every entry under `fases` reports completitud "100%";
  # confirm whether "95%" is still the intended overall figure.
  completitud: "95%"

# =============================================================================
# GLOBAL METRICS
# =============================================================================
metricas:
  # Equals gateway.archivos.total (20) + inference_engine.archivos.total (22).
  archivos_totales: 42
  lineas_codigo: 3500
  # Equals gateway.tests.total (54) + inference_engine.tests.total (44).
  tests_totales: 98
  test_coverage: "90%"
  servicios: 2
  # NOTE(review): the `modulos` lists enumerate 8 endpoint entries for the
  # gateway and 4 for the inference engine; confirm how 9 is counted.
  endpoints: 9
  # Equals the top-level mcp_tools.total.
  mcp_tools: 4

# =============================================================================
# INVENTORY BY LAYER
# =============================================================================

# -----------------------------------------------------------------------------
# GATEWAY (NestJS)
# -----------------------------------------------------------------------------
gateway:
  path: "apps/gateway"
  framework: "NestJS 10.x"
  lenguaje: "TypeScript"
  runtime: "Node.js 20 LTS"
  puerto: 3160

  # File counts: total is the sum of src and tests (15 + 5).
  archivos:
    total: 20
    src: 15
    tests: 5

  # One entry per module; `endpoints` is present only on modules that list
  # routes. NOTE(review): `archivo` values look relative to `path` - confirm.
  modulos:
    - nombre: "AppModule"
      tipo: "root"
      archivo: "src/app.module.ts"

    - nombre: "ChatModule"
      tipo: "feature"
      archivo: "src/modules/chat/chat.module.ts"
      endpoints:
        - "POST /v1/chat/completions"

    - nombre: "ModelsModule"
      tipo: "feature"
      archivo: "src/modules/models/models.module.ts"
      endpoints:
        - "GET /v1/models"

    - nombre: "LoraModule"
      tipo: "feature"
      archivo: "src/modules/lora/lora.module.ts"
      endpoints:
        - "POST /v1/lora/*"

    - nombre: "McpToolsModule"
      tipo: "feature"
      archivo: "src/modules/mcp-tools/mcp-tools.module.ts"
      endpoints:
        - "POST /mcp/tools/classify"
        - "POST /mcp/tools/extract"
        - "POST /mcp/tools/rewrite"
        - "POST /mcp/tools/summarize"

    - nombre: "HealthModule"
      tipo: "infrastructure"
      archivo: "src/modules/health/health.module.ts"
      endpoints:
        - "GET /health"

  # Test counts: total is the sum of unitarios and integracion (40 + 14).
  tests:
    total: 54
    unitarios: 40
    integracion: 14
    estado: "pasando"

# -----------------------------------------------------------------------------
# INFERENCE ENGINE (Python)
# -----------------------------------------------------------------------------
inference_engine:
  path: "apps/inference-engine"
  framework: "FastAPI"
  lenguaje: "Python"
  # Presumably the Python version (it follows `lenguaje`); quoted so it is
  # not parsed as the float 3.11 - confirm the meaning.
  version: "3.11"
  puerto: 3161

  # File counts: total is the sum of src and tests (16 + 6).
  archivos:
    total: 22
    src: 16
    tests: 6

  # One entry per module; backends and the entrypoint list no endpoints.
  modulos:
    - nombre: "main"
      tipo: "entrypoint"
      archivo: "src/main.py"

    - nombre: "chat_router"
      tipo: "router"
      archivo: "src/routers/chat.py"
      endpoints:
        - "POST /chat"

    - nombre: "models_router"
      tipo: "router"
      archivo: "src/routers/models.py"
      endpoints:
        - "GET /models"

    - nombre: "health_router"
      tipo: "router"
      archivo: "src/routers/health.py"
      endpoints:
        - "GET /health"
        - "GET /metrics"

    - nombre: "ollama_backend"
      tipo: "backend"
      archivo: "src/backends/ollama.py"

    - nombre: "vllm_backend"
      tipo: "backend"
      archivo: "src/backends/vllm.py"

  # Test counts: total is the sum of unitarios and integracion (30 + 14).
  tests:
    total: 44
    unitarios: 30
    integracion: 14
    estado: "pasando"

# -----------------------------------------------------------------------------
# MCP TOOLS
# -----------------------------------------------------------------------------
mcp_tools:
  # Matches the number of entries in `tools` below.
  total: 4

  # Tool names correspond to the /mcp/tools/<nombre> routes listed under the
  # gateway's McpToolsModule.
  tools:
    - nombre: "classify"
      descripcion: "Clasificar texto en categorias"
      tier: "small"
      estado: "production-ready"

    - nombre: "extract"
      descripcion: "Extraer datos estructurados"
      tier: "small"
      estado: "production-ready"

    - nombre: "rewrite"
      descripcion: "Reescribir texto"
      tier: "main"
      estado: "production-ready"

    - nombre: "summarize"
      descripcion: "Resumir texto"
      tier: "main"
      estado: "production-ready"

# -----------------------------------------------------------------------------
# CONFIGURATION
# -----------------------------------------------------------------------------
configuracion:
  path: "config"

  # Configuration files, each with a short statement of purpose.
  archivos:
    - nombre: "docker-compose.yml"
      proposito: "Desarrollo con Ollama"

    - nombre: "docker-compose.prod.yml"
      proposito: "Produccion con vLLM"

    - nombre: "prometheus.yml"
      proposito: "Configuracion de metricas"

    - nombre: "grafana/"
      proposito: "Dashboards de Grafana"

  # NOTE(review): nesting reconstructed; `env_files` is assumed to be a
  # sibling of `archivos` under `configuracion` - confirm.
  env_files:
    - ".env.example"
    - ".env.development"
    - ".env.production"

# -----------------------------------------------------------------------------
# DOCUMENTATION
# -----------------------------------------------------------------------------
documentacion:
  path: "docs"

  # Documentation file names.
  archivos:
    - "README.md"
    - "API.md"
    - "DEPLOYMENT.md"
    - "MCP-TOOLS.md"

# =============================================================================
# INFERENCE BACKENDS
# =============================================================================
backends:
  # Development backend; port matches the Ollama entry under
  # dependencias_externas.runtime.
  ollama:
    tipo: "desarrollo"
    puerto: 11434
    host: "localhost"
    modelo_default: "gpt-oss-20b"
    estado: "activo"

  # Production backend; port matches the vLLM entry under
  # dependencias_externas.runtime.
  vllm:
    tipo: "produccion"
    puerto: 8000
    host: "wsl"
    modelo_default: "gpt-oss-20b"
    features:
      - "Multi-LoRA"
      - "Continuous Batching"
    requiere: "GPU NVIDIA"
    estado: "configurado"

# =============================================================================
# EXTERNAL DEPENDENCIES
# =============================================================================
dependencias_externas:
  # Inference runtimes; both are marked `obligatorio: false`.
  runtime:
    - nombre: "Ollama"
      version: ">=0.1.0"
      puerto: 11434
      obligatorio: false

    - nombre: "vLLM"
      version: ">=0.4.0"
      puerto: 8000
      obligatorio: false

  # Optional supporting services.
  opcional:
    - nombre: "PostgreSQL"
      version: "16"
      puerto: 5432
      database: "local_llm_dev"

    - nombre: "Redis"
      puerto: 6379
      db: 9

    - nombre: "Prometheus"
      puerto: 9090

    - nombre: "Grafana"
      puerto: 3000

  # GPU stack; the second entry declares no version constraint.
  gpu:
    - nombre: "NVIDIA CUDA"
      version: ">=12.6"

    - nombre: "NVIDIA Container Toolkit"

# =============================================================================
# PHASE STATUS
# =============================================================================
# All three phases are recorded as completed at 100%.
fases:
  fase_1:
    nombre: "MVP - Gateway + Ollama"
    estado: "completado"
    completitud: "100%"

  fase_2:
    nombre: "MCP Tools + Rate Limiting"
    estado: "completado"
    completitud: "100%"

  fase_3:
    nombre: "Production - vLLM + Multi-LoRA"
    estado: "completado"
    completitud: "100%"

# =============================================================================
# WORKSPACE REGISTRATION
# =============================================================================
registro_workspace:
  devenv_ports:
    archivo: "orchestration/inventarios/DEVENV-PORTS-INVENTORY.yml"
    entrada: "local-llm-agent"
    # Same values as gateway.puerto (3160) and inference_engine.puerto (3161).
    puertos: [3160, 3161]

  dependency_graph:
    archivo: "orchestration/DEPENDENCY-GRAPH.yml"
    nodo: "infra:local-llm-agent"

# =============================================================================
# NOTES
# =============================================================================
# Free-form notes; the strings are kept exactly as written.
notas:
  - "Proyecto STANDALONE de infraestructura"
  - "Sirve a todos los proyectos del workspace via API"
  - "No forma parte de la jerarquia ERP"
  - "Phase 3 complete - Production ready"
  - "GPU setup requiere WSL con NVIDIA drivers"