---
# =============================================================================
# ENVIRONMENT-INVENTORY.yml - Local LLM Agent
# =============================================================================
# Development environment inventory
# Managed by: DevEnv Agent / Claude Code
# Date: 2026-01-20
# Version: 1.0.0
# =============================================================================

version: "1.0.0"
project_name: "local-llm-agent"
updated: "2026-01-20"
created_by: "Claude Code (Opus 4.5)"

# =============================================================================
# PROJECT METADATA
# =============================================================================
metadata:
  nivel: "INFRASTRUCTURE"
  tipo: "standalone"
  estado: "desarrollo"
  prioridad: "P1"
  descripcion: "Gateway LLM local OpenAI-compatible"

# =============================================================================
# DEVELOPMENT TOOLS
# =============================================================================
tools:
  runtime:
    node:
      version: "20.x"
      uso: "Gateway API (NestJS)"
    python:
      version: "3.11+"
      uso: "Inference Engine (FastAPI)"

  frameworks:
    nestjs:
      version: "10.x"
      uso: "API Gateway"
    fastapi:
      version: "0.104+"
      uso: "Inference Engine"

  build:
    typescript:
      version: "5.x"
      uso: "Gateway"
    pip:
      version: "latest"
      uso: "Python dependencies"

  contenedores:
    docker:
      version: "latest"
      # Quoted: an unquoted 3.8 would parse as a float
      compose_version: "3.8"

# =============================================================================
# SERVICES
# =============================================================================
services:
  gateway:
    nombre: "API Gateway"
    framework: "NestJS"
    puerto: 3160
    path: "apps/gateway"
    health_endpoint: "/health"
    descripcion: "API OpenAI-compatible + MCP Tools"

  inference_engine:
    nombre: "Inference Engine"
    framework: "FastAPI"
    puerto: 3161
    path: "apps/inference-engine"
    health_endpoint: "/health"
    descripcion: "Motor de inferencia Python"

# =============================================================================
# INFERENCE BACKENDS
# =============================================================================
inference_backends:
  ollama:
    tipo: "mvp"
    puerto: 11434
    host: "localhost"
    modelo_default: "gpt-oss-20b"
    estado: "activo"

  vllm:
    tipo: "produccion"
    puerto: 8000
    host: "wsl"
    modelo_default: "gpt-oss-20b"
    estado: "placeholder"

# =============================================================================
# DATABASES (SHARED WORKSPACE INSTANCE)
# =============================================================================
databases:
  postgresql:
    arquitectura: "instancia_unica_compartida"
    host: "localhost"
    port: 5432
    database: "local_llm_dev"
    user: "local_llm_dev"
    uso: "Metricas y cache (opcional)"
    nota: |
      PostgreSQL es una instancia UNICA compartida por todos los proyectos.
      La separacion es por nombre de base de datos y usuario.
      NO crear instancias adicionales.

  redis:
    arquitectura: "instancia_unica_compartida"
    host: "localhost"
    port: 6379
    # Logical Redis database index (0-15); this project owns DB 9
    db: 9
    uso: "Cache de sesiones (opcional)"
    nota: |
      Redis es una instancia UNICA compartida por todos los proyectos.
      La separacion es por numero de database (0-15).
      NO crear instancias adicionales.

# =============================================================================
# LLM MODEL
# =============================================================================
modelo:
  nombre: "gpt-oss-20b"
  quantizacion: "Q4_K_M"
  vram_mb: 14000
  context_length: 16384
  ubicacion: "models/base/"

# =============================================================================
# INFERENCE TIERS
# =============================================================================
tiers:
  small:
    max_tokens: 512
    max_context: 4096
    latencia_target_ms: 500
    uso: "Clasificacion, extraccion simple"

  main:
    max_tokens: 2048
    max_context: 16384
    latencia_target_ms: 2000
    uso: "Tareas complejas"

# =============================================================================
# REQUIRED ENVIRONMENT VARIABLES
# =============================================================================
# Values are quoted strings on purpose: env vars are strings to their
# consumers, and quoting prevents "3160" from being retyped as an int.
env_variables:
  gateway:
    - nombre: "GATEWAY_PORT"
      valor: "3160"
      requerido: true
    - nombre: "NODE_ENV"
      valor: "development"
      requerido: true
    - nombre: "INFERENCE_HOST"
      valor: "localhost"
      requerido: true
    - nombre: "INFERENCE_PORT"
      valor: "3161"
      requerido: true

  inference_engine:
    - nombre: "INFERENCE_PORT"
      valor: "3161"
      requerido: true
    - nombre: "INFERENCE_BACKEND"
      valor: "ollama"
      requerido: true
    - nombre: "OLLAMA_HOST"
      valor: "http://localhost:11434"
      requerido: true
    - nombre: "MODEL_NAME"
      valor: "gpt-oss-20b"
      requerido: true

# =============================================================================
# EXTERNAL DEPENDENCIES
# =============================================================================
dependencias_externas:
  - nombre: "Ollama"
    tipo: "inferencia"
    puerto: 11434
    obligatorio: true
    instalacion: "https://ollama.ai"

  - nombre: "PostgreSQL"
    tipo: "database"
    puerto: 5432
    obligatorio: false
    nota: "Instancia compartida del workspace"

  - nombre: "Redis"
    tipo: "cache"
    puerto: 6379
    obligatorio: false
    nota: "Instancia compartida del workspace, DB 9"

# =============================================================================
# SETUP INSTRUCTIONS
# =============================================================================
setup:
  prerequisitos:
    - "Node.js 20.x instalado"
    - "Python 3.11+ instalado"
    - "Ollama instalado y corriendo en puerto 11434"
    - "Modelo gpt-oss-20b descargado en Ollama"

  pasos:
    - descripcion: "Instalar dependencias Gateway"
      comando: "cd apps/gateway && npm install"
    - descripcion: "Instalar dependencias Inference Engine"
      comando: "cd apps/inference-engine && pip install -r requirements.txt"
    - descripcion: "Copiar variables de entorno"
      comando: "cp .env.example .env"
    - descripcion: "Iniciar Gateway"
      comando: "cd apps/gateway && npm run start:dev"
    - descripcion: "Iniciar Inference Engine"
      comando: "cd apps/inference-engine && uvicorn src.main:app --reload --port 3161"

  verificacion:
    - descripcion: "Health check Gateway"
      comando: "curl http://localhost:3160/health"
    - descripcion: "Health check Inference Engine"
      comando: "curl http://localhost:3161/health"

# =============================================================================
# WORKSPACE REGISTRATION
# =============================================================================
registro_workspace:
  devenv_ports_inventory:
    archivo: "orchestration/inventarios/DEVENV-PORTS-INVENTORY.yml"
    version: "3.7.0"
    entrada: "local-llm-agent"

  devenv_master_inventory:
    archivo: "orchestration/inventarios/DEVENV-MASTER-INVENTORY.yml"
    entrada: "local-llm-agent"

  dependency_graph:
    archivo: "orchestration/DEPENDENCY-GRAPH.yml"
    nodo: "infra:local-llm-agent"