# ===============================================================================
# PROJECT-PROFILE.yml - Local LLM Agent
# ===============================================================================
#
# System: SIMCO v4.3.0 + NEXUS v4.0
# Purpose: Project profile and metadata
# Date: 2026-01-24
#
# ===============================================================================
---
# Project identity. Scalar strings are quoted so version-like values
# (e.g. "0.6.0", "2A") are never retyped by the parser.
proyecto:
  nombre: "Local LLM Agent"
  codigo: "local-llm-agent"
  alias: "LLM"
  tipo: "STANDALONE"
  subtipo: "INFRASTRUCTURE"
  nivel: "2A"
  version: "0.6.0"

  # Literal block scalar: newlines and the bullet layout are preserved as-is.
  descripcion: |
    Gateway de LLM local para el workspace-v2.
    Permite a los agentes (Claude Code, Trae, Gemini) delegar tareas
    simples para ahorrar contexto y tokens.

    Caracteristicas principales:
    - API Gateway OpenAI-compatible
    - MCP Tools (classify, extract, rewrite, summarize)
    - Multi-backend (Ollama CPU, vLLM GPU)
    - Multi-LoRA adapters por proyecto
    - Monitoring con Prometheus + Grafana
    - Rate limiting por tier

# -----------------------------------------------------------------------------
# TECHNOLOGY STACK
# -----------------------------------------------------------------------------

stack:
  gateway:
    framework: "NestJS"
    version: "10.x"
    lenguaje: "TypeScript"
    runtime: "Node.js 20 LTS"

  inference:
    framework: "FastAPI"
    lenguaje: "Python"
    # Quoted: unquoted 3.11 would parse as the float 3.11 (and 3.10 -> 3.1).
    version: "3.11"

  backends:
    desarrollo:
      nombre: "Ollama"
      tipo: "CPU"
      puerto: 11434
    produccion:
      nombre: "vLLM"
      tipo: "GPU"
      puerto: 8000
      # NOTE(review): features listed after the vLLM entry in the original;
      # nested under "produccion" accordingly — confirm intended placement.
      features:
        - "Multi-LoRA"
        - "Continuous Batching"

  monitoring:
    prometheus:
      puerto: 9090
    grafana:
      puerto: 3000

  database:
    motor: "PostgreSQL"
    version: "16"
    nombre: "local_llm_dev"
    obligatorio: false

  cache:
    motor: "Redis"
    db: 9
    obligatorio: false

# -----------------------------------------------------------------------------
# SERVICES
# -----------------------------------------------------------------------------

servicios:
  - id: "LLM-SVC-001"
    nombre: "Gateway API"
    puerto: 3160
    path: "apps/gateway"
    estado: "production-ready"
    endpoints:
      # Quoted: a leading "/" is safe, but "*" segments would otherwise
      # risk being read as YAML alias syntax.
      - "/v1/chat/completions"
      - "/v1/models"
      - "/v1/lora/*"
      - "/mcp/tools/*"
      - "/health"

  - id: "LLM-SVC-002"
    nombre: "Inference Engine"
    puerto: 3161
    path: "apps/inference-engine"
    estado: "production-ready"
    endpoints:
      - "/chat"
      - "/models"
      - "/health"
      - "/metrics"

# -----------------------------------------------------------------------------
# DEVELOPMENT PHASES
# -----------------------------------------------------------------------------

fases:
  fase_1_mvp:
    nombre: "MVP - Gateway + Ollama"
    estado: "completado"
    # Quoted: "100%" must stay a string, not be mistaken for a number.
    completitud: "100%"
    entregables:
      - "Gateway NestJS"
      - "Inference Engine Python"
      - "Ollama backend"
      - "Docker setup"

  fase_2_mcp_tools:
    nombre: "MCP Tools + Rate Limiting"
    estado: "completado"
    completitud: "100%"
    entregables:
      - "MCP Tools (4 herramientas)"
      - "Tier Classification"
      - "Rate Limiting"
      - "98 tests pasando"

  fase_3_produccion:
    nombre: "Production - vLLM + Multi-LoRA"
    estado: "completado"
    completitud: "100%"
    entregables:
      - "vLLM backend GPU"
      - "Multi-LoRA adapters"
      - "Prometheus metrics"
      - "Grafana dashboard"
      - "Production docker-compose"

# -----------------------------------------------------------------------------
# MCP TOOLS
# -----------------------------------------------------------------------------

mcp_tools:
  - nombre: "classify"
    descripcion: "Clasificar texto en categorias"
    tier: "small"

  - nombre: "extract"
    descripcion: "Extraer datos estructurados"
    tier: "small"

  - nombre: "rewrite"
    descripcion: "Reescribir texto"
    tier: "main"

  - nombre: "summarize"
    descripcion: "Resumir texto"
    tier: "main"

# -----------------------------------------------------------------------------
# DEPENDENCIES
# -----------------------------------------------------------------------------

dependencias:
  runtime:
    - nombre: "Ollama"
      tipo: "external"
      obligatorio: false
      nota: "Backend CPU para desarrollo"

    - nombre: "vLLM"
      tipo: "external"
      obligatorio: false
      nota: "Backend GPU para produccion"

  opcional:
    - nombre: "Redis"
      tipo: "cache"
      puerto: 6379

    - nombre: "PostgreSQL"
      tipo: "database"
      puerto: 5432

    - nombre: "Prometheus"
      tipo: "monitoring"
      puerto: 9090

    - nombre: "Grafana"
      tipo: "dashboard"
      puerto: 3000

  gpu:
    - nombre: "NVIDIA CUDA"
      # Quoted: a leading ">" would otherwise start a folded block scalar.
      version: ">=12.6"
      obligatorio: false
      nota: "Solo para vLLM"

# -----------------------------------------------------------------------------
# STATUS
# -----------------------------------------------------------------------------

estado:
  general: "production-ready"
  madurez: "stable"
  completitud: "95%"
  en_produccion: false
  prioridad: "P1"

# -----------------------------------------------------------------------------
# METRICS
# -----------------------------------------------------------------------------

metricas:
  archivos_totales: 42
  lineas_codigo: 3500
  test_coverage: "90%"
  tests_gateway: 54
  tests_inference: 44

# -----------------------------------------------------------------------------
# PATHS
# -----------------------------------------------------------------------------

# All paths are relative to the workspace root.
rutas:
  proyecto: "projects/local-llm-agent"
  gateway: "projects/local-llm-agent/apps/gateway"
  inference: "projects/local-llm-agent/apps/inference-engine"
  config: "projects/local-llm-agent/config"
  docs: "projects/local-llm-agent/docs"
  orchestration: "projects/local-llm-agent/orchestration"

# -----------------------------------------------------------------------------
# TEAM
# -----------------------------------------------------------------------------

equipo:
  owner: "ISEM Development"
  agentes_principales:
    - "Claude Code"
    - "Trae"

# -----------------------------------------------------------------------------
# NOTES
# -----------------------------------------------------------------------------

notas:
  - "Proyecto STANDALONE de infraestructura"
  - "Sirve a todos los proyectos del workspace via API"
  - "Prioridad P1 segun ROADMAP"
  - "Phase 3 complete - Production ready"
  - "GPU setup requiere WSL con NVIDIA drivers"