# Modelo de Datos y Definiciones de Objetos - Local LLM Agent
|
|
|
|
**Version:** 1.0.0
**Fecha:** 2026-01-20
**Proyecto:** local-llm-agent
|
|
|
|
---
|
|
|
|
## 1. VISION GENERAL
|
|
|
|
Este documento define los objetos de datos, interfaces y estructuras que componen el sistema Local LLM Agent.
|
|
|
|
### 1.1 Diagrama de Componentes
|
|
|
|
```
┌─────────────────────────────────────────────────────────────────────┐
│                        API GATEWAY (NestJS)                         │
│                                                                     │
│  ┌─────────────────────────────────────────────────────────────┐    │
│  │                      Request Pipeline                       │    │
│  │                                                             │    │
│  │  [Request] → [Validation] → [TierClassifier] → [Router]     │    │
│  │                                                    │        │    │
│  └────────────────────────────────────────────────────┼────────┘    │
│                                                       │             │
│  ┌──────────────────┐  ┌──────────────────┐  ┌────────┴────────┐    │
│  │ ChatModule       │  │ ModelsModule     │  │ MCPToolsModule  │    │
│  │                  │  │                  │  │                 │    │
│  │ ChatController   │  │ ModelsController │  │ MCPController   │    │
│  │ ChatService      │  │ ModelsService    │  │ MCPService      │    │
│  │ TierService      │  │                  │  │ ToolsRegistry   │    │
│  └────────┬─────────┘  └────────┬─────────┘  └────────┬────────┘    │
│           │                     │                     │             │
└───────────┼─────────────────────┼─────────────────────┼─────────────┘
            │                     │                     │
            └─────────────────────┼─────────────────────┘
                                  │
                          [InferenceClient]
                                  │
                                  ▼
┌─────────────────────────────────────────────────────────────────────┐
│                      INFERENCE ENGINE (Python)                      │
│                                                                     │
│  ┌─────────────────────────────────────────────────────────────┐    │
│  │                       Backend Manager                       │    │
│  │                                                             │    │
│  │  [BackendFactory] → [OllamaBackend | VLLMBackend]           │    │
│  └─────────────────────────────────────────────────────────────┘    │
│                                 │                                   │
│  ┌──────────────────────────────┼──────────────────────────────┐    │
│  │                           Routes                            │    │
│  │                                                             │    │
│  │  [/v1/chat/completions]    [/v1/models]    [/health]        │    │
│  └─────────────────────────────────────────────────────────────┘    │
└─────────────────────────────────────────────────────────────────────┘
```
|
|
|
|
---
|
|
|
|
## 2. OBJETOS DE DOMINIO
|
|
|
|
### 2.1 Chat Completion
|
|
|
|
#### ChatMessage
|
|
```typescript
|
|
/**
 * A single message within a conversation.
 */
interface ChatMessage {
  /** Role of the message sender. */
  role: "system" | "user" | "assistant";

  /** Text content of the message. */
  content: string;

  /** Optional name of the sender. */
  name?: string;
}
|
|
```
|
|
|
|
#### ChatCompletionRequest
|
|
```typescript
|
|
/**
 * Request payload for creating a chat completion.
 * Compatible with the OpenAI API.
 */
interface ChatCompletionRequest {
  /** Identifier of the model to use. */
  model: string;

  /** Messages that make up the conversation. */
  messages: ChatMessage[];

  /** Maximum number of tokens to generate (default: 512). */
  max_tokens?: number;

  /** Sampling temperature, 0.0-2.0 (default: 0.7). */
  temperature?: number;

  /** Top-p sampling, 0.0-1.0 (default: 0.9). */
  top_p?: number;

  /** Whether to return the response as a stream (default: false). */
  stream?: boolean;

  /** Forced tier (optional; normally auto-detected). */
  x_tier?: "small" | "main";
}
|
|
|
|
/**
 * Validation constraints for the fields of a chat completion request.
 *
 * Declared `as const` so every bound keeps its literal type and the table is
 * read-only at the type level.
 */
const ChatCompletionRequestConstraints = {
  model: { required: true, maxLength: 100 },
  messages: { required: true, minItems: 1, maxItems: 100 },
  max_tokens: { min: 1, max: 4096 },
  temperature: { min: 0.0, max: 2.0 },
  top_p: { min: 0.0, max: 1.0 },
} as const;
|
|
```
|
|
|
|
#### ChatCompletionResponse
|
|
```typescript
|
|
/**
 * Chat completion response.
 * Compatible with the OpenAI API.
 */
interface ChatCompletionResponse {
  /** Unique ID of the completion. */
  id: string;

  /** Object type. */
  object: "chat.completion";

  /** Creation timestamp (Unix). */
  created: number;

  /** Model that was used. */
  model: string;

  /** Generated choices. */
  choices: ChatCompletionChoice[];

  /** Token usage statistics. */
  usage: TokenUsage;
}
|
|
|
|
/**
 * One generated choice within a chat completion response.
 */
interface ChatCompletionChoice {
  /** Index of the choice. */
  index: number;

  /** Generated message. */
  message: ChatMessage;

  /** Reason why generation finished. */
  finish_reason: "stop" | "length" | "content_filter";
}
|
|
|
|
/**
 * Token usage figures for a completion.
 */
interface TokenUsage {
  /** Tokens in the prompt. */
  prompt_tokens: number;

  /** Tokens generated. */
  completion_tokens: number;

  /** Total number of tokens. */
  total_tokens: number;
}
|
|
```
|
|
|
|
---
|
|
|
|
### 2.2 Models
|
|
|
|
#### Model
|
|
```typescript
|
|
/**
 * A model available for inference.
 */
interface Model {
  /** Unique identifier of the model. */
  id: string;

  /** Object type. */
  object: "model";

  /** Creation timestamp. */
  created: number;

  /** Owner of the model. */
  owned_by: string;

  /** Permissions (empty for local models); typed as the empty tuple. */
  permission: [];

  /** Root model. */
  root: string;

  /** Parent model (null when this is a base model). */
  parent: string | null;
}
|
|
|
|
/**
 * Response wrapping a list of models.
 */
interface ModelsListResponse {
  /** Object type. */
  object: "list";

  /** The models being listed. */
  data: Model[];
}
|
|
```
|
|
|
|
---
|
|
|
|
### 2.3 MCP Tools
|
|
|
|
#### MCPTool
|
|
```typescript
|
|
/**
 * Definition of an MCP tool.
 *
 * NOTE(review): `JSONSchema7` is not declared in this snippet; presumably it
 * comes from the `json-schema` typings -- confirm the import.
 */
interface MCPTool {
  /** Unique name of the tool. */
  name: string;

  /** Human-readable description. */
  description: string;

  /** JSON Schema of the tool's parameters. */
  parameters: JSONSchema7;

  /** Preferred tier for this tool. */
  preferred_tier: "small" | "main";

  /** Version of the tool. */
  version: string;
}
|
|
|
|
/**
 * Response wrapping a list of MCP tool definitions.
 */
interface MCPToolsListResponse {
  /** The tool definitions being listed. */
  tools: MCPTool[];
}
|
|
```
|
|
|
|
#### MCPToolRequest
|
|
```typescript
|
|
/**
 * Generic request to execute an MCP tool.
 */
interface MCPToolRequest {
  /** Name of the tool. */
  tool: string;

  /** Parameters, shaped according to the tool's schema. */
  parameters: Record<string, unknown>;

  /** Optional additional context. */
  context?: string;
}
|
|
```
|
|
|
|
#### Herramientas Especificas
|
|
|
|
```typescript
|
|
// Classify Tool

/**
 * Request payload of the Classify tool.
 */
interface ClassifyRequest {
  /** Input text. */
  text: string;

  /** Category names supplied by the caller (presumably the candidate labels -- confirm). */
  categories: string[];

  /** Optional context string. */
  context?: string;
}
|
|
|
|
/**
 * Response payload of the Classify tool.
 */
interface ClassifyResponse {
  /** Resulting category. */
  category: string;

  /** Confidence value (range not specified here; presumably 0-1 -- confirm). */
  confidence: number;

  /** Optional reasoning text. */
  reasoning?: string;
}
|
|
|
|
// Extract Tool

/**
 * Request payload of the Extract tool.
 */
interface ExtractRequest {
  /** Input text. */
  text: string;

  /** Description of the fields requested from the text. */
  schema: {
    fields: Array<{
      /** Field name. */
      name: string;

      /** Field value type. */
      type: "string" | "number" | "date" | "boolean" | "array";

      /** Field description. */
      description: string;

      /** Whether the field is required (optional flag). */
      required?: boolean;
    }>;
  };
}
|
|
|
|
/**
 * Response payload of the Extract tool.
 */
interface ExtractResponse {
  /** Extracted values (presumably keyed by field name -- confirm). */
  data: Record<string, unknown>;

  /** Confidence value (range not specified here; presumably 0-1 -- confirm). */
  confidence: number;

  /** Optional list of missing field names. */
  missing_fields?: string[];
}
|
|
|
|
// Summarize Tool

/**
 * Request payload of the Summarize tool.
 */
interface SummarizeRequest {
  /** Input text. */
  text: string;

  /** Optional maximum length (unit not specified here -- confirm words vs. characters). */
  max_length?: number;

  /** Optional output format. */
  format?: "paragraph" | "bullets";
}
|
|
|
|
/**
 * Response payload of the Summarize tool.
 */
interface SummarizeResponse {
  /** The summary text. */
  summary: string;

  /** Word count (presumably of `summary` -- confirm). */
  word_count: number;

  /** Optional list of key points. */
  key_points?: string[];
}
|
|
|
|
// Rewrite Tool

/**
 * Request payload of the Rewrite tool.
 */
interface RewriteRequest {
  /** Input text. */
  text: string;

  /** Target style. */
  style: "formal" | "casual" | "technical" | "simple";

  /** Optional flag (presumably asks to keep the output near the input length -- confirm). */
  preserve_length?: boolean;
}
|
|
|
|
/**
 * Response payload of the Rewrite tool.
 */
interface RewriteResponse {
  /** The rewritten text. */
  rewritten: string;

  /** Number of changes made (counting rule not specified here). */
  changes_made: number;
}
|
|
```
|
|
|
|
---
|
|
|
|
### 2.4 System Objects
|
|
|
|
#### Health Status
|
|
```typescript
|
|
/**
 * Health status of the system.
 */
interface HealthStatus {
  /** Overall status. */
  status: "healthy" | "degraded" | "unhealthy";

  /** Timestamp of the check (string format is not specified here). */
  timestamp: string;

  /** Version of the service. */
  version: string;

  /** Status of each dependency; `redis` is optional. */
  dependencies: {
    inference_engine: DependencyStatus;
    ollama: DependencyStatus;
    redis?: DependencyStatus;
  };

  /** System metrics (optional). */
  metrics?: {
    uptime_seconds: number;
    requests_total: number;
    requests_failed: number;
  };
}

/** State reported for a single dependency. */
type DependencyStatus = "up" | "down" | "degraded";
|
|
```
|
|
|
|
#### TierConfig
|
|
```typescript
|
|
/**
 * Limits that apply to a single processing tier.
 */
interface TierLimits {
  /** Maximum context size, in tokens. */
  max_context: number;

  /** Maximum number of output tokens. */
  max_tokens: number;

  /** Target latency, in milliseconds. */
  latency_target_ms: number;
}

/**
 * Configuration of the processing tiers.
 */
interface TierConfig {
  /** Limits of the "small" tier (defaults: 4096 / 512 / 500). */
  small: TierLimits;

  /** Limits of the "main" tier (defaults: 16384 / 2048 / 2000). */
  main: TierLimits;
}

/** Default tier configuration. */
const DEFAULT_TIER_CONFIG: TierConfig = {
  small: {
    max_context: 4096,
    max_tokens: 512,
    latency_target_ms: 500,
  },
  main: {
    max_context: 16384,
    max_tokens: 2048,
    latency_target_ms: 2000,
  },
};
|
|
```
|
|
|
|
#### Error Response
|
|
```typescript
|
|
/**
 * Standardized error response.
 * Compatible with the OpenAI format.
 */
interface ErrorResponse {
  error: {
    /** Error code. */
    code: string;

    /** Descriptive message. */
    message: string;

    /** Error type. */
    type: "invalid_request_error" | "authentication_error" | "rate_limit_error" | "server_error";

    /** Parameter that caused the error (when applicable). */
    param?: string;
  };
}
|
|
|
|
/**
 * Error codes.
 */
enum ErrorCodes {
  INVALID_REQUEST = "invalid_request",
  MODEL_NOT_FOUND = "model_not_found",
  CONTEXT_TOO_LONG = "context_length_exceeded",
  INFERENCE_TIMEOUT = "inference_timeout",
  BACKEND_UNAVAILABLE = "backend_unavailable",
  RATE_LIMITED = "rate_limited",
  INTERNAL_ERROR = "internal_error",
}
|
|
```
|
|
|
|
---
|
|
|
|
## 3. MODELOS DE BACKEND (Python)
|
|
|
|
### 3.1 Backend Interface
|
|
|
|
```python
|
|
from abc import ABC, abstractmethod
|
|
from typing import Any, Dict, List
|
|
|
|
class InferenceBackend(ABC):
    """
    Abstract interface for inference backends.

    Implemented by OllamaBackend and VLLMBackend.
    """

    @abstractmethod
    async def health_check(self) -> bool:
        """Check whether the backend is available."""

    @abstractmethod
    async def list_models(self) -> List[Dict[str, Any]]:
        """List the available models."""

    @abstractmethod
    async def chat_completion(
        self,
        model: str,
        messages: List[Dict[str, str]],
        max_tokens: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
    ) -> Dict[str, Any]:
        """
        Create a chat completion.

        Args:
            model: Identifier of the model
            messages: List of messages [{"role": str, "content": str}]
            max_tokens: Maximum number of tokens to generate
            temperature: Sampling temperature
            top_p: Top-p sampling

        Returns:
            Dict with id, content, usage, finish_reason
        """

    @abstractmethod
    async def close(self) -> None:
        """Close the backend's connections."""
|
|
```
|
|
|
|
### 3.2 Pydantic Models
|
|
|
|
```python
|
|
from pydantic import BaseModel, Field
|
|
from typing import List, Optional
|
|
from enum import Enum
|
|
|
|
class MessageRole(str, Enum):
    # Roles a message sender can have; each member is also a plain `str`.
    SYSTEM = "system"
    USER = "user"
    ASSISTANT = "assistant"
|
|
|
|
class Message(BaseModel):
    # A single conversation message.
    # NOTE(review): the TypeScript ChatMessage in this document also declares an
    # optional `name`; it has no counterpart here -- confirm that is intentional.
    role: MessageRole
    # Content must contain at least one character.
    content: str = Field(..., min_length=1)
|
|
|
|
class ChatCompletionRequest(BaseModel):
    # Backend-side (Python) model of a chat completion request.
    # NOTE(review): the validation rules documented elsewhere in this file also cap
    # `model` at 100 characters and `messages` at 100 items; neither bound is
    # enforced here -- confirm whether that is intentional.
    model: str = Field(default="gpt-oss-20b")
    # NOTE(review): `min_items` is the Pydantic v1 keyword; Pydantic v2 deprecates
    # it in favor of `min_length` -- confirm against the installed version.
    messages: List[Message] = Field(..., min_items=1)
    max_tokens: Optional[int] = Field(default=512, ge=1, le=4096)
    temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0)
    top_p: Optional[float] = Field(default=0.9, ge=0.0, le=1.0)
    stream: Optional[bool] = Field(default=False)
|
|
|
|
class Usage(BaseModel):
    # Token counts for a completion: prompt, completion and total.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
|
|
|
|
class Choice(BaseModel):
    # One entry of a response's `choices` list.
    index: int
    message: Message
    # Plain `str` here; the TypeScript ChatCompletionChoice in this document
    # narrows it to "stop" | "length" | "content_filter".
    finish_reason: str
|
|
|
|
class ChatCompletionResponse(BaseModel):
    # Backend-side (Python) model of a chat completion response.
    id: str
    # Defaults to the literal "chat.completion".
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[Choice]
    usage: Usage
|
|
```
|
|
|
|
---
|
|
|
|
## 4. DIAGRAMA ENTIDAD-RELACION
|
|
|
|
```
Este proyecto NO usa base de datos persistente en el MVP.

El flujo de datos es request-response:

[AgentRequest]
       │
       ▼
[ChatCompletionRequest] ──────> [ChatCompletionResponse]
                                          │
                                          ├── [ChatCompletionChoice]
                                          │         │
                                          │         └── [ChatMessage]
                                          │
                                          └── [TokenUsage]

Datos a persistir en el futuro (Fase 2+):
- Request logs (para analytics)
- Token usage metrics
- Model performance metrics
```
|
|
|
|
---
|
|
|
|
## 5. CONFIGURACION Y CONSTANTES
|
|
|
|
### 5.1 Environment Variables
|
|
|
|
```typescript
|
|
/**
 * Gateway configuration.
 */
interface GatewayConfig {
  /** Default: 3160 */
  GATEWAY_PORT: number;

  /** Default: "localhost" */
  INFERENCE_HOST: string;

  /** Default: 3161 */
  INFERENCE_PORT: number;

  /** Default: "info" */
  LOG_LEVEL: "debug" | "info" | "warn" | "error";

  /** Default: "*" */
  CORS_ORIGINS: string;
}
|
|
|
|
/**
 * Inference Engine configuration.
 */
interface InferenceConfig {
  /** Default: 3161 */
  INFERENCE_PORT: number;

  /** Default: "ollama" */
  INFERENCE_BACKEND: "ollama" | "vllm";

  /** Default: "http://localhost:11434" */
  OLLAMA_HOST: string;

  /** Default: "gpt-oss-20b" */
  OLLAMA_MODEL: string;

  /** Default: 60000 */
  REQUEST_TIMEOUT_MS: number;

  /**
   * Default: "info"
   *
   * NOTE(review): typed as a plain string here, while GatewayConfig.LOG_LEVEL is
   * restricted to "debug" | "info" | "warn" | "error" -- confirm whether the
   * engine should accept the same set.
   */
  LOG_LEVEL: string;
}
|
|
```
|
|
|
|
### 5.2 Constantes del Sistema
|
|
|
|
```typescript
|
|
/**
 * System limits.
 *
 * Declared `as const` so each limit keeps its literal type and the object is
 * read-only at the type level.
 */
const SYSTEM_LIMITS = {
  MAX_MESSAGE_LENGTH: 100000, // characters
  MAX_MESSAGES_PER_REQUEST: 100,
  MAX_CONTEXT_TOKENS: 16384,
  MAX_OUTPUT_TOKENS: 4096,
  REQUEST_TIMEOUT_MS: 60000,
  HEALTH_CHECK_INTERVAL_MS: 30000,
} as const;
|
|
|
|
/**
 * Default values.
 *
 * Declared `as const` so `TIER` keeps the literal type "small" (assignable to a
 * `"small" | "main"` field) instead of widening to `string`.
 */
const DEFAULTS = {
  MODEL: "gpt-oss-20b",
  MAX_TOKENS: 512,
  TEMPERATURE: 0.7,
  TOP_P: 0.9,
  TIER: "small",
} as const;
|
|
|
|
/**
 * Assigned ports.
 *
 * Declared `as const` so each port keeps its literal type and the object is
 * read-only at the type level.
 */
const PORTS = {
  GATEWAY: 3160,
  INFERENCE_ENGINE: 3161,
  OLLAMA: 11434,
} as const;
|
|
```
|
|
|
|
---
|
|
|
|
## 6. VALIDACIONES
|
|
|
|
### 6.1 Request Validations
|
|
|
|
| Campo | Validacion | Error Code |
|-------|------------|------------|
| model | No vacio, max 100 chars | invalid_request |
| messages | Array no vacio, max 100 items | invalid_request |
| messages[].role | Enum: system/user/assistant | invalid_request |
| messages[].content | No vacio | invalid_request |
| max_tokens | 1-4096 | invalid_request |
| temperature | 0.0-2.0 | invalid_request |
| top_p | 0.0-1.0 | invalid_request |
|
|
|
|
### 6.2 Business Validations
|
|
|
|
| Regla | Descripcion | Error Code |
|-------|-------------|------------|
| Contexto excedido | Total tokens > max_context del tier | context_length_exceeded |
| Modelo no existe | Modelo no disponible en backend | model_not_found |
| Backend no disponible | Ollama no responde | backend_unavailable |
| Timeout | Request excede timeout | inference_timeout |
|
|
|
|
---
|
|
|
|
## 7. REFERENCIAS
|
|
|
|
- RF-REQUERIMIENTOS-FUNCIONALES.md
- RNF-REQUERIMIENTOS-NO-FUNCIONALES.md
- ADR-001: Runtime Selection
- ADR-002: Model Selection
|
|
|
|
---
|
|
|
|
**Documento Controlado**

- Autor: Requirements-Analyst Agent
- Fecha: 2026-01-20
|