# local-llm-agent/scripts/test-e2e.sh
# Adrian Flores Cortes 3def230d58 Initial commit: local-llm-agent infrastructure project
# Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
# 2026-02-02 16:42:45 -06:00
#
# 282 lines
# 7.0 KiB
# Bash
#!/bin/bash
# ==============================================================================
# End-to-End Test Script for Local LLM Agent
# ==============================================================================
# Tests the full stack: Gateway -> Inference Engine -> vLLM
#
# Usage:
#   ./scripts/test-e2e.sh
#
# Prerequisites:
#   - Stack running (docker-compose up -d)
#   - curl installed
#
# Environment overrides:
#   GATEWAY_URL, INFERENCE_URL, VLLM_URL
# ==============================================================================
set -euo pipefail

# ANSI color codes (constants).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Service endpoints; overridable via environment, then frozen.
GATEWAY_URL="${GATEWAY_URL:-http://localhost:3160}"
INFERENCE_URL="${INFERENCE_URL:-http://localhost:3161}"
VLLM_URL="${VLLM_URL:-http://localhost:8000}"
readonly GATEWAY_URL INFERENCE_URL VLLM_URL

# Per-request curl timeout in seconds.
readonly TIMEOUT=30

# Global pass/fail counters, updated by log_pass/log_fail.
PASSED=0
FAILED=0
# Test functions
# Announce the test section being run (blue [TEST] tag).
log_test() {
  local msg=$1
  echo -e "${BLUE}[TEST]${NC} ${msg}"
}
# Record a passing check: print a green [PASS] line and bump the counter.
# NOTE: `((PASSED++))` would return exit status 1 when PASSED is 0 (the
# post-increment evaluates to 0), aborting the whole script under
# `set -e` on the very first passing check. Plain arithmetic assignment
# always succeeds.
log_pass() {
  echo -e "${GREEN}[PASS]${NC} $1"
  PASSED=$((PASSED + 1))
}
# Record a failing check: print a red [FAIL] line and bump the counter.
# Uses plain assignment instead of `((FAILED++))`, whose zero result on
# the first recorded failure would trip `set -e` and kill the script.
log_fail() {
  echo -e "${RED}[FAIL]${NC} $1"
  FAILED=$((FAILED + 1))
}
# Emit an informational (non-scoring) message with a yellow [INFO] tag.
log_info() {
  local message=$1
  echo -e "${YELLOW}[INFO]${NC} ${message}"
}
# Wait for service
wait_for_service() {
local url=$1
local name=$2
local max_attempts=30
local attempt=1
log_info "Waiting for $name ($url)..."
while [ $attempt -le $max_attempts ]; do
if curl -sf "$url" > /dev/null 2>&1; then
log_pass "$name is ready"
return 0
fi
sleep 2
((attempt++))
done
log_fail "$name is not ready after $max_attempts attempts"
return 1
}
# Test health endpoints
test_health() {
log_test "Testing health endpoints..."
# Gateway health
if curl -sf "$GATEWAY_URL/health" | grep -q "ok\|healthy"; then
log_pass "Gateway health check"
else
log_fail "Gateway health check"
fi
# Inference Engine health
if curl -sf "$INFERENCE_URL/health" | grep -q "ok\|healthy"; then
log_pass "Inference Engine health check"
else
log_fail "Inference Engine health check"
fi
# vLLM health (if available)
if curl -sf "$VLLM_URL/health" > /dev/null 2>&1; then
log_pass "vLLM health check"
else
log_info "vLLM health check skipped (may not be running)"
fi
}
# Test models endpoint
test_models() {
log_test "Testing models endpoint..."
response=$(curl -sf "$GATEWAY_URL/v1/models")
if echo "$response" | grep -q "data"; then
log_pass "Models endpoint returns data"
# Check for at least one model
model_count=$(echo "$response" | grep -o '"id"' | wc -l)
if [ "$model_count" -gt 0 ]; then
log_pass "Models endpoint returned $model_count model(s)"
else
log_fail "No models found"
fi
else
log_fail "Models endpoint failed"
fi
}
# Test chat completion
test_chat_completion() {
log_test "Testing chat completion..."
# Get available model
model=$(curl -sf "$GATEWAY_URL/v1/models" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4)
if [ -z "$model" ]; then
log_info "No model available, using default"
model="tinyllama"
fi
log_info "Using model: $model"
# Simple chat completion
response=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
-H "Content-Type: application/json" \
-d "{
\"model\": \"$model\",
\"messages\": [{\"role\": \"user\", \"content\": \"Hello, respond with exactly 'Hi there!'\"}],
\"max_tokens\": 50
}" --max-time $TIMEOUT 2>&1) || true
if echo "$response" | grep -q "choices"; then
log_pass "Chat completion returned response"
# Check for content
if echo "$response" | grep -q "content"; then
log_pass "Chat completion contains content"
else
log_fail "Chat completion missing content"
fi
# Check for usage
if echo "$response" | grep -q "usage"; then
log_pass "Chat completion contains usage stats"
else
log_fail "Chat completion missing usage stats"
fi
else
log_fail "Chat completion failed: $response"
fi
}
# Test metrics endpoint
test_metrics() {
log_test "Testing metrics endpoint..."
response=$(curl -sf "$INFERENCE_URL/metrics" 2>&1) || true
if echo "$response" | grep -q "llm_requests_total\|llm_backend_health"; then
log_pass "Metrics endpoint returns Prometheus metrics"
else
log_info "Metrics endpoint may not have data yet"
fi
}
# Test LoRA endpoints
test_lora() {
log_test "Testing LoRA endpoints..."
# List adapters
response=$(curl -sf "$GATEWAY_URL/v1/lora/adapters" 2>&1) || true
if echo "$response" | grep -q "adapters\|count"; then
log_pass "LoRA adapters endpoint works"
else
log_info "LoRA endpoint may not be available"
fi
# LoRA status
response=$(curl -sf "$GATEWAY_URL/v1/lora/status" 2>&1) || true
if echo "$response" | grep -q "enabled"; then
log_pass "LoRA status endpoint works"
else
log_info "LoRA status endpoint may not be available"
fi
}
# Test error handling
test_error_handling() {
log_test "Testing error handling..."
# Invalid model
response=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"model": "nonexistent-model-xyz", "messages": [{"role": "user", "content": "test"}]}' \
--max-time $TIMEOUT 2>&1) || response="error"
if echo "$response" | grep -qi "error\|not found"; then
log_pass "Invalid model returns error"
else
log_info "Invalid model handling: $response"
fi
# Invalid request
response=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"model": "test", "messages": []}' \
--max-time $TIMEOUT 2>&1) || response="error"
if echo "$response" | grep -qi "error\|invalid"; then
log_pass "Invalid request returns error"
else
log_info "Invalid request handling: $response"
fi
}
# Print summary
print_summary() {
echo ""
echo "=============================================="
echo "Test Summary"
echo "=============================================="
echo -e "Passed: ${GREEN}$PASSED${NC}"
echo -e "Failed: ${RED}$FAILED${NC}"
echo "=============================================="
if [ $FAILED -gt 0 ]; then
echo -e "${RED}Some tests failed!${NC}"
exit 1
else
echo -e "${GREEN}All tests passed!${NC}"
exit 0
fi
}
# Main
main() {
echo "=============================================="
echo "Local LLM Agent E2E Tests"
echo "=============================================="
echo ""
# Wait for services
wait_for_service "$GATEWAY_URL/health" "Gateway"
wait_for_service "$INFERENCE_URL/health" "Inference Engine"
echo ""
# Run tests
test_health
echo ""
test_models
echo ""
test_chat_completion
echo ""
test_metrics
echo ""
test_lora
echo ""
test_error_handling
# Summary
print_summary
}
# Invoke the entry point, forwarding any CLI arguments.
main "$@"