#!/bin/bash
# ==============================================================================
# End-to-End Test Script for Local LLM Agent
# ==============================================================================
# Tests the full stack: Gateway -> Inference Engine -> vLLM
#
# Usage:
#   ./scripts/test-e2e.sh
#
# Prerequisites:
#   - Stack running (docker-compose up -d)
#   - curl installed
# ==============================================================================

set -euo pipefail
# Colors (readonly: only read by the log_* helpers below)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Configuration — every endpoint AND the request timeout can be overridden
# via the environment (TIMEOUT was previously hard-coded).
GATEWAY_URL="${GATEWAY_URL:-http://localhost:3160}"
INFERENCE_URL="${INFERENCE_URL:-http://localhost:3161}"
VLLM_URL="${VLLM_URL:-http://localhost:8000}"
TIMEOUT="${TIMEOUT:-30}"

# Counters — mutated by log_pass/log_fail, reported by print_summary
PASSED=0
FAILED=0
# Logging helpers -------------------------------------------------------------

# Print a [TEST] banner for the check that is about to run.
log_test() {
  local msg=$1
  echo -e "${BLUE}[TEST]${NC} ${msg}"
}
# Record a passing check: print a [PASS] line and bump the counter.
# NOTE: uses PASSED=$((PASSED + 1)) rather than ((PASSED++)) — the
# post-increment form returns non-zero when the counter is 0, which
# would abort the whole script under `set -e` on the very first pass.
log_pass() {
  echo -e "${GREEN}[PASS]${NC} $1"
  PASSED=$((PASSED + 1))
}
# Record a failing check: print a [FAIL] line and bump the counter.
# NOTE: uses FAILED=$((FAILED + 1)) rather than ((FAILED++)) — the
# post-increment form returns non-zero when the counter is 0, which
# would abort the whole script under `set -e` on the first failure.
log_fail() {
  echo -e "${RED}[FAIL]${NC} $1"
  FAILED=$((FAILED + 1))
}
# Print an informational [INFO] line (does not affect pass/fail counters).
log_info() {
  local msg=$1
  echo -e "${YELLOW}[INFO]${NC} ${msg}"
}
# Poll a health URL until it responds or the attempt budget is exhausted.
# Arguments:
#   $1 - URL to poll
#   $2 - human-readable service name (for log messages)
# Returns: 0 once the URL answers, 1 after max_attempts failures.
wait_for_service() {
  local url=$1
  local name=$2
  local -r max_attempts=30
  local attempt=1

  log_info "Waiting for $name ($url)..."

  while (( attempt <= max_attempts )); do
    # --max-time bounds each probe so a hung connection cannot stall the loop.
    if curl -sf --max-time "$TIMEOUT" "$url" > /dev/null 2>&1; then
      log_pass "$name is ready"
      return 0
    fi
    sleep 2
    # Plain assignment instead of ((attempt++)): the arithmetic command
    # returns non-zero when its value is 0, which would trip `set -e`.
    attempt=$((attempt + 1))
  done

  log_fail "$name is not ready after $max_attempts attempts"
  return 1
}
# Test health endpoints
test_health() {
  log_test "Testing health endpoints..."

  # Gateway and Inference Engine are mandatory: both must report ok/healthy.
  local entry name base
  for entry in "Gateway|$GATEWAY_URL" "Inference Engine|$INFERENCE_URL"; do
    name=${entry%%|*}
    base=${entry#*|}
    if curl -sf "$base/health" | grep -q "ok\|healthy"; then
      log_pass "$name health check"
    else
      log_fail "$name health check"
    fi
  done

  # vLLM health (if available) — optional, so absence is only informational.
  if curl -sf "$VLLM_URL/health" > /dev/null 2>&1; then
    log_pass "vLLM health check"
  else
    log_info "vLLM health check skipped (may not be running)"
  fi
}
# Test models endpoint: expects an OpenAI-style {"data": [{"id": ...}, ...]}.
test_models() {
  log_test "Testing models endpoint..."

  # Guard the curl: without `|| true`, a connection failure would abort the
  # whole script under `set -e` instead of being recorded as a FAIL.
  # (The other tests in this file already guard their curls this way.)
  local response
  response=$(curl -sf --max-time "$TIMEOUT" "$GATEWAY_URL/v1/models" 2>&1) || true

  if echo "$response" | grep -q "data"; then
    log_pass "Models endpoint returns data"

    # Count "id" keys with grep -o | wc -l: the JSON is typically a single
    # line, so grep -c (line count) would undercount multiple models.
    local model_count
    model_count=$(echo "$response" | grep -o '"id"' | wc -l)
    if [ "$model_count" -gt 0 ]; then
      log_pass "Models endpoint returned $model_count model(s)"
    else
      log_fail "No models found"
    fi
  else
    log_fail "Models endpoint failed"
  fi
}
# Test chat completion: POST a minimal request and check the response shape.
test_chat_completion() {
  log_test "Testing chat completion..."

  # Pick the first advertised model id. Guarded with `|| true` so a gateway
  # failure records a fallback instead of killing the script under `set -e`.
  # The pattern tolerates optional whitespace around the colon, which the
  # previous literal '"id":"' match did not.
  local model
  model=$(curl -sf "$GATEWAY_URL/v1/models" \
    | grep -o '"id"[[:space:]]*:[[:space:]]*"[^"]*"' \
    | head -1 \
    | sed 's/.*"\([^"]*\)"$/\1/') || true

  if [ -z "$model" ]; then
    log_info "No model available, using default"
    model="tinyllama"
  fi

  log_info "Using model: $model"

  # Simple chat completion
  local response
  response=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{
      \"model\": \"$model\",
      \"messages\": [{\"role\": \"user\", \"content\": \"Hello, respond with exactly 'Hi there!'\"}],
      \"max_tokens\": 50
    }" --max-time "$TIMEOUT" 2>&1) || true

  if echo "$response" | grep -q "choices"; then
    log_pass "Chat completion returned response"

    # Check for content
    if echo "$response" | grep -q "content"; then
      log_pass "Chat completion contains content"
    else
      log_fail "Chat completion missing content"
    fi

    # Check for usage
    if echo "$response" | grep -q "usage"; then
      log_pass "Chat completion contains usage stats"
    else
      log_fail "Chat completion missing usage stats"
    fi
  else
    log_fail "Chat completion failed: $response"
  fi
}
# Test metrics endpoint: look for any of the expected Prometheus series names.
test_metrics() {
  log_test "Testing metrics endpoint..."

  local body
  body=$(curl -sf "$INFERENCE_URL/metrics" 2>&1) || true

  # Substring match via case globs (equivalent to grep -q "a\|b").
  case "$body" in
    *llm_requests_total*|*llm_backend_health*)
      log_pass "Metrics endpoint returns Prometheus metrics"
      ;;
    *)
      log_info "Metrics endpoint may not have data yet"
      ;;
  esac
}
# Test LoRA endpoints: adapter listing and feature status. Both are
# optional, so a miss is logged as INFO rather than FAIL.
test_lora() {
  log_test "Testing LoRA endpoints..."

  local body

  # List adapters
  body=$(curl -sf "$GATEWAY_URL/v1/lora/adapters" 2>&1) || true
  case "$body" in
    *adapters*|*count*) log_pass "LoRA adapters endpoint works" ;;
    *)                  log_info "LoRA endpoint may not be available" ;;
  esac

  # LoRA status
  body=$(curl -sf "$GATEWAY_URL/v1/lora/status" 2>&1) || true
  case "$body" in
    *enabled*) log_pass "LoRA status endpoint works" ;;
    *)         log_info "LoRA status endpoint may not be available" ;;
  esac
}
# Test error handling: malformed requests should yield an error response
# (or fail the curl outright, which we also count as an error).
test_error_handling() {
  log_test "Testing error handling..."

  local reply

  # Invalid model
  reply=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{"model": "nonexistent-model-xyz", "messages": [{"role": "user", "content": "test"}]}' \
    --max-time "$TIMEOUT" 2>&1) || reply="error"

  if grep -qi -e "error" -e "not found" <<<"$reply"; then
    log_pass "Invalid model returns error"
  else
    log_info "Invalid model handling: $reply"
  fi

  # Invalid request (empty messages array)
  reply=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{"model": "test", "messages": []}' \
    --max-time "$TIMEOUT" 2>&1) || reply="error"

  if grep -qi -e "error" -e "invalid" <<<"$reply"; then
    log_pass "Invalid request returns error"
  else
    log_info "Invalid request handling: $reply"
  fi
}
# Print the pass/fail totals and exit: status 1 if anything failed, else 0.
print_summary() {
  local bar="=============================================="

  echo ""
  echo "$bar"
  echo "Test Summary"
  echo "$bar"
  echo -e "Passed: ${GREEN}$PASSED${NC}"
  echo -e "Failed: ${RED}$FAILED${NC}"
  echo "$bar"

  if (( FAILED > 0 )); then
    echo -e "${RED}Some tests failed!${NC}"
    exit 1
  fi

  echo -e "${GREEN}All tests passed!${NC}"
  exit 0
}
# Main: wait for the mandatory services, run every test suite in order,
# then print the summary (which exits with the overall status).
main() {
  local banner="=============================================="

  echo "$banner"
  echo "Local LLM Agent E2E Tests"
  echo "$banner"
  echo ""

  # Wait for services — under `set -e` the run aborts if either never comes up.
  wait_for_service "$GATEWAY_URL/health" "Gateway"
  wait_for_service "$INFERENCE_URL/health" "Inference Engine"

  echo ""

  # Run the test suites, separated by blank lines.
  local suite
  for suite in test_health test_models test_chat_completion \
               test_metrics test_lora; do
    "$suite"
    echo ""
  done
  test_error_handling

  # Summary
  print_summary
}
# Run main
main "$@"