#!/usr/bin/env bash
# ==============================================================================
# End-to-End Test Script for Local LLM Agent
# ==============================================================================
# Tests the full stack: Gateway -> Inference Engine -> vLLM
#
# Usage:
#   ./scripts/test-e2e.sh
#
# Environment overrides:
#   GATEWAY_URL   (default http://localhost:3160)
#   INFERENCE_URL (default http://localhost:3161)
#   VLLM_URL      (default http://localhost:8000)
#
# Prerequisites:
#   - Stack running (docker-compose up -d)
#   - curl installed
# ==============================================================================

set -euo pipefail

# Colors (ANSI escapes, rendered via `echo -e`)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Configuration (overridable via environment)
GATEWAY_URL="${GATEWAY_URL:-http://localhost:3160}"
INFERENCE_URL="${INFERENCE_URL:-http://localhost:3161}"
VLLM_URL="${VLLM_URL:-http://localhost:8000}"
readonly TIMEOUT=30

# Counters
PASSED=0
FAILED=0

# --- Logging helpers ----------------------------------------------------------
# NOTE: counters are bumped with `VAR=$((VAR + 1))` rather than ((VAR++)).
# The post-increment form evaluates to 0 on the first call, so (( )) returns
# exit status 1 and `set -e` would abort the whole script at the first
# log_pass/log_fail.

log_test() {
  echo -e "${BLUE}[TEST]${NC} $1"
}

log_pass() {
  echo -e "${GREEN}[PASS]${NC} $1"
  PASSED=$((PASSED + 1))
}

log_fail() {
  echo -e "${RED}[FAIL]${NC} $1"
  FAILED=$((FAILED + 1))
}

log_info() {
  echo -e "${YELLOW}[INFO]${NC} $1"
}

#######################################
# Poll a URL until it answers or the attempt budget runs out.
# Arguments: $1 - URL to poll, $2 - human-readable service name
# Returns:   0 when the service responds, 1 after max_attempts failures
#######################################
wait_for_service() {
  local url=$1
  local name=$2
  local max_attempts=30
  local attempt=1

  log_info "Waiting for $name ($url)..."
  while [ "$attempt" -le "$max_attempts" ]; do
    if curl -sf "$url" > /dev/null 2>&1; then
      log_pass "$name is ready"
      return 0
    fi
    sleep 2
    attempt=$((attempt + 1))
  done

  log_fail "$name is not ready after $max_attempts attempts"
  return 1
}

# Verify the /health endpoints of all three services.
# (grep -E used instead of BRE "\|" alternation, which is a GNU extension.)
test_health() {
  log_test "Testing health endpoints..."

  # Gateway health
  if curl -sf "$GATEWAY_URL/health" | grep -Eq "ok|healthy"; then
    log_pass "Gateway health check"
  else
    log_fail "Gateway health check"
  fi

  # Inference Engine health
  if curl -sf "$INFERENCE_URL/health" | grep -Eq "ok|healthy"; then
    log_pass "Inference Engine health check"
  else
    log_fail "Inference Engine health check"
  fi

  # vLLM health (optional backend: absence is informational, not a failure)
  if curl -sf "$VLLM_URL/health" > /dev/null 2>&1; then
    log_pass "vLLM health check"
  else
    log_info "vLLM health check skipped (may not be running)"
  fi
}

# Verify /v1/models returns a "data" payload with at least one model id.
test_models() {
  log_test "Testing models endpoint..."

  local response model_count
  # `|| true`: a curl failure should surface as a [FAIL] below, not abort
  # the whole run under `set -e`.
  response=$(curl -sf "$GATEWAY_URL/v1/models") || true

  if echo "$response" | grep -q "data"; then
    log_pass "Models endpoint returns data"

    # Crude occurrence count of "id" keys in the JSON payload.
    model_count=$(echo "$response" | grep -o '"id"' | wc -l)
    if [ "$model_count" -gt 0 ]; then
      log_pass "Models endpoint returned $model_count model(s)"
    else
      log_fail "No models found"
    fi
  else
    log_fail "Models endpoint failed"
  fi
}

# Issue a chat completion through the gateway and check the response shape
# (choices / content / usage keys).
test_chat_completion() {
  log_test "Testing chat completion..."

  local model response
  # Discover an available model id. The `|| true` keeps the script alive
  # under set -e/pipefail when the gateway is down or grep finds no match,
  # so the default-model fallback below can actually run.
  model=$(curl -sf "$GATEWAY_URL/v1/models" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4) || true
  if [ -z "$model" ]; then
    log_info "No model available, using default"
    model="tinyllama"
  fi
  log_info "Using model: $model"

  # Simple chat completion
  response=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{
      \"model\": \"$model\",
      \"messages\": [{\"role\": \"user\", \"content\": \"Hello, respond with exactly 'Hi there!'\"}],
      \"max_tokens\": 50
    }" --max-time "$TIMEOUT" 2>&1) || true

  if echo "$response" | grep -q "choices"; then
    log_pass "Chat completion returned response"

    # Check for content
    if echo "$response" | grep -q "content"; then
      log_pass "Chat completion contains content"
    else
      log_fail "Chat completion missing content"
    fi

    # Check for usage
    if echo "$response" | grep -q "usage"; then
      log_pass "Chat completion contains usage stats"
    else
      log_fail "Chat completion missing usage stats"
    fi
  else
    log_fail "Chat completion failed: $response"
  fi
}

# Verify the inference engine exposes Prometheus metrics.
test_metrics() {
  log_test "Testing metrics endpoint..."

  local response
  response=$(curl -sf "$INFERENCE_URL/metrics" 2>&1) || true

  if echo "$response" | grep -Eq "llm_requests_total|llm_backend_health"; then
    log_pass "Metrics endpoint returns Prometheus metrics"
  else
    log_info "Metrics endpoint may not have data yet"
  fi
}

# Probe the optional LoRA adapter endpoints; absence is informational only.
test_lora() {
  log_test "Testing LoRA endpoints..."

  local response

  # List adapters
  response=$(curl -sf "$GATEWAY_URL/v1/lora/adapters" 2>&1) || true
  if echo "$response" | grep -Eq "adapters|count"; then
    log_pass "LoRA adapters endpoint works"
  else
    log_info "LoRA endpoint may not be available"
  fi

  # LoRA status
  response=$(curl -sf "$GATEWAY_URL/v1/lora/status" 2>&1) || true
  if echo "$response" | grep -q "enabled"; then
    log_pass "LoRA status endpoint works"
  else
    log_info "LoRA status endpoint may not be available"
  fi
}

# Verify the gateway rejects bad requests with an error payload.
test_error_handling() {
  log_test "Testing error handling..."

  local response

  # Invalid model
  response=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{"model": "nonexistent-model-xyz", "messages": [{"role": "user", "content": "test"}]}' \
    --max-time "$TIMEOUT" 2>&1) || response="error"

  if echo "$response" | grep -Eqi "error|not found"; then
    log_pass "Invalid model returns error"
  else
    log_info "Invalid model handling: $response"
  fi

  # Invalid request (empty messages array)
  response=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{"model": "test", "messages": []}' \
    --max-time "$TIMEOUT" 2>&1) || response="error"

  if echo "$response" | grep -Eqi "error|invalid"; then
    log_pass "Invalid request returns error"
  else
    log_info "Invalid request handling: $response"
  fi
}

# Print pass/fail totals and exit 1 if anything failed (CI-friendly).
print_summary() {
  echo ""
  echo "=============================================="
  echo "Test Summary"
  echo "=============================================="
  echo -e "Passed: ${GREEN}$PASSED${NC}"
  echo -e "Failed: ${RED}$FAILED${NC}"
  echo "=============================================="

  if [ "$FAILED" -gt 0 ]; then
    echo -e "${RED}Some tests failed!${NC}"
    exit 1
  else
    echo -e "${GREEN}All tests passed!${NC}"
    exit 0
  fi
}

# Main: wait for the core services, then run every test group in order.
main() {
  echo "=============================================="
  echo "Local LLM Agent E2E Tests"
  echo "=============================================="
  echo ""

  # Wait for services
  wait_for_service "$GATEWAY_URL/health" "Gateway"
  wait_for_service "$INFERENCE_URL/health" "Inference Engine"
  echo ""

  # Run tests
  test_health
  echo ""
  test_models
  echo ""
  test_chat_completion
  echo ""
  test_metrics
  echo ""
  test_lora
  echo ""
  test_error_handling

  # Summary
  print_summary
}

# Run main
main "$@"