#!/bin/bash
# ==============================================================================
# End-to-End Test Script for Local LLM Agent
# ==============================================================================
# Tests the full stack: Gateway -> Inference Engine -> vLLM
#
# Usage:
#   ./scripts/test-e2e.sh
#
# Prerequisites:
#   - Stack running (docker-compose up -d)
#   - curl installed
# ==============================================================================

set -euo pipefail
# Colors (readonly: only read by the log_* helpers below)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Configuration — every endpoint AND the request timeout can be overridden
# via the environment (TIMEOUT was previously hard-coded).
GATEWAY_URL="${GATEWAY_URL:-http://localhost:3160}"
INFERENCE_URL="${INFERENCE_URL:-http://localhost:3161}"
VLLM_URL="${VLLM_URL:-http://localhost:8000}"
TIMEOUT="${TIMEOUT:-30}"

# Counters — mutated by log_pass/log_fail, reported by print_summary
PASSED=0
FAILED=0
# Logging helpers -------------------------------------------------------------

# Print a [TEST] banner for the check that is about to run.
log_test() {
  local msg=$1
  echo -e "${BLUE}[TEST]${NC} ${msg}"
}
# Record a passing check: print a [PASS] line and bump the counter.
# NOTE: uses PASSED=$((PASSED + 1)) rather than ((PASSED++)) — the
# post-increment form returns non-zero when the counter is 0, which
# would abort the whole script under `set -e` on the very first pass.
log_pass() {
  echo -e "${GREEN}[PASS]${NC} $1"
  PASSED=$((PASSED + 1))
}
# Record a failing check: print a [FAIL] line and bump the counter.
# NOTE: uses FAILED=$((FAILED + 1)) rather than ((FAILED++)) — the
# post-increment form returns non-zero when the counter is 0, which
# would abort the whole script under `set -e` on the first failure.
log_fail() {
  echo -e "${RED}[FAIL]${NC} $1"
  FAILED=$((FAILED + 1))
}
# Print an informational [INFO] line (does not affect pass/fail counters).
log_info() {
  local msg=$1
  echo -e "${YELLOW}[INFO]${NC} ${msg}"
}
# Poll a health URL until it responds or the attempt budget is exhausted.
# Arguments:
#   $1 - URL to poll
#   $2 - human-readable service name (for log messages)
# Returns: 0 once the URL answers, 1 after max_attempts failures.
wait_for_service() {
  local url=$1
  local name=$2
  local -r max_attempts=30
  local attempt=1

  log_info "Waiting for $name ($url)..."

  while (( attempt <= max_attempts )); do
    # --max-time bounds each probe so a hung connection cannot stall the loop.
    if curl -sf --max-time "$TIMEOUT" "$url" > /dev/null 2>&1; then
      log_pass "$name is ready"
      return 0
    fi
    sleep 2
    # Plain assignment instead of ((attempt++)): the arithmetic command
    # returns non-zero when its value is 0, which would trip `set -e`.
    attempt=$((attempt + 1))
  done

  log_fail "$name is not ready after $max_attempts attempts"
  return 1
}
# Test health endpoints
test_health() {
  log_test "Testing health endpoints..."

  # Gateway and Inference Engine are mandatory: both must report ok/healthy.
  local entry name base
  for entry in "Gateway|$GATEWAY_URL" "Inference Engine|$INFERENCE_URL"; do
    name=${entry%%|*}
    base=${entry#*|}
    if curl -sf "$base/health" | grep -q "ok\|healthy"; then
      log_pass "$name health check"
    else
      log_fail "$name health check"
    fi
  done

  # vLLM health (if available) — optional, so absence is only informational.
  if curl -sf "$VLLM_URL/health" > /dev/null 2>&1; then
    log_pass "vLLM health check"
  else
    log_info "vLLM health check skipped (may not be running)"
  fi
}
# Test models endpoint: expects an OpenAI-style {"data": [{"id": ...}, ...]}.
test_models() {
  log_test "Testing models endpoint..."

  # Guard the curl: without `|| true`, a connection failure would abort the
  # whole script under `set -e` instead of being recorded as a FAIL.
  # (The other tests in this file already guard their curls this way.)
  local response
  response=$(curl -sf --max-time "$TIMEOUT" "$GATEWAY_URL/v1/models" 2>&1) || true

  if echo "$response" | grep -q "data"; then
    log_pass "Models endpoint returns data"

    # Count "id" keys with grep -o | wc -l: the JSON is typically a single
    # line, so grep -c (line count) would undercount multiple models.
    local model_count
    model_count=$(echo "$response" | grep -o '"id"' | wc -l)
    if [ "$model_count" -gt 0 ]; then
      log_pass "Models endpoint returned $model_count model(s)"
    else
      log_fail "No models found"
    fi
  else
    log_fail "Models endpoint failed"
  fi
}
# Test chat completion: POST a minimal request and check the response shape.
test_chat_completion() {
  log_test "Testing chat completion..."

  # Pick the first advertised model id. Guarded with `|| true` so a gateway
  # failure records a fallback instead of killing the script under `set -e`.
  # The pattern tolerates optional whitespace around the colon, which the
  # previous literal '"id":"' match did not.
  local model
  model=$(curl -sf "$GATEWAY_URL/v1/models" \
    | grep -o '"id"[[:space:]]*:[[:space:]]*"[^"]*"' \
    | head -1 \
    | sed 's/.*"\([^"]*\)"$/\1/') || true

  if [ -z "$model" ]; then
    log_info "No model available, using default"
    model="tinyllama"
  fi

  log_info "Using model: $model"

  # Simple chat completion
  local response
  response=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{
      \"model\": \"$model\",
      \"messages\": [{\"role\": \"user\", \"content\": \"Hello, respond with exactly 'Hi there!'\"}],
      \"max_tokens\": 50
    }" --max-time "$TIMEOUT" 2>&1) || true

  if echo "$response" | grep -q "choices"; then
    log_pass "Chat completion returned response"

    # Check for content
    if echo "$response" | grep -q "content"; then
      log_pass "Chat completion contains content"
    else
      log_fail "Chat completion missing content"
    fi

    # Check for usage
    if echo "$response" | grep -q "usage"; then
      log_pass "Chat completion contains usage stats"
    else
      log_fail "Chat completion missing usage stats"
    fi
  else
    log_fail "Chat completion failed: $response"
  fi
}
# Test metrics endpoint: look for any of the expected Prometheus series names.
test_metrics() {
  log_test "Testing metrics endpoint..."

  local body
  body=$(curl -sf "$INFERENCE_URL/metrics" 2>&1) || true

  # Substring match via case globs (equivalent to grep -q "a\|b").
  case "$body" in
    *llm_requests_total*|*llm_backend_health*)
      log_pass "Metrics endpoint returns Prometheus metrics"
      ;;
    *)
      log_info "Metrics endpoint may not have data yet"
      ;;
  esac
}
# Test LoRA endpoints: adapter listing and feature status. Both are
# optional, so a miss is logged as INFO rather than FAIL.
test_lora() {
  log_test "Testing LoRA endpoints..."

  local body

  # List adapters
  body=$(curl -sf "$GATEWAY_URL/v1/lora/adapters" 2>&1) || true
  case "$body" in
    *adapters*|*count*) log_pass "LoRA adapters endpoint works" ;;
    *)                  log_info "LoRA endpoint may not be available" ;;
  esac

  # LoRA status
  body=$(curl -sf "$GATEWAY_URL/v1/lora/status" 2>&1) || true
  case "$body" in
    *enabled*) log_pass "LoRA status endpoint works" ;;
    *)         log_info "LoRA status endpoint may not be available" ;;
  esac
}
# Test error handling: malformed requests should yield an error response
# (or fail the curl outright, which we also count as an error).
test_error_handling() {
  log_test "Testing error handling..."

  local reply

  # Invalid model
  reply=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{"model": "nonexistent-model-xyz", "messages": [{"role": "user", "content": "test"}]}' \
    --max-time "$TIMEOUT" 2>&1) || reply="error"

  if grep -qi -e "error" -e "not found" <<<"$reply"; then
    log_pass "Invalid model returns error"
  else
    log_info "Invalid model handling: $reply"
  fi

  # Invalid request (empty messages array)
  reply=$(curl -sf -X POST "$GATEWAY_URL/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{"model": "test", "messages": []}' \
    --max-time "$TIMEOUT" 2>&1) || reply="error"

  if grep -qi -e "error" -e "invalid" <<<"$reply"; then
    log_pass "Invalid request returns error"
  else
    log_info "Invalid request handling: $reply"
  fi
}
# Print the pass/fail totals and exit: status 1 if anything failed, else 0.
print_summary() {
  local bar="=============================================="

  echo ""
  echo "$bar"
  echo "Test Summary"
  echo "$bar"
  echo -e "Passed: ${GREEN}$PASSED${NC}"
  echo -e "Failed: ${RED}$FAILED${NC}"
  echo "$bar"

  if (( FAILED > 0 )); then
    echo -e "${RED}Some tests failed!${NC}"
    exit 1
  fi

  echo -e "${GREEN}All tests passed!${NC}"
  exit 0
}
# Main: wait for the mandatory services, run every test suite in order,
# then print the summary (which exits with the overall status).
main() {
  local banner="=============================================="

  echo "$banner"
  echo "Local LLM Agent E2E Tests"
  echo "$banner"
  echo ""

  # Wait for services — under `set -e` the run aborts if either never comes up.
  wait_for_service "$GATEWAY_URL/health" "Gateway"
  wait_for_service "$INFERENCE_URL/health" "Inference Engine"

  echo ""

  # Run the test suites, separated by blank lines.
  local suite
  for suite in test_health test_models test_chat_completion \
               test_metrics test_lora; do
    "$suite"
    echo ""
  done
  test_error_handling

  # Summary
  print_summary
}
# Run main
main "$@"