---
# Local LLM Agent - Monitoring Stack (Prometheus + Grafana)
# ===========================================================================
#
# Add this to your existing compose setup for monitoring:
#   docker-compose -f docker-compose.yml -f docker-compose.monitoring.yml up -d
#
# For vLLM production:
#   docker-compose -f docker-compose.vllm.yml -f docker-compose.monitoring.yml up -d
#
# ===========================================================================

services:
  # Prometheus - Metrics Collection
  prometheus:
    image: prom/prometheus:v2.48.0
    container_name: local-llm-prometheus
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      - '--web.enable-lifecycle'
      # NOTE(review): the admin API permits destructive TSDB operations
      # (snapshot/delete); keep port 9090 firewalled or drop this flag
      # outside local development.
      - '--web.enable-admin-api'
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Grafana - Visualization
  grafana:
    image: grafana/grafana:10.2.0
    container_name: local-llm-grafana
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      # NOTE(review): default credentials (admin/admin) — change before
      # exposing this port beyond localhost.
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_SERVER_ROOT_URL=http://localhost:3000
      - GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/var/lib/grafana/dashboards/local-llm.json
    volumes:
      - grafana-data:/var/lib/grafana
      - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
    depends_on:
      # Wait for Prometheus to pass its healthcheck before starting Grafana,
      # so the provisioned datasource resolves on first load.
      prometheus:
        condition: service_healthy
    networks:
      - llm-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3000/api/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Node Exporter - System Metrics (optional)
  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: local-llm-node-exporter
    ports:
      - "9100:9100"
    volumes:
      # Host pseudo-filesystems mounted read-only so the exporter can read
      # CPU/memory/disk stats for the host rather than the container.
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # $$ escapes $ from docker-compose variable interpolation, yielding a
      # literal $ (end-of-string anchor) in the regex the exporter receives.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    networks:
      - llm-network
    restart: unless-stopped

  # NVIDIA GPU Exporter (for GPU metrics, requires nvidia-container-toolkit)
  # Uncomment if using GPU
  # nvidia-gpu-exporter:
  #   image: utkuozdemir/nvidia_gpu_exporter:1.2.0
  #   container_name: local-llm-gpu-exporter
  #   ports:
  #     - "9835:9835"
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: all
  #             capabilities: [gpu]
  #   networks:
  #     - llm-network
  #   restart: unless-stopped

networks:
  # Shared with the main application stack; must already exist
  # (e.g. created by the base compose file or `docker network create llm-network`).
  llm-network:
    external: true

volumes:
  prometheus-data:
    name: local-llm-prometheus-data
  grafana-data:
    name: local-llm-grafana-data

# ============================================================================
# Usage:
# ============================================================================
#
# 1. Start monitoring with development stack:
#    docker-compose -f docker-compose.yml -f docker-compose.monitoring.yml up -d
#
# 2. Start monitoring with vLLM stack:
#    docker-compose -f docker-compose.vllm.yml -f docker-compose.monitoring.yml up -d
#
# 3. Access:
#    - Prometheus: http://localhost:9090
#    - Grafana: http://localhost:3000 (admin/admin)
#    - Node Exporter: http://localhost:9100/metrics
#
# 4. Check metrics endpoint:
#    NOTE(review): 3161 is presumably the LLM app's metrics port from the
#    base compose file — confirm against docker-compose.yml.
#    curl http://localhost:3161/metrics
#
# 5. Useful Prometheus queries:
#    - Request rate: rate(llm_requests_total[5m])
#    - P99 latency: histogram_quantile(0.99, rate(llm_request_duration_seconds_bucket[5m]))
#    - Error rate: rate(llm_errors_total[5m])
#    - Tokens per second: rate(llm_completion_tokens_total[5m])
#
# ============================================================================