---
# Prometheus configuration for Local LLM Agent
# ===========================================================================

global:
  # Default interval at which targets are scraped and rules are evaluated.
  scrape_interval: 15s
  evaluation_interval: 15s
  # Labels attached to any time series or alert leaving this Prometheus.
  external_labels:
    monitor: 'local-llm-agent'

# Alertmanager configuration (optional)
# alerting:
#   alertmanagers:
#     - static_configs:
#         - targets:
#             - alertmanager:9093

# Rule files (optional)
# rule_files:
#   - /etc/prometheus/rules/*.yml

# Scrape configurations
scrape_configs:
  # Prometheus self-monitoring
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
        labels:
          service: 'prometheus'

  # Inference Engine metrics
  - job_name: 'inference-engine'
    static_configs:
      - targets: ['inference-engine:3161']
        labels:
          service: 'inference-engine'
    metrics_path: /metrics
    # Scrape more frequently than the global default for latency-sensitive
    # inference metrics.
    scrape_interval: 10s
    scrape_timeout: 5s

  # Gateway metrics (if available)
  - job_name: 'gateway'
    static_configs:
      - targets: ['gateway:3160']
        labels:
          service: 'gateway'
    metrics_path: /metrics
    scrape_interval: 10s
    scrape_timeout: 5s
    # Gateway may not have metrics endpoint yet
    honor_labels: true

  # Node Exporter (system metrics)
  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']
        labels:
          service: 'node-exporter'

  # vLLM metrics (when using vLLM backend)
  - job_name: 'vllm'
    static_configs:
      - targets: ['vllm:8000']
        labels:
          service: 'vllm'
    metrics_path: /metrics
    scrape_interval: 10s
    scrape_timeout: 5s
    # vLLM may not always be available
    honor_labels: true

  # GPU Exporter (uncomment if using nvidia-gpu-exporter)
  # - job_name: 'nvidia-gpu'
  #   static_configs:
  #     - targets: ['nvidia-gpu-exporter:9835']
  #       labels:
  #         service: 'nvidia-gpu'