diff --git a/docker-compose.yml b/docker-compose.yml index 57b8e83..0fbb6b9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -version: "2" +version: "3.1" volumes: prometheus_data: {} @@ -6,22 +6,19 @@ volumes: networks: front-tier: - driver: bridge back-tier: - driver: bridge services: prometheus: - image: prom/prometheus - container_name: prometheus + image: prom/prometheus:v2.0.0 volumes: - ./prometheus/:/etc/prometheus/ - prometheus_data:/prometheus command: - - '-config.file=/etc/prometheus/prometheus.yml' - - '-storage.local.path=/prometheus' - expose: - - 9090 + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' ports: - 9090:9090 links: diff --git a/prometheus/alert.rules b/prometheus/alert.rules index 1da000a..15f28c1 100644 --- a/prometheus/alert.rules +++ b/prometheus/alert.rules @@ -1,13 +1,22 @@ -ALERT service_down - IF up == 0 - ANNOTATIONS { - summary = "Instance {{ $labels.instance }} is down :( ", - description = "{{ $labels.instance }} of job {{ $labels.job }} is not happy.", - } - -ALERT high_load - IF node_load1 > 0.5 - ANNOTATIONS { - summary = "Instance {{ $labels.instance }} under high load", - description = "{{ $labels.instance }} of job {{ $labels.job }} is under high load.", - } +groups: +- name: example + rules: + + # Alert for any instance that is unreachable for >2 minutes. + - alert: service_down + expr: up == 0 + for: 2m + labels: + severity: page + annotations: + summary: "Instance {{ $labels.instance }} down" + description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes." 
+ + - alert: high_load + expr: node_load1 > 0.5 + for: 2m + labels: + severity: page + annotations: + summary: "Instance {{ $labels.instance }} under high load" + description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load." diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index b2e792d..ae6ffd7 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -11,21 +11,27 @@ global: # Load and evaluate rules in this file every 'evaluation_interval' seconds. rule_files: - - "alert.rules" + - 'alert.rules' # - "first.rules" # - "second.rules" # A scrape configuration containing exactly one endpoint to scrape: # Here it's Prometheus itself. scrape_configs: - # The job name is added as a label `job=` to any timeseries scraped from this config. - - job_name: 'Monitoring_Mayhem' + + - job_name: 'prometheus' # Override the global default and scrape targets from this job every 5 seconds. -# scrape_interval: 5s - - # metrics_path defaults to '/metrics' - # scheme defaults to 'http'. + scrape_interval: 5s static_configs: - - targets: ['localhost:9090', 'metrics:9171'] + - targets: ['localhost:9090'] + + + - job_name: 'metrics' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + + static_configs: + - targets: ['metrics:9171']