diff --git a/Grafana-Org-Stats.json b/Grafana-Org-Stats.json index 44196b9..b1f35b2 100644 --- a/Grafana-Org-Stats.json +++ b/Grafana-Org-Stats.json @@ -234,4 +234,4 @@ "timezone": "browser", "title": "Org Stats", "version": 0 -} \ No newline at end of file +} diff --git a/README.md b/README.md index 91adb03..b93e01d 100644 --- a/README.md +++ b/README.md @@ -1,63 +1,69 @@ -# Internet Monitoring Docker Stack with Prometheus + Grafana +# Internet Speed Monitoring Stack -Stand-up a Docker [Prometheus](http://prometheus.io/) stack containing Prometheus, Grafana with [blackbox-exporter](https://github.com/prometheus/blackbox_exporter), and [speedtest-exporter](https://github.com/MiguelNdeCarvalho/speedtest-exporter) to collect and graph home Internet reliability and throughput. +A simple and modern tool for monitoring internet speed history and uptime of specified URLs. + +This is all implemented with a simple docker-compose stack with [Prometheus](http://prometheus.io/) + [Grafana](https://grafana.com/) with [blackbox-exporter](https://github.com/prometheus/blackbox_exporter) and [speedtest-exporter](https://github.com/MiguelNdeCarvalho/speedtest-exporter) to collect and graph home Internet reliability and throughput. + +![dashboard-1](https://user-images.githubusercontent.com/20902250/124679414-db054500-deb3-11eb-8757-c578d59366d6.png) +![dashboard-2](https://user-images.githubusercontent.com/20902250/124681501-2f122880-deb8-11eb-86a3-8a990d035215.png) ## Pre-requisites -Make sure Docker and [Docker Compose](https://docs.docker.com/compose/install/) are installed on your Docker host machine. +Make sure [Docker](https://docs.docker.com/engine/install/) and [Docker Compose](https://docs.docker.com/compose/install/) are installed on your Docker host machine. ## Configuration -To change what hosts you ping you change the `targets` section in [/prometheus/pinghosts.yaml](./prometheus/pinghosts.yaml) file. 
+- To change the default Grafana credentials, change the `GF_SECURITY_ADMIN_USER` and `GF_SECURITY_ADMIN_PASSWORD` variables in the [`./grafana/auth.env`](./grafana/auth.env) file. +- To change which hosts you ping, change the `targets` section in the [`./prometheus/blackbox.yaml`](./prometheus/blackbox.yaml) file. +- To change how often the speedtest is run, edit the `scrape_interval` under the `speedtest` category in [`./prometheus/prometheus.yml`](./prometheus/prometheus.yml). The default interval is 30 minutes, which might be too frequent if you have a limit on downloads. -For speedtest the only relevant configuration is how often you want the check to happen. It is at 30 minutes by default which might be too much if you have limit on downloads. This is changed by editing `scrape_interval` under `speedtest` in [/prometheus/prometheus.yml](./prometheus/prometheus.yml). +## Run the docker containers -## Start the monitoring containers +After you've configured everything, you can simply use docker-compose to spin up all of the needed services: -Use `docker-compose` which handles starting all of the needed containers and specifies how they communicate. - -``` +```sh docker-compose up -d ``` -It will take a while until grafana loads, so be patient. +It will take a while until Grafana loads, so be patient. Once it does, it will be accessible via: `http://<Host IP Address>:3000` (default credentials are username: admin, password: admin). -The Grafana Dashboard will now be accessible via: `http://:3030` for example +If all works, you should see 2 dashboards after clicking on the Search button in the left navbar: -username - admin -password - admin (Password is stored in the `config.monitoring` env file) - -The DataSource and Dashboard for Grafana are automatically provisioned. - -If all works it should be available at - if no data shows up try change the timeduration to something smaller. 
- -![image](https://user-images.githubusercontent.com/20902250/124660583-ecd7ef80-de95-11eb-89f4-cf4b87644f8b.png) + ## Interesting urls +- speedtest exporter endpoint. Does take about 30 seconds to show its result as it runs an actual speedtest when requested. +- blackbox exporter endpoint. Lets you see what has failed/succeeded. - shows status of monitored targets as seen from prometheus - in this case which hosts being pinged and speedtest. note: speedtest will take a while before it shows as UP as it takes about 30s to respond. - shows prometheus value for `probe_http_status_code` for each host. You can edit/play with additional values. Useful to check everything is okay in prometheus (in case Grafana is not showing the data you expect). -- blackbox exporter endpoint. Lets you see what have failed/succeeded. -- speedtest exporter endpoint. Does take about 30 seconds to show its result as it runs an actual speedtest when requested. ## Editing the gauges -Everyone has a bit different internet speed requirements and different standards of what's considered as fast, for that reason, you can simply click on the name of any of the gauges, and click edit +Everyone has slightly different internet speed requirements and different standards for what's considered fast. For that reason, you can simply click on the name of any of the gauges and click edit. -An edit menu will show where in the left pannel you can configure the minimum and maximum values shown on the gauge: +An edit menu will show up, where in the left panel you can configure the minimum and maximum values shown on the gauge: -You can also edit the color thresholds depending on the speeds to your liking (i.e. you can set after which point should the color become red/orange/...). Note that with the download/upload gauges you will need to enter the speed in bytes per second, not megabites! +You can also edit the color thresholds depending on the speeds to your liking (i.e. 
you can set the point after which the color becomes red/orange/...). Note that with the download/upload gauges you will need to enter the speed in bits per second, not megabits! +After your editing is done and you're happy with your layout, make sure you save the layout and copy the JSON: + + + + + +Simply click on Copy JSON to clipboard and paste it into [`./grafana/provisioning/dashboards/internet-connection.json`](./grafana/provisioning/dashboards/internet-connection.json). Alternatively, if you're editing the Blackbox probe dashboard, store it in [`probe.json`](./grafana/provisioning/dashboards/probe.json). + ## Thanks and a disclaimer -- Thanks to @maxandersen for making the original project this fork is based on. -- Thanks to @vegasbrianc work on making a [super easy docker](https://github.com/vegasbrianc/github-monitoring) stack for running prometheus and grafana. -- Thanks to @geerlingguy for perfecting the original project. +- Thanks to @maxandersen for making the [original project](https://github.com/maxandersen/internet-monitoring) this fork is based on. +- Thanks to @geerlingguy for perfecting the original project with a [custom fork](https://github.com/geerlingguy/internet-pi/tree/master/internet-monitoring). +- Thanks to @goddenrich for perfecting the original project with a [custom fork](https://github.com/goddenrich/local-network-monitoring). This setup is not secured in any way, so please only use on non-public networks, or find a way to secure it on your own. 
diff --git a/blackbox/config/blackbox.yml b/blackbox/config/blackbox.yml index 4e94827..23fb8ed 100644 --- a/blackbox/config/blackbox.yml +++ b/blackbox/config/blackbox.yml @@ -13,7 +13,7 @@ modules: prober: tcp tcp: query_response: - - expect: "^+OK" + - expect: "^+OK" tls: true tls_config: insecure_skip_verify: false @@ -21,15 +21,15 @@ modules: prober: tcp tcp: query_response: - - expect: "^SSH-2.0-" + - expect: "^SSH-2.0-" irc_banner: prober: tcp tcp: query_response: - - send: "NICK prober" - - send: "USER prober prober prober :prober" - - expect: "PING :([^ ]+)" - send: "PONG ${1}" - - expect: "^:[^ ]+ 001" + - send: "NICK prober" + - send: "USER prober prober prober :prober" + - expect: "PING :([^ ]+)" + send: "PONG ${1}" + - expect: "^:[^ ]+ 001" icmp: prober: icmp diff --git a/docker-compose.yml b/docker-compose.yml index d04f722..c4845cb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,7 +23,7 @@ services: ports: - 9090:9090 links: - - ping:ping + - blackbox:blackbox - speedtest:speedtest networks: - back-tier @@ -37,14 +37,15 @@ services: depends_on: - prometheus ports: - - 3030:3000 + - 3000:3000 env_file: - ./grafana/config.monitoring + - ./grafana/auth.env networks: - back-tier - front-tier - ping: + blackbox: tty: true stdin_open: true expose: @@ -71,21 +72,3 @@ services: restart: always networks: - back-tier - - nodeexp: - privileged: true - image: prom/node-exporter - volumes: - - /proc:/host/proc:ro - - /sys:/host/sys:ro - - /:/rootfs:ro - ports: - - 9100:9100 - restart: always - command: - - '--path.procfs=/host/proc' - - '--path.sysfs=/host/sys' - - --collector.filesystem.ignored-mount-points - - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" - networks: - - back-tier diff --git a/grafana/auth.env b/grafana/auth.env new file mode 100644 index 0000000..1b48124 --- /dev/null +++ b/grafana/auth.env @@ -0,0 +1,2 @@ 
+GF_SECURITY_ADMIN_USER=admin +GF_SECURITY_ADMIN_PASSWORD=admin diff --git a/grafana/config.monitoring b/grafana/config.monitoring index 2f2c8a8..51ba4c2 100644 --- a/grafana/config.monitoring +++ b/grafana/config.monitoring @@ -1,3 +1,2 @@ -GF_SECURITY_ADMIN_PASSWORD=admin GF_USERS_ALLOW_SIGN_UP=false GF_INSTALL_PLUGINS=flant-statusmap-panel,ae3e-plotly-panel diff --git a/grafana/provisioning/dashboards/internet-connection.json b/grafana/provisioning/dashboards/internet-connection.json index 80c7e60..3b0034d 100644 --- a/grafana/provisioning/dashboards/internet-connection.json +++ b/grafana/provisioning/dashboards/internet-connection.json @@ -17,7 +17,6 @@ "editable": true, "gnetId": null, "graphTooltip": 0, - "id": 1, "links": [], "panels": [ { @@ -71,7 +70,7 @@ "overrides": [] }, "gridPos": { - "h": 9, + "h": 8, "w": 6, "x": 0, "y": 0 @@ -154,7 +153,7 @@ "overrides": [] }, "gridPos": { - "h": 9, + "h": 8, "w": 6, "x": 6, "y": 0 @@ -238,8 +237,8 @@ "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 8, + "w": 6, "x": 12, "y": 0 }, @@ -263,6 +262,7 @@ "pluginVersion": "8.0.4", "targets": [ { + "exemplar": true, "expr": "speedtest_ping_latency_milliseconds", "format": "time_series", "instant": false, @@ -275,6 +275,92 @@ "title": "Speedtest Ping", "type": "gauge" }, + { + "cacheTimeout": null, + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 20, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "yellow", + "value": 3 + }, + { + "color": "orange", + "value": 8 + }, + { + "color": "red", + "value": 15 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 
50000, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.0.4", + "targets": [ + { + "exemplar": true, + "expr": "speedtest_jitter_latency_milliseconds", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Speedtest Jitter", + "type": "gauge" + }, { "datasource": "prometheus", "fieldConfig": { @@ -330,10 +416,10 @@ "overrides": [] }, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 0, - "y": 9 + "y": 8 }, "id": 6, "links": [], @@ -373,6 +459,102 @@ "title": "Speedtest Graph", "type": "timeseries" }, + { + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 50, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dtdurationms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 16, + "links": [], + "maxDataPoints": 50000, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "7.4.5", + "targets": [ + { + "exemplar": true, + "expr": "speedtest_ping_latency_milliseconds{}", + "format": "time_series", + 
"hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Ping", + "refId": "A" + }, + { + "exemplar": true, + "expr": "speedtest_jitter_latency_milliseconds{}", + "hide": false, + "interval": "", + "legendFormat": "Jitter", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Speedtest Graph", + "type": "timeseries" + }, { "cards": { "cardHSpacing": 2, @@ -406,10 +588,10 @@ }, "datasource": "prometheus", "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 9 + "h": 6, + "w": 24, + "x": 0, + "y": 15 }, "highlightCards": true, "id": 14, @@ -459,100 +641,6 @@ "labelTemplate": "", "usingSplitLabel": false } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 18 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.4", - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "host", - "repeatDirection": "h", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(probe_http_duration_seconds) by (instance)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "HTTP Request Duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 
1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } } ], "refresh": "5m", @@ -566,7 +654,7 @@ "list": [] }, "time": { - "from": "now-12h", + "from": "now-5m", "to": "now" }, "timepicker": { @@ -597,5 +685,5 @@ "timezone": "browser", "title": "Internet connection", "uid": "NLDu4qznk", - "version": 3 + "version": 2 } diff --git a/grafana/provisioning/dashboards/probe.json b/grafana/provisioning/dashboards/probe.json new file mode 100644 index 0000000..aed8fc5 --- /dev/null +++ b/grafana/provisioning/dashboards/probe.json @@ -0,0 +1,610 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Graph ICMP metrics from the blackbox exporter, Smokeping-style", + "editable": true, + "gnetId": 12412, + "graphTooltip": 0, + "iteration": 1603722601260, + "links": [], + "panels": [ + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateRdYlGn", + "exponent": 0.5, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 7, + "legend": { + "show": true + }, + "reverseYBuckets": true, + "targets": [ + { + "expr": "sum(probe_http_duration_seconds) by (instance)", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ICMP RTT", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": 
"heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "middle", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateRdYlGn", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 8, + "legend": { + "show": true + }, + "reverseYBuckets": true, + "targets": [ + { + "expr": "1-avg_over_time(probe_success{instance=~\"$instance\"}[$__interval])", + "format": "time_series", + "hide": false, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ICMP packet loss", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "percentunit", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "middle", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": null, + "description": "This uses the blackbox exporter, which does not expose packet loss, for example. It could be improved with https://github.com/SuperQ/smokeping_prober because it also keeps track of lost samples (https://github.com/SuperQ/smokeping_prober/issues/24). 
Unfortunately, that still won't make graphs as nice as smokeping, because each probe only keeps one sample, instead of doing multiple like smokeping does (https://github.com/SuperQ/smokeping_prober/issues/36).", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "maxPerRow": 2, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.1", + "pointradius": 0.5, + "points": false, + "renderer": "flot", + "repeat": "instance", + "repeatDirection": "v", + "scopedVars": { + "instance": { + "selected": false, + "text": "github.com", + "value": "github.com" + } + }, + "seriesOverrides": [ + { + "alias": "packet loss", + "color": "#C4162A", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum(probe_http_duration_seconds{instance=~\"$instance\"}) by (instance) > 0", + "instant": false, + "interval": "", + "legendFormat": "RTT", + "refId": "A" + }, + { + "expr": "1-avg_over_time(probe_success{instance=~\"$instance\"}[$__interval])", + "format": "time_series", + "legendFormat": "packet loss", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ICMP round trip time ($instance)", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"decimals": null, + "format": "dtdurations", + "label": "RTT", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "percentunit", + "label": "packet loss", + "logBase": 1, + "max": "1", + "min": "0.0001", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": null, + "description": "This uses the blackbox exporter, which does not expose packet loss, for example. It could be improved with https://github.com/SuperQ/smokeping_prober because it also keeps track of lost samples (https://github.com/SuperQ/smokeping_prober/issues/24). Unfortunately, that still won't make graphs as nice as smokeping, because each probe only keeps one sample, instead of doing multiple like smokeping does (https://github.com/SuperQ/smokeping_prober/issues/36).", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 23 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "maxPerRow": 2, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.1", + "pointradius": 0.5, + "points": false, + "renderer": "flot", + "repeatDirection": "v", + "repeatIteration": 1603722601260, + "repeatPanelId": 2, + "scopedVars": { + "instance": { + "selected": false, + "text": "google.com", + "value": "google.com" + } + }, + "seriesOverrides": [ + { + "alias": "packet loss", + "color": "#C4162A", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + 
"stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum(probe_http_duration_seconds{instance=~\"$instance\"}) by (instance) > 0", + "instant": false, + "interval": "", + "legendFormat": "RTT", + "refId": "A" + }, + { + "expr": "1-avg_over_time(probe_success{instance=~\"$instance\"}[$__interval])", + "format": "time_series", + "legendFormat": "packet loss", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ICMP round trip time ($instance)", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "dtdurations", + "label": "RTT", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "percentunit", + "label": "packet loss", + "logBase": 1, + "max": "1", + "min": "0.0001", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": null, + "description": "This uses the blackbox exporter, which does not expose packet loss, for example. It could be improved with https://github.com/SuperQ/smokeping_prober because it also keeps track of lost samples (https://github.com/SuperQ/smokeping_prober/issues/24). 
Unfortunately, that still won't make graphs as nice as smokeping, because each probe only keeps one sample, instead of doing multiple like smokeping does (https://github.com/SuperQ/smokeping_prober/issues/36).", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "maxPerRow": 2, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.1", + "pointradius": 0.5, + "points": false, + "renderer": "flot", + "repeatDirection": "v", + "repeatIteration": 1603722601260, + "repeatPanelId": 2, + "scopedVars": { + "instance": { + "selected": false, + "text": "https://192.168.1.1:8443", + "value": "https://192.168.1.1:8443" + } + }, + "seriesOverrides": [ + { + "alias": "packet loss", + "color": "#C4162A", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum(probe_http_duration_seconds{instance=~\"$instance\"}) by (instance) > 0", + "instant": false, + "interval": "", + "legendFormat": "RTT", + "refId": "A" + }, + { + "expr": "1-avg_over_time(probe_success{instance=~\"$instance\"}[$__interval])", + "format": "time_series", + "legendFormat": "packet loss", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ICMP round trip time ($instance)", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + 
"show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "dtdurations", + "label": "RTT", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "percentunit", + "label": "packet loss", + "logBase": 1, + "max": "1", + "min": "0.0001", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "prometheus", + "definition": "label_values(probe_success, instance)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "instance", + "options": [], + "query": "label_values(probe_success, instance)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Blackbox probe", + "uid": "oe-msimGz", + "version": 1 +} diff --git a/prometheus/pinghosts.yaml b/prometheus/blackbox.yaml similarity index 78% rename from prometheus/pinghosts.yaml rename to prometheus/blackbox.yaml index 18f0122..5bbcff7 100644 --- a/prometheus/pinghosts.yaml +++ b/prometheus/blackbox.yaml @@ -2,4 +2,4 @@ - https://google.com/;Google;external - https://github.com/;GitHub;external - https://ping.archlinux.org/;ArchLinux;external - - https://192.168.0.107/;Proxmox;internal + - http://192.168.0.1:1024/;Router;internal diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index 45a85d8..6a6efec 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -21,14 +21,14 @@ scrape_configs: static_configs: - 
targets: ['speedtest:9798'] - - job_name: 'ping' + - job_name: 'blackbox' metrics_path: /probe - scrape_interval: 5s # How often should we probe the pinghosts + scrape_interval: 15s # How often should we probe the pinghosts params: module: [http_2xx] # Look for a HTTP 200 response. file_sd_configs: - files: - - pinghosts.yaml + - blackbox.yaml relabel_configs: - source_labels: [__address__] regex: '(.*);(.*);(.*)' # first is the url, thus unique for instance @@ -45,8 +45,4 @@ scrape_configs: - source_labels: [instance] target_label: __param_target - target_label: __address__ - replacement: ping:9115 # The blackbox exporter's real hostname:port. - - - job_name: 'nodeexp' - static_configs: - - targets: ['nodeexp:9100'] + replacement: blackbox:9115 # The blackbox exporter's real hostname:port.