diff --git a/discourse-mixin/alerts.libsonnet b/discourse-mixin/alerts.libsonnet index 2501cf355..6111038ca 100644 --- a/discourse-mixin/alerts.libsonnet +++ b/discourse-mixin/alerts.libsonnet @@ -24,7 +24,7 @@ { alert: 'DiscourseHigh4xxErrors', expr: ||| - 100 * rate(discourse_http_requests{status=~"^4.*"}[5m]) / on() group_left() (sum(rate(discourse_http_requests[5m])) by (instance)) > %(alertsWarning4xxResponses)s + 100 * rate(discourse_http_requests{status=~"4..", %(filteringSelector)s}[5m]) / on() group_left() (sum(rate(discourse_http_requests[5m])) by (instance)) > %(alertsWarning4xxResponses)s ||| % this.config, 'for': '5m', labels: { diff --git a/discourse-mixin/config.libsonnet b/discourse-mixin/config.libsonnet index d49cc5015..182103ff8 100644 --- a/discourse-mixin/config.libsonnet +++ b/discourse-mixin/config.libsonnet @@ -2,7 +2,7 @@ local this = self, // Filtering - filteringSelector: 'job=~"$job", instance=~"$instance"', + filteringSelector: 'job="integrations/discourse"', groupLabels: ['job'], instanceLabels: ['instance'], @@ -26,9 +26,7 @@ // Signal categories signals: { - http: (import './signals/http.libsonnet')(this), - requests: (import './signals/requests.libsonnet')(this), + overview: (import './signals/overview.libsonnet')(this), jobs: (import './signals/jobs.libsonnet')(this), - memory: (import './signals/memory.libsonnet')(this), }, } diff --git a/discourse-mixin/dashboards_out/discourse-jobs.json b/discourse-mixin/dashboards_out/discourse-jobs.json index ddcac75d4..b5a39fb50 100644 --- a/discourse-mixin/dashboards_out/discourse-jobs.json +++ b/discourse-mixin/dashboards_out/discourse-jobs.json @@ -23,18 +23,19 @@ }, "id": 1, "panels": [ ], - "title": "Job Statistics", + "title": "Job statistics", "type": "row" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Current number of Sidekiq Workers.", "fieldConfig": { "defaults": { "color": { + "fixedColor": "text", "mode": "thresholds" }, "mappings": [ @@ -74,25 +75,26 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "count(discourse_rss{type=\"sidekiq\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"})", + "expr": "count(discourse_rss{type=\"sidekiq\",job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "instant": false, - "legendFormat": "", + "legendFormat": "{{job}}: Sidekiq worker count", "refId": "Sidekiq worker count" } ], - "title": "Sidekiq Workers", + "title": "Sidekiq workers", "type": "stat" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Current number of Web Workers.", "fieldConfig": { "defaults": { "color": { + "fixedColor": "text", "mode": "thresholds" }, "mappings": [ @@ -132,25 +134,26 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "count(discourse_rss{type=\"web\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"})", + "expr": "count(discourse_rss{type=\"web\",job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "instant": false, - "legendFormat": "", + "legendFormat": "{{job}}: Web worker count", "refId": "Web worker count" } ], - "title": "Web Workers", + "title": "Web workers", "type": "stat" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Current number of jobs in Sidekiq queue.", "fieldConfig": { "defaults": { "color": { + "fixedColor": "text", "mode": "thresholds" }, "mappings": [ @@ -190,14 +193,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "max by (job) (\n discourse_sidekiq_jobs_enqueued{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", + "expr": "max by (job) (\n discourse_sidekiq_jobs_enqueued{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", "refId": "Sidekiq jobs enqueued" } ], - "title": "Sidekiq Queued", + "title": "Sidekiq queued", "type": "stat" }, { @@ -210,15 +213,27 @@ }, "id": 5, "panels": [ ], - "title": "Job Counts", + "title": "Job counts", "type": "row" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The number of scheduled jobs ran over an interval.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "none" + } + }, "gridPos": { "h": 6, "w": 12, @@ -226,6 +241,16 @@ "y": 7 }, "id": 6, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -233,22 +258,31 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,job_name) (\n rate(discourse_scheduled_job_count{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", + "expr": "sum by (job) (\n increase(discourse_scheduled_job_count{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", "instant": false, "legendFormat": "{{job_name}}", "refId": "Scheduled job count" } ], - "title": "Scheduled Jobs", + "title": "Scheduled jobs", "type": "timeseries" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The amount of sidekiq jobs ran over an interval.", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "text", + "mode": "fixed" + }, + "unit": "none" + } + }, "gridPos": { "h": 6, "w": 12, @@ -256,6 +290,15 @@ "y": 7 }, "id": 7, + "options": { + "colorMode": "fixed", + "graphMode": "none", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -263,15 +306,15 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,job_name) (\n rate(discourse_sidekiq_job_count{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", + "expr": "sum by (job) (\n increase(discourse_sidekiq_job_count{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", "instant": false, "legendFormat": "{{job_name}}", "refId": "Sidekiq job count" } ], - "title": "Sidekiq Jobs", - "type": "timeseries" + "title": "Sidekiq jobs", + "type": "stat" }, { "collapsed": false, @@ -283,23 +326,20 @@ }, "id": 8, "panels": [ ], - "title": "Job Duration", + "title": "Duration", "type": "row" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Time spent in scheduled jobs broken out by job name.", "fieldConfig": { "defaults": { - "custom": { - "fillOpacity": 30, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "color": { + "fixedColor": "text", + "mode": "fixed" }, "unit": "s" } @@ -311,12 +351,6 @@ "y": 14 }, "id": 9, - "options": { - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, "pluginVersion": "v11.0.0", "targets": [ { @@ -324,30 +358,27 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,job_name) (\n rate(discourse_scheduled_job_duration_seconds{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", + "expr": "sum by (job,job_name) (\n rate(discourse_scheduled_job_duration_seconds{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", "instant": false, "legendFormat": "{{job_name}}", "refId": "Scheduled job duration" } ], - "title": "Scheduled Job Duration", - "type": "timeseries" + "title": "Scheduled job duration", + "type": "stat" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Time spent in Sidekiq jobs broken out by job name.", "fieldConfig": { "defaults": { - "custom": { - "fillOpacity": 30, - "showPoints": "never", - "stacking": { - "mode": "normal" - } + "color": { + "fixedColor": "text", + "mode": "fixed" }, "unit": "s" } @@ -359,12 +390,6 @@ "y": 14 }, "id": 10, - "options": { - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, "pluginVersion": "v11.0.0", "targets": [ { @@ -372,15 +397,15 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,job_name) (\n rate(discourse_sidekiq_job_duration_seconds{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", + "expr": "sum by (job,job_name) (\n rate(discourse_sidekiq_job_duration_seconds{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", "instant": false, "legendFormat": "{{job_name}}", "refId": "Sidekiq job duration" } ], - "title": "Sidekiq Job Duration", - "type": "timeseries" + "title": "Sidekiq job duration", + "type": "stat" }, { "collapsed": false, @@ -397,12 +422,19 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Total RSS Memory used by process. Broken up by pid.", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "bytes" } }, @@ -413,6 +445,16 @@ "y": 21 }, "id": 12, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -420,24 +462,31 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,pid) (\n discourse_rss{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", + "expr": "sum by (job) (\n discourse_rss{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", "instant": false, "legendFormat": "pid: {{pid}}", "refId": "RSS memory" } ], - "title": "Used RSS Memory", + "title": "Used RSS memory", "type": "timeseries" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Current heap size of V8 engine. Broken up by process type", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "bytes" } }, @@ -448,6 +497,16 @@ "y": 21 }, "id": 13, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -455,14 +514,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,type) (\n discourse_v8_used_heap_size{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", + "expr": "sum by (job) (\n discourse_v8_used_heap_size{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", "instant": false, "legendFormat": "{{type}}", "refId": "V8 heap size" } ], - "title": "V8 Heap Size", + "title": "V8 heap size", "type": "timeseries" } ], @@ -490,7 +549,7 @@ "label": "Job", "multi": true, "name": "job", - "query": "label_values(discourse_page_views{job=~\"$job\", instance=~\"$instance\"}, job)", + "query": "label_values(discourse_page_views{job=\"integrations/discourse\"}, job)", "refresh": 2, "sort": 1, "type": "query" @@ -505,7 +564,7 @@ "label": "Instance", "multi": true, "name": "instance", - "query": "label_values(discourse_page_views{job=~\"$job\", instance=~\"$instance\",job=~\"$job\"}, instance)", + "query": "label_values(discourse_page_views{job=\"integrations/discourse\",job=~\"$job\"}, instance)", "refresh": 2, "sort": 1, "type": "query" diff --git a/discourse-mixin/dashboards_out/discourse-overview.json b/discourse-mixin/dashboards_out/discourse-overview.json index 9106176d3..f127c2ba0 100644 --- a/discourse-mixin/dashboards_out/discourse-overview.json +++ b/discourse-mixin/dashboards_out/discourse-overview.json @@ -28,12 +28,21 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Rate of HTTP traffic over time for the entire application. Grouped by response code.", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "noValue": "No traffic", "unit": "reqps" } }, @@ -44,6 +53,16 @@ "y": 1 }, "id": 2, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -51,24 +70,33 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,status) (\n rate(discourse_http_requests{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", + "expr": "sum by (job,status) (\n rate(discourse_http_requests{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", "instant": false, - "legendFormat": "{{status}}", + "legendFormat": "{{ api }} - {{ verb }} - {{ status }}", "refId": "HTTP requests" } ], - "title": "Traffic by Response Code", + "title": "Traffic by response code", "type": "timeseries" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Active web requests for the entire application", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "noValue": "No traffic", "unit": "reqps" } }, @@ -79,6 +107,16 @@ "y": 1 }, "id": 3, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -86,24 +124,33 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "discourse_active_app_reqs{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}", + "expr": "sum by (job) (\n discourse_active_app_reqs{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", "refId": "Active requests" } ], - "title": "Active Requests", + "title": "Active requests", "type": "timeseries" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Queued web requests for the entire application.", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "noValue": "No traffic", "unit": "reqps" } }, @@ -114,6 +161,16 @@ "y": 7 }, "id": 4, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -121,36 +178,34 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "discourse_queued_app_reqs{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}", + "expr": "sum by (job) (\n discourse_queued_app_reqs{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", "refId": "Queued requests" } ], - "title": "Queued Requests", + "title": "Queued requests", "type": "timeseries" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "Rate of pageviews for the entire application. Grouped by type and service.", "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "unit": "views/sec" + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "noValue": "No traffic", + "unit": "none" } }, "gridPos": { @@ -160,6 +215,16 @@ "y": 7 }, "id": 5, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -167,14 +232,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "rate(discourse_page_views{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "sum by (job) (\n rate(discourse_page_views{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", "instant": false, "legendFormat": "{{instance}}", "refId": "Page views" } ], - "title": "Page Views", + "title": "Page views", "type": "timeseries" }, { @@ -192,12 +257,21 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The median amount of time for \"latest\" page requests for the selected site.", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "noValue": "No packets", "unit": "s" } }, @@ -208,6 +282,16 @@ "y": 14 }, "id": 7, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -215,24 +299,33 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,controller) (\n discourse_http_duration_seconds{quantile=\"0.5\",action=\"latest\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", + "expr": "sum by (job) (\n discourse_http_duration_seconds{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\",quantile=\"0.5\",action=\"latest\"}\n)", "format": "time_series", "instant": false, - "legendFormat": "{{controller}}", + "legendFormat": "{{ instance }}", "refId": "Latest median request time" } ], - "title": "Latest Median Request Time", + "title": "Latest median request time", "type": "timeseries" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The median amount of time for \"topics show\" requests for the selected site.", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "noValue": "No packets", "unit": "s" } }, @@ -243,6 +336,16 @@ "y": 14 }, "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -250,24 +353,33 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,controller) (\n discourse_http_duration_seconds{quantile=\"0.5\",controller=\"topics\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", + "expr": "sum by (job) (\n discourse_http_duration_seconds{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\",quantile=\"0.5\",controller=\"topics\"}\n)", "format": "time_series", "instant": false, - "legendFormat": "{{controller}}", + "legendFormat": "{{ instance }}", "refId": "Topic median request time" } ], - "title": "Topic Show Median Request Time", + "title": "Topic show median request time", "type": "timeseries" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The 99th percentile amount of time for \"latest\" page requests for the selected site.", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "noValue": "No packets", "unit": "s" } }, @@ -278,6 +390,16 @@ "y": 20 }, "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -285,24 +407,33 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,controller) (\n discourse_http_duration_seconds{quantile=\"0.99\",action=\"latest\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", + "expr": "sum by (job) (\n discourse_http_duration_seconds{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\",quantile=\"0.99\",action=\"latest\"}\n)", "format": "time_series", "instant": false, - "legendFormat": "{{controller}}", + "legendFormat": "{{ instance }}", "refId": "Latest 99th percentile request time" } ], - "title": "Latest 99th percentile Request Time", + "title": "Latest 99th percentile request time", "type": "timeseries" }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${datasource}" }, "description": "The 99th percentile amount of time for \"topics show\" requests for the selected site.", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "noValue": "No packets", "unit": "s" } }, @@ -313,6 +444,16 @@ "y": 20 }, "id": 10, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -320,14 +461,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum by (job,controller) (\n discourse_http_duration_seconds{quantile=\"0.99\",controller=\"topics\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", + "expr": "sum by (job) (\n discourse_http_duration_seconds{job=\"integrations/discourse\",job=~\"$job\",instance=~\"$instance\",quantile=\"0.99\",controller=\"topics\"}\n)", "format": "time_series", "instant": false, - "legendFormat": "{{controller}}", + "legendFormat": "{{ instance }}", "refId": "Topic 99th percentile request time" } ], - "title": "Topic Show 99th percentile Request Time", + "title": "Topic show 99th percentile request time", "type": "timeseries" } ], @@ -355,7 +496,7 @@ "label": "Job", "multi": true, "name": "job", - "query": "label_values(discourse_page_views{job=~\"$job\", instance=~\"$instance\"}, job)", + "query": "label_values(discourse_page_views{job=\"integrations/discourse\"}, job)", "refresh": 2, "sort": 1, "type": "query" @@ -370,7 +511,7 @@ "label": "Instance", "multi": true, "name": "instance", - "query": "label_values(discourse_page_views{job=~\"$job\", instance=~\"$instance\",job=~\"$job\"}, instance)", + "query": "label_values(discourse_page_views{job=\"integrations/discourse\",job=~\"$job\"}, instance)", "refresh": 2, "sort": 1, "type": "query" diff --git a/discourse-mixin/links.libsonnet b/discourse-mixin/links.libsonnet index 8108fea20..c4bea87c6 100644 --- a/discourse-mixin/links.libsonnet +++ b/discourse-mixin/links.libsonnet @@ -3,11 +3,11 @@ local g = import './g.libsonnet'; { new(this): { overview: - g.dashboard.link.link.new('Discourse overview', '/d/' + this.config.uid + '-overview') + g.dashboard.link.link.new(this.config.dashboardNamePrefix + ' overview', '/d/' + this.config.uid + '-overview') + g.dashboard.link.link.options.withKeepTime(true), jobs: - g.dashboard.link.link.new('Discourse jobs', '/d/' + this.config.uid + '-jobs') + g.dashboard.link.link.new(this.config.dashboardNamePrefix + ' jobs', '/d/' + this.config.uid + '-jobs') + g.dashboard.link.link.options.withKeepTime(true), }, } diff --git a/discourse-mixin/main.libsonnet b/discourse-mixin/main.libsonnet index 32c94c6ff..1b7dd4ce8 100644 --- a/discourse-mixin/main.libsonnet +++ b/discourse-mixin/main.libsonnet @@ -23,6 +23,14 @@ local commonlib = import 'common-lib/common/main.libsonnet'; }, grafana: { + variables: commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='discourse_page_views', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ), links: (import './links.libsonnet').new(this), panels: (import './panels.libsonnet').new(this), rows: (import './rows.libsonnet').new(this), @@ -31,6 +39,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; prometheus: { alerts: alerts.new(this), + recordingRules: {}, }, }, } diff --git a/discourse-mixin/mixin.libsonnet b/discourse-mixin/mixin.libsonnet index b28c41414..cba9e1794 100644 --- a/discourse-mixin/mixin.libsonnet +++ b/discourse-mixin/mixin.libsonnet @@ -1,15 +1,16 @@ +local config = import './config.libsonnet'; local lib = import './main.libsonnet'; local discourse = lib.new() + lib.withConfigMixin({ - // Override defaults if needed + filteringSelector: config.filteringSelector, + uid: config.uid, + enableLokiLogs: config.enableLokiLogs, }); { grafanaDashboards+:: discourse.grafana.dashboards, prometheusAlerts+:: discourse.prometheus.alerts, - prometheusRules+:: { - groups+: [], - }, + prometheusRules+:: discourse.prometheus.recordingRules, } diff --git a/discourse-mixin/panels.libsonnet b/discourse-mixin/panels.libsonnet index 210f887d4..d3e3feec2 100644 --- a/discourse-mixin/panels.libsonnet +++ b/discourse-mixin/panels.libsonnet @@ -7,139 +7,91 @@ local commonlib = import 'common-lib/common/main.libsonnet'; // Overview dashboard panels trafficByResponseCode: - g.panel.timeSeries.new('Traffic by Response Code') + commonlib.panels.network.timeSeries.traffic.new( + 'Traffic by response code', + targets=[signals.overview.httpRequests.asTarget()] + ) + g.panel.timeSeries.panelOptions.withDescription('Rate of HTTP traffic over time for the entire application. Grouped by response code.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.http.httpRequests.asTarget(), - ]) + g.panel.timeSeries.standardOptions.withUnit('reqps'), activeRequests: - g.panel.timeSeries.new('Active Requests') + commonlib.panels.network.timeSeries.traffic.new( + 'Active requests', + targets=[signals.overview.activeRequests.asTarget()] + ) + g.panel.timeSeries.panelOptions.withDescription('Active web requests for the entire application') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.requests.activeRequests.asTarget(), - ]) + g.panel.timeSeries.standardOptions.withUnit('reqps'), queuedRequests: - g.panel.timeSeries.new('Queued Requests') + commonlib.panels.network.timeSeries.traffic.new( + 'Queued requests', + targets=[signals.overview.queuedRequests.asTarget()] + ) + g.panel.timeSeries.panelOptions.withDescription('Queued web requests for the entire application.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.requests.queuedRequests.asTarget(), - ]) + g.panel.timeSeries.standardOptions.withUnit('reqps'), pageViews: - g.panel.timeSeries.new('Page Views') + commonlib.panels.network.timeSeries.traffic.new( + 'Page views', + targets=[signals.overview.pageViews.asTarget()] + ) + g.panel.timeSeries.panelOptions.withDescription('Rate of pageviews for the entire application. Grouped by type and service.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.requests.pageViews.asTarget(), - ]) - + g.panel.timeSeries.standardOptions.withUnit('views/sec') - + g.panel.timeSeries.standardOptions.withMappings([ - g.panel.timeSeries.standardOptions.mapping.SpecialValueMap.withType() - + g.panel.timeSeries.standardOptions.mapping.SpecialValueMap.withOptions({ - match: 'null', - result: { text: 'N/A' }, - }), - ]), + + g.panel.timeSeries.standardOptions.withUnit('none'), latestMedianRequestTime: - g.panel.timeSeries.new('Latest Median Request Time') + commonlib.panels.network.timeSeries.base.new('Latest median request time', targets=[signals.overview.latestMedianRequestTime.asTarget()]) + g.panel.timeSeries.panelOptions.withDescription('The median amount of time for "latest" page requests for the selected site.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.http.latestMedianRequestTime.asTarget(), - ]) + g.panel.timeSeries.standardOptions.withUnit('s'), topicMedianRequestTime: - g.panel.timeSeries.new('Topic Show Median Request Time') + commonlib.panels.network.timeSeries.base.new('Topic show median request time', targets=[signals.overview.topicMedianRequestTime.asTarget()]) + g.panel.timeSeries.panelOptions.withDescription('The median amount of time for "topics show" requests for the selected site.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.http.topicMedianRequestTime.asTarget(), - ]) + g.panel.timeSeries.standardOptions.withUnit('s'), latest99thPercentileRequestTime: - g.panel.timeSeries.new('Latest 99th percentile Request Time') + commonlib.panels.network.timeSeries.base.new('Latest 99th percentile request time', targets=[signals.overview.latest99thPercentileRequestTime.asTarget()]) + g.panel.timeSeries.panelOptions.withDescription('The 99th percentile amount of time for "latest" page requests for the selected site.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.http.latest99thPercentileRequestTime.asTarget(), - ]) + g.panel.timeSeries.standardOptions.withUnit('s'), topic99thPercentileRequestTime: - g.panel.timeSeries.new('Topic Show 99th percentile Request Time') + commonlib.panels.network.timeSeries.base.new('Topic show 99th percentile request time', targets=[signals.overview.topic99thPercentileRequestTime.asTarget()]) + g.panel.timeSeries.panelOptions.withDescription('The 99th percentile amount of time for "topics show" requests for the selected site.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.http.topic99thPercentileRequestTime.asTarget(), - ]) + g.panel.timeSeries.standardOptions.withUnit('s'), // Jobs dashboard panels sidekiqJobDuration: - g.panel.timeSeries.new('Sidekiq Job Duration') - + g.panel.timeSeries.panelOptions.withDescription('Time spent in Sidekiq jobs broken out by job name.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.jobs.sidekiqJobDuration.asTarget(), - ]) - + g.panel.timeSeries.standardOptions.withUnit('s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(30) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), + commonlib.panels.generic.stat.base.new('Sidekiq job duration', targets=[signals.jobs.sidekiqJobDuration.asTarget()]) + + g.panel.stat.panelOptions.withDescription('Time spent in Sidekiq jobs broken out by job name.') + + g.panel.stat.standardOptions.withUnit('s'), scheduledJobDuration: - g.panel.timeSeries.new('Scheduled Job Duration') - + g.panel.timeSeries.panelOptions.withDescription('Time spent in scheduled jobs broken out by job name.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.jobs.scheduledJobDuration.asTarget(), - ]) - + g.panel.timeSeries.standardOptions.withUnit('s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(30) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), - - scheduledJobCount: - g.panel.timeSeries.new('Scheduled Jobs') - + g.panel.timeSeries.panelOptions.withDescription('The number of scheduled jobs ran over an interval.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.jobs.scheduledJobCount.asTarget(), - ]), + commonlib.panels.generic.stat.base.new('Scheduled job duration', targets=[signals.jobs.scheduledJobDuration.asTarget()]) + + g.panel.stat.panelOptions.withDescription('Time spent in scheduled jobs broken out by job name.') + + g.panel.stat.standardOptions.withUnit('s'), sidekiqJobCount: - g.panel.timeSeries.new('Sidekiq Jobs') - + g.panel.timeSeries.panelOptions.withDescription('The amount of sidekiq jobs ran over an interval.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.jobs.sidekiqJobCount.asTarget(), - ]), + commonlib.panels.generic.stat.info.new('Sidekiq jobs', targets=[signals.jobs.sidekiqJobCount.asTarget()]) + + g.panel.stat.panelOptions.withDescription('The amount of sidekiq jobs ran over an interval.') + + g.panel.stat.standardOptions.withUnit('none'), + + scheduledJobCount: + commonlib.panels.generic.timeSeries.base.new('Scheduled jobs', targets=[signals.jobs.scheduledJobCount.asTarget()]) + + g.panel.stat.panelOptions.withDescription('The number of scheduled jobs ran over an interval.') + + g.panel.timeSeries.standardOptions.withUnit('none'), usedRSSMemory: - g.panel.timeSeries.new('Used RSS Memory') + commonlib.panels.generic.timeSeries.base.new('Used RSS memory', targets=[signals.jobs.rssMemory.asTarget()]) + g.panel.timeSeries.panelOptions.withDescription('Total RSS Memory used by process. Broken up by pid.') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.memory.rssMemory.asTarget(), - ]) + g.panel.timeSeries.standardOptions.withUnit('bytes'), v8HeapSize: - g.panel.timeSeries.new('V8 Heap Size') + commonlib.panels.generic.timeSeries.base.new('V8 heap size', targets=[signals.jobs.v8HeapSize.asTarget()]) + g.panel.timeSeries.panelOptions.withDescription('Current heap size of V8 engine. Broken up by process type') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.memory.v8HeapSize.asTarget(), - ]) + g.panel.timeSeries.standardOptions.withUnit('bytes'), sidekiqWorkers: - g.panel.stat.new('Sidekiq Workers') + commonlib.panels.generic.stat.base.new('Sidekiq workers', targets=[signals.jobs.sidekiqWorkerCount.asTarget()]) + g.panel.stat.panelOptions.withDescription('Current number of Sidekiq Workers.') - + g.panel.stat.queryOptions.withTargets([ - signals.jobs.sidekiqWorkerCount.asTarget(), - ]) + g.panel.stat.standardOptions.withUnit('none') + g.panel.stat.standardOptions.color.withMode('thresholds') + g.panel.stat.standardOptions.withMappings([ @@ -154,11 +106,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.stat.options.withTextMode('auto'), webWorkers: - g.panel.stat.new('Web Workers') + commonlib.panels.generic.stat.base.new('Web workers', targets=[signals.jobs.webWorkerCount.asTarget()]) + g.panel.stat.panelOptions.withDescription('Current number of Web Workers.') - + g.panel.stat.queryOptions.withTargets([ - signals.jobs.webWorkerCount.asTarget(), - ]) + g.panel.stat.standardOptions.withUnit('none') + g.panel.stat.standardOptions.color.withMode('thresholds') + g.panel.stat.standardOptions.withMappings([ @@ -173,11 +122,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; + g.panel.stat.options.withTextMode('auto'), sidekiqQueued: - g.panel.stat.new('Sidekiq Queued') + commonlib.panels.generic.stat.base.new('Sidekiq queued', targets=[signals.jobs.sidekiqJobsEnqueued.asTarget()]) + g.panel.stat.panelOptions.withDescription('Current number of jobs in Sidekiq queue.') - + g.panel.stat.queryOptions.withTargets([ - signals.jobs.sidekiqJobsEnqueued.asTarget(), - ]) + g.panel.stat.standardOptions.withUnit('none') + g.panel.stat.standardOptions.color.withMode('thresholds') + g.panel.stat.standardOptions.withMappings([ diff --git a/discourse-mixin/prometheus_rules_out/prometheus_alerts.yaml b/discourse-mixin/prometheus_rules_out/prometheus_alerts.yaml index 059a4b97b..d19035710 100644 --- a/discourse-mixin/prometheus_rules_out/prometheus_alerts.yaml +++ b/discourse-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -15,7 +15,7 @@ groups: description: '{{ printf "%.2f" $value }}% of all requests are resulting in 400 status code, which is above the threshold 30%, indicating a potentially larger issue for {{$labels.instance}}' summary: More than 30% of all requests result in a 4XX. expr: | - 100 * rate(discourse_http_requests{status=~"^4.*"}[5m]) / on() group_left() (sum(rate(discourse_http_requests[5m])) by (instance)) > 30 + 100 * rate(discourse_http_requests{status=~"4..", job="integrations/discourse"}[5m]) / on() group_left() (sum(rate(discourse_http_requests[5m])) by (instance)) > 30 for: 5m labels: severity: warning diff --git a/discourse-mixin/prometheus_rules_out/prometheus_rules.yaml b/discourse-mixin/prometheus_rules_out/prometheus_rules.yaml index 2ae22208b..e69de29bb 100644 --- a/discourse-mixin/prometheus_rules_out/prometheus_rules.yaml +++ b/discourse-mixin/prometheus_rules_out/prometheus_rules.yaml @@ -1 +0,0 @@ -groups: [] diff --git a/discourse-mixin/rows.libsonnet b/discourse-mixin/rows.libsonnet index 721217eb7..98cdfff0b 100644 --- a/discourse-mixin/rows.libsonnet +++ b/discourse-mixin/rows.libsonnet @@ -7,50 +7,56 @@ local g = import './g.libsonnet'; // discourse-overview rows overviewRow: g.panel.row.new('Overview') + + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ - panels.trafficByResponseCode { gridPos: { h: 6, w: 12, x: 0, y: 0 } }, - panels.activeRequests { gridPos: { h: 6, w: 12, x: 12, y: 0 } }, - panels.queuedRequests { gridPos: { h: 6, w: 12, x: 0, y: 6 } }, - panels.pageViews { gridPos: { h: 6, w: 12, x: 12, y: 6 } }, + panels.trafficByResponseCode { gridPos: { h: 6, w: 12 } }, + panels.activeRequests { gridPos: { h: 6, w: 12 } }, + panels.queuedRequests { gridPos: { h: 6, w: 12 } }, + panels.pageViews { gridPos: { h: 6, w: 12 } }, ]), latencyRow: g.panel.row.new('Latency') + + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ - panels.latestMedianRequestTime { gridPos: { h: 6, w: 12, x: 0, y: 0 } }, - panels.topicMedianRequestTime { gridPos: { h: 6, w: 12, x: 12, y: 0 } }, - panels.latest99thPercentileRequestTime { gridPos: { h: 6, w: 12, x: 0, y: 6 } }, - panels.topic99thPercentileRequestTime { gridPos: { h: 6, w: 12, x: 12, y: 6 } }, + panels.latestMedianRequestTime { gridPos: { h: 6, w: 12 } }, + panels.topicMedianRequestTime { gridPos: { h: 6, w: 12 } }, + panels.latest99thPercentileRequestTime { gridPos: { h: 6, w: 12 } }, + panels.topic99thPercentileRequestTime { gridPos: { h: 6, w: 12 } }, ]), // discourse-jobs rows jobStatsRow: - g.panel.row.new('Job Statistics') + g.panel.row.new('Job statistics') + + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ - panels.sidekiqWorkers { gridPos: { h: 5, w: 8, x: 0, y: 0 } }, - panels.webWorkers { gridPos: { h: 5, w: 8, x: 8, y: 0 } }, - panels.sidekiqQueued { gridPos: { h: 5, w: 8, x: 16, y: 0 } }, + panels.sidekiqWorkers { gridPos: { h: 5, w: 8 } }, + panels.webWorkers { gridPos: { h: 5, w: 8 } }, + panels.sidekiqQueued { gridPos: { h: 5, w: 8 } }, ]), jobCountsRow: - g.panel.row.new('Job Counts') + g.panel.row.new('Job counts') + + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ - panels.scheduledJobCount { gridPos: { h: 6, w: 12, x: 0, y: 0 } }, - panels.sidekiqJobCount { gridPos: { h: 6, w: 12, x: 12, y: 0 } }, + panels.scheduledJobCount { gridPos: { h: 6, w: 12 } }, + panels.sidekiqJobCount { gridPos: { h: 6, w: 12 } }, ]), jobDurationRow: - g.panel.row.new('Job Duration') + g.panel.row.new('Duration') + + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ - panels.scheduledJobDuration { gridPos: { h: 6, w: 12, x: 0, y: 0 } }, - panels.sidekiqJobDuration { gridPos: { h: 6, w: 12, x: 12, y: 0 } }, + panels.scheduledJobDuration { gridPos: { h: 6, w: 12 } }, + panels.sidekiqJobDuration { gridPos: { h: 6, w: 12 } }, ]), memoryRow: g.panel.row.new('Memory') + + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ - panels.usedRSSMemory { gridPos: { h: 6, w: 12, x: 0, y: 0 } }, - panels.v8HeapSize { gridPos: { h: 6, w: 12, x: 12, y: 0 } }, + panels.usedRSSMemory { gridPos: { h: 6, w: 12 } }, + panels.v8HeapSize { gridPos: { h: 6, w: 12 } }, ]), }, } diff --git a/discourse-mixin/signals/http.libsonnet b/discourse-mixin/signals/http.libsonnet deleted file mode 100644 index 833cddd12..000000000 --- a/discourse-mixin/signals/http.libsonnet +++ /dev/null @@ -1,79 +0,0 @@ -function(this) - { - filteringSelector: this.filteringSelector, - groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels, - aggLevel: 'group', - aggFunction: 'sum', - signals: { - httpRequests: { - name: 'HTTP requests', - type: 'counter', - unit: 'reqps', - description: 'Rate of HTTP requests by status code.', - sources: { - prometheus: { - expr: 'discourse_http_requests{%(queriesSelector)s}', - aggKeepLabels: ['status'], - legendCustomTemplate: '{{status}}', - }, - }, - }, - - latestMedianRequestTime: { - name: 'Latest median request time', - type: 'gauge', - unit: 's', - description: 'The median amount of time for "latest" page requests.', - sources: { - prometheus: { - expr: 'discourse_http_duration_seconds{quantile="0.5",action="latest",%(queriesSelector)s}', - aggKeepLabels: ['controller'], - legendCustomTemplate: '{{controller}}', - }, - }, - }, - - topicMedianRequestTime: { - name: 'Topic median request time', - type: 'gauge', - unit: 's', - description: 'The median amount of time for "topics show" requests.', - sources: { - prometheus: { - expr: 'discourse_http_duration_seconds{quantile="0.5",controller="topics",%(queriesSelector)s}', - aggKeepLabels: ['controller'], - legendCustomTemplate: '{{controller}}', - }, - }, - }, - - latest99thPercentileRequestTime: { - name: 'Latest 99th percentile request time', - type: 'gauge', - unit: 's', - description: 'The 99th percentile amount of time for "latest" page requests.', - sources: { - prometheus: { - expr: 'discourse_http_duration_seconds{quantile="0.99",action="latest",%(queriesSelector)s}', - aggKeepLabels: ['controller'], - legendCustomTemplate: '{{controller}}', - }, - }, - }, - - topic99thPercentileRequestTime: { - name: 'Topic 99th percentile request time', - type: 'gauge', - unit: 's', - description: 'The 99th percentile amount of time for "topics show" requests.', - sources: { - prometheus: { - expr: 'discourse_http_duration_seconds{quantile="0.99",controller="topics",%(queriesSelector)s}', - aggKeepLabels: ['controller'], - legendCustomTemplate: '{{controller}}', - }, - }, - }, - }, - } diff --git a/discourse-mixin/signals/jobs.libsonnet b/discourse-mixin/signals/jobs.libsonnet index 2a4c6d45c..ebde76d45 100644 --- a/discourse-mixin/signals/jobs.libsonnet +++ b/discourse-mixin/signals/jobs.libsonnet @@ -3,9 +3,11 @@ function(this) filteringSelector: this.filteringSelector, groupLabels: this.groupLabels, instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, aggLevel: 'group', aggFunction: 'sum', signals: { + // Job processing signals sidekiqJobDuration: { name: 'Sidekiq job duration', type: 'counter', @@ -37,13 +39,12 @@ function(this) sidekiqJobCount: { name: 'Sidekiq job count', type: 'counter', - rangeFunction: 'increase', unit: 'none', description: 'The amount of sidekiq jobs ran over an interval.', sources: { prometheus: { expr: 'discourse_sidekiq_job_count{%(queriesSelector)s}', - aggKeepLabels: ['job_name'], + rangeFunction: 'increase', legendCustomTemplate: '{{job_name}}', }, }, @@ -52,13 +53,12 @@ function(this) scheduledJobCount: { name: 'Scheduled job count', type: 'counter', - rangeFunction: 'increase', unit: 'none', description: 'The number of scheduled jobs ran over an interval.', sources: { prometheus: { expr: 'discourse_scheduled_job_count{%(queriesSelector)s}', - aggKeepLabels: ['job_name'], + rangeFunction: 'increase', legendCustomTemplate: '{{job_name}}', }, }, @@ -86,7 +86,6 @@ function(this) sources: { prometheus: { expr: 'count(discourse_rss{type="sidekiq",%(queriesSelector)s})', - legendCustomTemplate: '', }, }, }, @@ -99,7 +98,33 @@ function(this) sources: { prometheus: { expr: 'count(discourse_rss{type="web",%(queriesSelector)s})', - legendCustomTemplate: '', + }, + }, + }, + + // Memory signals + rssMemory: { + name: 'RSS memory', + type: 'gauge', + unit: 'bytes', + description: 'Total RSS memory used by process.', + sources: { + prometheus: { + expr: 'discourse_rss{%(queriesSelector)s}', + legendCustomTemplate: 'pid: {{pid}}', + }, + }, + }, + + v8HeapSize: { + name: 'V8 heap size', + type: 'gauge', + unit: 'bytes', + description: 'Current heap size of V8 engine broken up by process type.', + sources: { + prometheus: { + expr: 'discourse_v8_used_heap_size{%(queriesSelector)s}', + legendCustomTemplate: '{{type}}', }, }, }, diff --git a/discourse-mixin/signals/memory.libsonnet b/discourse-mixin/signals/memory.libsonnet deleted file mode 100644 index bf20bd894..000000000 --- a/discourse-mixin/signals/memory.libsonnet +++ /dev/null @@ -1,37 +0,0 @@ -function(this) - { - filteringSelector: this.filteringSelector, - groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels, - aggLevel: 'group', - aggFunction: 'sum', - signals: { - rssMemory: { - name: 'RSS memory', - type: 'gauge', - unit: 'bytes', - description: 'Total RSS memory used by process.', - sources: { - prometheus: { - expr: 'discourse_rss{%(queriesSelector)s}', - aggKeepLabels: ['pid'], - legendCustomTemplate: 'pid: {{pid}}', - }, - }, - }, - - v8HeapSize: { - name: 'V8 heap size', - type: 'gauge', - unit: 'bytes', - description: 'Current heap size of V8 engine broken up by process type.', - sources: { - prometheus: { - expr: 'discourse_v8_used_heap_size{%(queriesSelector)s}', - aggKeepLabels: ['type'], - legendCustomTemplate: '{{type}}', - }, - }, - }, - }, - } diff --git a/discourse-mixin/signals/overview.libsonnet b/discourse-mixin/signals/overview.libsonnet new file mode 100644 index 000000000..465b11127 --- /dev/null +++ b/discourse-mixin/signals/overview.libsonnet @@ -0,0 +1,115 @@ +function(this) + local legendCustomTemplate = '{{ instance }}'; + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + legendCustomTemplate: legendCustomTemplate, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + // HTTP traffic and latency signals + httpRequests: { + name: 'HTTP requests', + type: 'counter', + unit: 'reqps', + description: 'Rate of HTTP requests by status code.', + sources: { + prometheus: { + expr: 'discourse_http_requests{%(queriesSelector)s}', + aggKeepLabels: ['status'], + legendCustomTemplate: '{{ api }} - {{ verb }} - {{ status }}', + }, + }, + }, + + latestMedianRequestTime: { + name: 'Latest median request time', + type: 'gauge', + unit: 's', + description: 'The median amount of time for "latest" page requests.', + sources: { + prometheus: { + expr: 'discourse_http_duration_seconds{%(queriesSelector)s,quantile="0.5",action="latest"}', + }, + }, + }, + + topicMedianRequestTime: { + name: 'Topic median request time', + type: 'gauge', + unit: 's', + description: 'The median amount of time for "topics show" requests.', + sources: { + prometheus: { + expr: 'discourse_http_duration_seconds{%(queriesSelector)s,quantile="0.5",controller="topics"}', + }, + }, + }, + + latest99thPercentileRequestTime: { + name: 'Latest 99th percentile request time', + type: 'gauge', + unit: 's', + description: 'The 99th percentile amount of time for "latest" page requests.', + sources: { + prometheus: { + expr: 'discourse_http_duration_seconds{%(queriesSelector)s,quantile="0.99",action="latest"}', + }, + }, + }, + + topic99thPercentileRequestTime: { + name: 'Topic 99th percentile request time', + type: 'gauge', + unit: 's', + description: 'The 99th percentile amount of time for "topics show" requests.', + sources: { + prometheus: { + expr: 'discourse_http_duration_seconds{%(queriesSelector)s,quantile="0.99",controller="topics"}', + }, + }, + }, + + // Request queue signals + activeRequests: { + name: 'Active requests', + type: 'gauge', + unit: 'reqps', + description: 'Active web requests for the entire application.', + sources: { + prometheus: { + expr: 'discourse_active_app_reqs{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + queuedRequests: { + name: 'Queued requests', + type: 'gauge', + unit: 'reqps', + description: 'Queued web requests for the entire application.', + sources: { + prometheus: { + expr: 'discourse_queued_app_reqs{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + pageViews: { + name: 'Page views', + type: 'counter', + unit: 'views/sec', + description: 'Rate of pageviews for the entire application.', + sources: { + prometheus: { + expr: 'discourse_page_views{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + }, + } diff --git a/discourse-mixin/signals/requests.libsonnet b/discourse-mixin/signals/requests.libsonnet deleted file mode 100644 index bbe3aa95a..000000000 --- a/discourse-mixin/signals/requests.libsonnet +++ /dev/null @@ -1,47 +0,0 @@ -function(this) - { - filteringSelector: this.filteringSelector, - groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels, - aggLevel: 'none', - signals: { - activeRequests: { - name: 'Active requests', - type: 'gauge', - unit: 'reqps', - description: 'Active web requests for the entire application.', - sources: { - prometheus: { - expr: 'discourse_active_app_reqs{%(queriesSelector)s}', - legendCustomTemplate: '{{instance}}', - }, - }, - }, - - queuedRequests: { - name: 'Queued requests', - type: 'gauge', - unit: 'reqps', - description: 'Queued web requests for the entire application.', - sources: { - prometheus: { - expr: 'discourse_queued_app_reqs{%(queriesSelector)s}', - legendCustomTemplate: '{{instance}}', - }, - }, - }, - - pageViews: { - name: 'Page views', - type: 'counter', - unit: 'views/sec', - description: 'Rate of pageviews for the entire application.', - sources: { - prometheus: { - expr: 'discourse_page_views{%(queriesSelector)s}', - legendCustomTemplate: '{{instance}}', - }, - }, - }, - }, - }