diff --git a/.gitignore b/.gitignore index d68c86c04..1a57d51f0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ vendor jsonnetfile.lock.json *.zip +.worktrees diff --git a/discourse-mixin/.lint b/discourse-mixin/.lint index c25f83b67..cfb8e524b 100644 --- a/discourse-mixin/.lint +++ b/discourse-mixin/.lint @@ -10,3 +10,5 @@ exclusions: - panel: "Sidekiq Workers" template-instance-rule: reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'" + panel-datasource-rule: + reason: "Modern mixins use signal-based architecture where datasource references are handled by the framework" diff --git a/discourse-mixin/alerts/alerts.libsonnet b/discourse-mixin/alerts.libsonnet similarity index 78% rename from discourse-mixin/alerts/alerts.libsonnet rename to discourse-mixin/alerts.libsonnet index a5cac1602..2501cf355 100644 --- a/discourse-mixin/alerts/alerts.libsonnet +++ b/discourse-mixin/alerts.libsonnet @@ -1,41 +1,41 @@ { - prometheusAlerts+:: { - groups+: [ + new(this): { + groups: [ { - name: 'DiscourseAlerts', + name: this.config.uid + '-alerts', rules: [ { - alert: 'DiscourseRequestsHigh5xxErrors', + alert: 'DiscourseHigh5xxErrors', expr: ||| 100 * rate(discourse_http_requests{status="500"}[5m]) / on() group_left() (sum(rate(discourse_http_requests[5m])) by (instance)) > %(alertsCritical5xxResponses)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', }, annotations: { - summary: 'More than %(alertsCritical5xxResponses)s%% of all requests result in a 5XX.' % $._config, + summary: 'More than %(alertsCritical5xxResponses)s%% of all requests result in a 5XX.' % this.config, description: ('{{ printf "%%.2f" $value }}%% of all requests are resulting in 500 status codes, ' + 'which is above the threshold %(alertsCritical5xxResponses)s%%, ' + - 'indicating a potentially larger issue for {{$labels.instance}}') % $._config, + 'indicating a potentially larger issue for {{$labels.instance}}') % this.config, }, }, { - alert: 'DiscourseRequestsHigh4xxErrors', + alert: 'DiscourseHigh4xxErrors', expr: ||| 100 * rate(discourse_http_requests{status=~"^4.*"}[5m]) / on() group_left() (sum(rate(discourse_http_requests[5m])) by (instance)) > %(alertsWarning4xxResponses)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', }, annotations: { - summary: 'More than %(alertsWarning4xxResponses)s%% of all requests result in a 4XX.' % $._config, + summary: 'More than %(alertsWarning4xxResponses)s%% of all requests result in a 4XX.' % this.config, description: ('{{ printf "%%.2f" $value }}%% of all requests are resulting in 400 status code, ' + 'which is above the threshold %(alertsWarning4xxResponses)s%%, ' + - 'indicating a potentially larger issue for {{$labels.instance}}') % $._config, + 'indicating a potentially larger issue for {{$labels.instance}}') % this.config, }, }, ], diff --git a/discourse-mixin/config.libsonnet b/discourse-mixin/config.libsonnet index ea47fc2e9..d49cc5015 100644 --- a/discourse-mixin/config.libsonnet +++ b/discourse-mixin/config.libsonnet @@ -1,12 +1,34 @@ { - _config+:: { - dashboardTags: ['discourse-mixin'], - dashboardPeriod: 'now-1h', - dashboardTimezone: 'default', - dashboardRefresh: '1m', - - // for alerts - alertsCritical5xxResponses: '10', // % - alertsWarning4xxResponses: '30', // % + local this = self, + + // Filtering + filteringSelector: 'job=~"$job", instance=~"$instance"', + groupLabels: ['job'], + instanceLabels: ['instance'], + + // Dashboard settings + dashboardTags: ['discourse-mixin'], + dashboardPeriod: 'now-1h', + dashboardTimezone: 'default', + dashboardRefresh: '1m', + dashboardNamePrefix: 'Discourse', + uid: 'discourse', + + // Logs configuration + enableLokiLogs: false, + + // Alert thresholds + alertsCritical5xxResponses: 10, // % + alertsWarning4xxResponses: 30, // % + + // Metrics source + metricsSource: 'prometheus', + + // Signal categories + signals: { + http: (import './signals/http.libsonnet')(this), + requests: (import './signals/requests.libsonnet')(this), + jobs: (import './signals/jobs.libsonnet')(this), + memory: (import './signals/memory.libsonnet')(this), }, } diff --git a/discourse-mixin/dashboards.libsonnet b/discourse-mixin/dashboards.libsonnet new file mode 100644 index 000000000..22b56e9c5 --- /dev/null +++ b/discourse-mixin/dashboards.libsonnet @@ -0,0 +1,86 @@ +local g = import './g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + local root = self, + new(this): + local prefix = this.config.dashboardNamePrefix; + local links = this.grafana.links; + local tags = this.config.dashboardTags; + local uid = this.config.uid; + local vars = commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='discourse_page_views', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ); + local annotations = {}; + local refresh = this.config.dashboardRefresh; + local period = this.config.dashboardPeriod; + local timezone = this.config.dashboardTimezone; + + { + 'discourse-overview.json': + g.dashboard.new(prefix + ' overview') + + g.dashboard.withDescription('Overview of Discourse application performance and traffic.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.overviewRow, + this.grafana.rows.latencyRow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance, + uid + '-overview', + tags, + links { overview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + 'discourse-jobs.json': + g.dashboard.new(prefix + ' jobs processing') + + g.dashboard.withDescription('Discourse job processing, workers, and memory usage.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.jobStatsRow, + this.grafana.rows.jobCountsRow, + this.grafana.rows.jobDurationRow, + this.grafana.rows.memoryRow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance, + uid + '-jobs', + tags, + links { jobs+:: {} }, + annotations, + timezone, + refresh, + period + ), + }, + + applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period): + g.dashboard.withTags(tags) + + g.dashboard.withUid(uid) + + g.dashboard.withLinks(std.objectValues(links)) + + g.dashboard.withTimezone(timezone) + + g.dashboard.withRefresh(refresh) + + g.dashboard.time.withFrom(period) + + g.dashboard.withVariables(vars) + + g.dashboard.withAnnotations(std.objectValues(annotations)) + + g.dashboard.graphTooltip.withSharedCrosshair(), +} diff --git a/discourse-mixin/dashboards/dashboards.libsonnet b/discourse-mixin/dashboards/dashboards.libsonnet deleted file mode 100644 index 0d75d6eb0..000000000 --- a/discourse-mixin/dashboards/dashboards.libsonnet +++ /dev/null @@ -1,2 +0,0 @@ -(import 'discourse-jobs.libsonnet') + -(import 'discourse-overview.libsonnet') diff --git a/discourse-mixin/dashboards/discourse-jobs.libsonnet b/discourse-mixin/dashboards/discourse-jobs.libsonnet deleted file mode 100644 index 69e516617..000000000 --- a/discourse-mixin/dashboards/discourse-jobs.libsonnet +++ /dev/null @@ -1,775 +0,0 @@ -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local dashboardUid = 'discourse-jobs'; - -local prometheus = grafana.prometheus; -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local skJobDurationPanel = { - datasource: promDatasource, - description: 'Time spent in Sidekiq jobs broken out by job name.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - links: [], - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, - pluginVersion: '9.1.8', - targets: [ - prometheus.target( - 'sum(rate(discourse_sidekiq_job_duration_seconds{instance=~"$instance",job=~"$job"}[$__rate_interval])) by (job_name)', - datasource=promDatasource, - legendFormat='{{job_name}}' - ), - ], - title: 'Sidekiq Job Duration', - type: 'timeseries', -}; - -local sheduledJobDurationPanel = { - datasource: promDatasource, - description: 'Time spent in scheduled jobs broken out by job name.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 30, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - links: [], - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, - pluginVersion: '9.1.8', - targets: [ - prometheus.target( - 'sum(rate(discourse_scheduled_job_duration_seconds{instance=~"$instance",job=~"$job"}[$__rate_interval])) by (job_name)', - datasource=promDatasource, - legendFormat='{{job_name}}', - ), - ], - title: 'Scheduled Job Duration', - type: 'timeseries', -}; - -local usedRSSMemoryPanel = { - datasource: promDatasource, - description: 'Total RSS Memory used by process. Broken up by pid.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'sum(discourse_rss{instance=~"$instance",job=~"$job"}) by (pid)', - datasource=promDatasource, - legendFormat='pid: {{pid}}', - ), - ], - title: 'Used RSS Memory', - type: 'timeseries', -}; - -local scheduledJobsPanel = { - datasource: promDatasource, - description: 'The number of scheduled jobs ran over an interval.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'increase(discourse_scheduled_job_count{instance=~"$instance", job=~"$job"}[$__rate_interval])', - legendFormat='{{job_name}}', - datasource=promDatasource, - ), - ], - title: 'Scheduled Jobs', - type: 'timeseries', -}; - -local sidekiqJobsPanel = { - datasource: promDatasource, - description: 'The amount of sidekiq jobs ran over an interval.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'increase(discourse_sidekiq_job_count{instance=~"$instance", job=~"$job"}[$__rate_interval])', - legendFormat='{{job_name}}', - datasource=promDatasource, - ), - ], - title: 'Sidekiq Jobs', - type: 'timeseries', -}; - -local v8HeapSizePanel = { - datasource: promDatasource, - description: 'Current heap size of V8 engine. Broken up by process type', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'sum(discourse_v8_used_heap_size{instance=~"$instance",job=~"$job"}) by (type)', - datasource=promDatasource, - legendFormat='{{type}}', - ), - ], - title: 'V8 Heap Size', - type: 'timeseries', -}; - -local skWorkerScore = { - datasource: promDatasource, - description: 'Current number of Sidekiq Workers.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [ - { - options: { - match: 'null', - result: { - text: 'N/A', - }, - }, - type: 'special', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - links: [], - maxDataPoints: 100, - options: { - colorMode: 'none', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.1.8', - targets: [ - { - datasource: promDatasource, - editorMode: 'code', - expr: 'count(discourse_rss{type="sidekiq",instance=~"$instance",job=~"$job"})', - format: 'time_series', - intervalFactor: 2, - legendFormat: '', - range: true, - refId: 'A', - step: 40, - target: '', - }, - ], - title: 'Sidekiq Workers', - type: 'stat', -}; - -local webWorkersStat = { - datasource: promDatasource, - description: 'Current number of Web Workers.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [ - { - options: { - match: 'null', - result: { - text: 'N/A', - }, - }, - type: 'special', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - links: [], - maxDataPoints: 100, - options: { - colorMode: 'none', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.1.8', - targets: [ - { - datasource: promDatasource, - editorMode: 'code', - expr: "count(discourse_rss{type='web',instance=~\"$instance\",job=~\"$job\"})", - format: 'time_series', - intervalFactor: 2, - legendFormat: '', - range: true, - refId: 'A', - step: 40, - target: '', - }, - ], - title: 'Web Workers', - type: 'stat', -}; - -local skQueuedStat = { - datasource: promDatasource, - description: 'Current number of jobs in Sidekiq queue.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [ - { - options: { - match: 'null', - result: { - text: 'N/A', - }, - }, - type: 'special', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - links: [], - maxDataPoints: 100, - options: { - colorMode: 'none', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - targets: [ - { - datasource: promDatasource, - editorMode: 'code', - expr: 'max(discourse_sidekiq_jobs_enqueued{instance=~"$instance",job=~"$job"})', - format: 'time_series', - intervalFactor: 2, - legendFormat: '', - range: true, - refId: 'A', - step: 40, - target: '', - }, - ], - title: 'Sidekiq Queued', - type: 'stat', -}; -{ - grafanaDashboards+:: { - 'discourse-jobs.json': - dashboard.new( - 'Discourse Jobs Processing', - time_from='%s' % $._config.dashboardPeriod, - editable=false, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - graphTooltip='shared_crosshair', - uid=dashboardUid, - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='Other Discourse dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )).addTemplates( - [ - { - hide: 0, - label: 'Data source', - name: 'prometheus_datasource', - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', - }, - template.new( - name='instance', - label='instance', - datasource='$prometheus_datasource', - query='label_values(discourse_page_views, instance)', - current='', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=1 - ), - template.new( - 'job', - promDatasource, - query='label_values(discourse_page_views{}, job)', - label='Job', - refresh='time', - includeAll=true, - multi=true, - allValues='.+', - sort=1 - ), - ] - ) - .addPanels( - std.flattenArrays([ - [ - skWorkerScore { gridPos: { h: 5, w: 8, x: 0, y: 0 } }, - webWorkersStat { gridPos: { h: 5, w: 8, x: 8, y: 0 } }, - skQueuedStat { gridPos: { h: 5, w: 8, x: 16, y: 0 } }, - ], - [ - scheduledJobsPanel { gridPos: { h: 6, w: 12, x: 0, y: 6 } }, - sidekiqJobsPanel { gridPos: { h: 6, w: 12, x: 12, y: 6 } }, - ], - [ - sheduledJobDurationPanel { gridPos: { h: 6, w: 12, x: 0, y: 12 } }, - skJobDurationPanel { gridPos: { h: 6, w: 12, x: 12, y: 12 } }, - ], - //next row - [ - usedRSSMemoryPanel { gridPos: { h: 6, w: 12, x: 0, y: 18 } }, - v8HeapSizePanel { gridPos: { h: 6, w: 12, x: 12, y: 18 } }, - ], - ]) - ), - }, -} diff --git a/discourse-mixin/dashboards/discourse-overview.libsonnet b/discourse-mixin/dashboards/discourse-overview.libsonnet deleted file mode 100644 index 0d2615077..000000000 --- a/discourse-mixin/dashboards/discourse-overview.libsonnet +++ /dev/null @@ -1,735 +0,0 @@ -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local dashboardUid = 'discourse-overview'; - -local prometheus = grafana.prometheus; -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local overviewRow = { - collapsed: false, - title: 'Overview', - type: 'row', -}; - -local trafficPanel = { - datasource: promDatasource, - description: 'Rate of HTTP traffic over time for the entire application. Grouped by response code.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'sum(rate(discourse_http_requests{instance=~"$instance",job=~"$job"}[$__rate_interval])) by (status)', - datasource=promDatasource, - legendFormat='{{status}}' - ), - ], - title: 'Traffic by Response Code', - type: 'timeseries', -}; - -local activeRequests = { - datasource: promDatasource, - description: 'Active web requests for the entire application', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'discourse_active_app_reqs{instance=~"$instance",job=~"$job"}', - datasource=promDatasource, - ), - ], - title: 'Active Requests', - type: 'timeseries', -}; - -local queuedRequestsPanel = { - datasource: promDatasource, - description: 'Queued web requests for the entire application.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'discourse_queued_app_reqs{instance=~"$instance",job=~"$job"}', - datasource=promDatasource, - ), - ], - title: 'Queued Requests', - type: 'timeseries', -}; - -local pageviewsPanel = { - datasource: promDatasource, - description: 'Rate of pageviews for the entire application. Grouped by type and service.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [ - { - options: { - match: 'null', - result: { - text: 'N/A', - }, - }, - type: 'special', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'views/sec', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'rate(discourse_page_views{instance=~"$instance",job=~"$job"}[$__rate_interval])', - datasource=promDatasource, - ), - ], - title: 'Page Views', - type: 'timeseries', -}; -local latencyRow = { - collapsed: false, - title: 'Latency', - type: 'row', -}; -local medianLatencyPanel = { - datasource: promDatasource, - description: 'The median amount of time for “latest” page requests for the selected site.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'sum(discourse_http_duration_seconds{quantile="0.5",action="latest",instance=~"$instance",job=~"$job"}) by (controller)', - datasource=promDatasource, - legendFormat='{{controller}}', - ), - ], - title: 'Latest Median Request Time', - type: 'timeseries', -}; - -local topicMedianPanel = { - datasource: promDatasource, - description: 'The median amount of time for “topics show” requests for the selected site.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'sum(discourse_http_duration_seconds{quantile="0.5",controller="topics",instance=~"$instance",job=~"$job"}) by (controller)', - datasource=promDatasource, - legendFormat='{{controller}}', - ), - ], - title: 'Topic Show Median Request Time', - type: 'timeseries', -}; -local ninetyNinthPercentileRequestLatency = { - datasource: promDatasource, - description: 'The 99th percentile amount of time for “latest” page requests for the selected site.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'sum(discourse_http_duration_seconds{quantile="0.99",action="latest",instance=~"$instance",job=~"$job"}) by (controller)', - datasource=promDatasource, - legendFormat='{{controller}}', - ), - ], - title: 'Latest 99th percentile Request Time', - type: 'timeseries', -}; -local ninetyNinthTopicShowPercentileRequestLatency = { - datasource: promDatasource, - description: 'The 99th percentile amount of time for “topics show” requests for the selected site.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - targets: [ - prometheus.target( - 'discourse_http_duration_seconds{quantile="0.99",controller="topics",instance=~"$instance",job=~"$job"}', - datasource=promDatasource, - legendFormat='{{controller}}', - ), - ], - title: 'Topic Show 99th percentile Request Time', - type: 'timeseries', -}; - -{ - grafanaDashboards+:: { - 'discourse-overview.json': - dashboard.new( - 'Discourse Overview', - time_from='%s' % $._config.dashboardPeriod, - editable=false, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - graphTooltip='shared_crosshair', - uid=dashboardUid, - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='Other discourse dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )).addTemplates( - [ - { - hide: 0, - label: 'Data source', - name: 'prometheus_datasource', - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', - }, - template.new( - name='instance', - label='instance', - datasource='$prometheus_datasource', - query='label_values(discourse_page_views{}, instance)', - current='', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=1 - ), - template.new( - name='job', - datasource=promDatasource, - query='label_values(discourse_page_views{}, job)', - label='Job', - refresh='time', - includeAll=true, - multi=true, - allValues='.+', - sort=1 - ), - ] - ) - .addPanels( - std.flattenArrays([ - [ - overviewRow { gridPos: { h: 1, w: 24, x: 0, y: 0 } }, - trafficPanel { gridPos: { h: 6, w: 12, x: 0, y: 1 } }, - activeRequests { gridPos: { h: 6, w: 12, x: 12, y: 1 } }, - queuedRequestsPanel { gridPos: { h: 6, w: 12, x: 0, y: 7 } }, - pageviewsPanel { gridPos: { h: 6, w: 12, x: 12, y: 7 } }, - ], - // next row - [ - latencyRow { gridPos: { h: 1, w: 24, x: 0, y: 12 } }, - medianLatencyPanel { gridPos: { h: 6, w: 12, x: 0, y: 13 } }, - topicMedianPanel { gridPos: { h: 6, w: 12, x: 12, y: 13 } }, - ninetyNinthPercentileRequestLatency { gridPos: { h: 6, w: 12, x: 0, y: 18 } }, - ninetyNinthTopicShowPercentileRequestLatency { gridPos: { h: 6, w: 12, x: 12, y: 18 } }, - ], - ]) - ), - }, -} diff --git a/discourse-mixin/dashboards_out/discourse-jobs.json b/discourse-mixin/dashboards_out/discourse-jobs.json index 7827ee5e9..ddcac75d4 100644 --- a/discourse-mixin/dashboards_out/discourse-jobs.json +++ b/discourse-mixin/dashboards_out/discourse-jobs.json @@ -1,33 +1,35 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "editable": false, - "gnetId": null, + "description": "Discourse job processing, workers, and memory usage.", "graphTooltip": 1, - "hideControls": false, - "id": null, "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": true, "keepTime": true, - "tags": [ - "discourse-mixin" - ], - "targetBlank": false, - "title": "Other Discourse dashboards", - "type": "dashboards", - "url": "" + "title": "Discourse overview", + "type": "link", + "url": "/d/discourse-overview" } ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Job Statistics", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Current number of Sidekiq Workers.", "fieldConfig": { @@ -46,60 +48,37 @@ "type": "special" } ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 5, "w": 8, "x": 0, - "y": 0 + "y": 1 }, "id": 2, - "links": [ ], - "maxDataPoints": 100, "options": { - "colorMode": "none", "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, "textMode": "auto" }, - "pluginVersion": "9.1.8", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "editorMode": "code", - "expr": "count(discourse_rss{type=\"sidekiq\",instance=~\"$instance\",job=~\"$job\"})", + "expr": "count(discourse_rss{type=\"sidekiq\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, + "instant": false, "legendFormat": "", - "range": true, - "refId": "A", - "step": 40, - "target": "" + "refId": "Sidekiq worker count" } ], "title": "Sidekiq Workers", @@ -107,7 +86,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Current number of Web Workers.", "fieldConfig": { @@ -126,60 +106,37 @@ "type": "special" } ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 5, "w": 8, "x": 8, - "y": 0 + "y": 1 }, "id": 3, - "links": [ ], - "maxDataPoints": 100, "options": { - "colorMode": "none", "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, "textMode": "auto" }, - "pluginVersion": "9.1.8", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "editorMode": "code", - "expr": "count(discourse_rss{type='web',instance=~\"$instance\",job=~\"$job\"})", + "expr": "count(discourse_rss{type=\"web\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", - "intervalFactor": 2, + "instant": false, "legendFormat": "", - "range": true, - "refId": "A", - "step": 40, - "target": "" + "refId": "Web worker count" } ], "title": "Web Workers", @@ -187,7 +144,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Current number of jobs in Sidekiq queue.", "fieldConfig": { @@ -206,149 +164,80 @@ "type": "special" } ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 5, "w": 8, "x": 16, - "y": 0 + "y": 1 }, "id": 4, - "links": [ ], - "maxDataPoints": 100, "options": { - "colorMode": "none", "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, "textMode": "auto" }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "editorMode": "code", - "expr": "max(discourse_sidekiq_jobs_enqueued{instance=~\"$instance\",job=~\"$job\"})", + "expr": "max by (job) (\n discourse_sidekiq_jobs_enqueued{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "range": true, - "refId": "A", - "step": 40, - "target": "" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Sidekiq jobs enqueued" } ], "title": "Sidekiq Queued", "type": "stat" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 6 + }, + "id": 5, + "panels": [ ], + "title": "Job Counts", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of scheduled jobs ran over an interval.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ ] - }, "gridPos": { "h": 6, "w": 12, "x": 0, - "y": 6 - }, - "id": 5, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 7 }, + "id": 6, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(discourse_scheduled_job_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])", + "expr": "sum by (job,job_name) (\n rate(discourse_scheduled_job_count{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{job_name}}" + "instant": false, + "legendFormat": "{{job_name}}", + "refId": "Scheduled job count" } ], "title": "Scheduled Jobs", @@ -356,182 +245,90 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The amount of sidekiq jobs ran over an interval.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ ] - }, "gridPos": { "h": 6, "w": 12, "x": 12, - "y": 6 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 7 }, + "id": 7, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(discourse_sidekiq_job_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])", + "expr": "sum by (job,job_name) (\n rate(discourse_sidekiq_job_count{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{job_name}}" + "instant": false, + "legendFormat": "{{job_name}}", + "refId": "Sidekiq job count" } ], "title": "Sidekiq Jobs", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 13 + }, + "id": 8, + "panels": [ ], + "title": "Job Duration", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Time spent in scheduled jobs broken out by job name.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 12, "x": 0, - "y": 12 + "y": 14 }, - "id": 7, - "links": [ ], + "id": 9, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, - "pluginVersion": "9.1.8", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum(rate(discourse_scheduled_job_duration_seconds{instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])) by (job_name)", + "expr": "sum by (job,job_name) (\n rate(discourse_scheduled_job_duration_seconds{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{job_name}}" + "instant": false, + "legendFormat": "{{job_name}}", + "refId": "Scheduled job duration" } ], "title": "Scheduled Job Duration", @@ -539,183 +336,95 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Time spent in Sidekiq jobs broken out by job name.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 12, "x": 12, - "y": 12 + "y": 14 }, - "id": 8, - "links": [ ], + "id": 10, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, - "pluginVersion": "9.1.8", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum(rate(discourse_sidekiq_job_duration_seconds{instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])) by (job_name)", + "expr": "sum by (job,job_name) (\n rate(discourse_sidekiq_job_duration_seconds{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{job_name}}" + "instant": false, + "legendFormat": "{{job_name}}", + "refId": "Sidekiq job duration" } ], "title": "Sidekiq Job Duration", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 20 + }, + "id": 11, + "panels": [ ], + "title": "Memory", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Total RSS Memory used by process. Broken up by pid.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "bytes" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 12, "x": 0, - "y": 18 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 21 }, + "id": 12, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum(discourse_rss{instance=~\"$instance\",job=~\"$job\"}) by (pid)", + "expr": "sum by (job,pid) (\n discourse_rss{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "pid: {{pid}}" + "instant": false, + "legendFormat": "pid: {{pid}}", + "refId": "RSS memory" } ], "title": "Used RSS Memory", @@ -723,90 +432,34 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Current heap size of V8 engine. Broken up by process type", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "bytes" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 12, "x": 12, - "y": 18 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 21 }, + "id": 13, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum(discourse_v8_used_heap_size{instance=~\"$instance\",job=~\"$job\"}) by (type)", + "expr": "sum by (job,type) (\n discourse_v8_used_heap_size{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}" + "instant": false, + "legendFormat": "{{type}}", + "refId": "V8 heap size" } ], "title": "V8 Heap Size", @@ -814,67 +467,48 @@ } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "discourse-mixin" ], "templating": { "list": [ { - "hide": 0, "label": "Data source", - "name": "prometheus_datasource", + "name": "datasource", "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": ".+", - "current": { - "text": "", - "value": "" + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "datasource": "$prometheus_datasource", - "hide": 0, "includeAll": true, - "label": "instance", + "label": "Job", "multi": true, - "name": "instance", - "options": [ ], - "query": "label_values(discourse_page_views, instance)", + "name": "job", + "query": "label_values(discourse_page_views{job=~\"$job\", instance=~\"$instance\"}, job)", "refresh": 2, - "regex": "", "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Job", + "label": "Instance", "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(discourse_page_views{}, job)", + "name": "instance", + "query": "label_values(discourse_page_views{job=~\"$job\", instance=~\"$instance\",job=~\"$job\"}, instance)", "refresh": 2, - "regex": "", "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" } ] }, @@ -882,33 +516,7 @@ "from": "now-1h", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", - "title": "Discourse Jobs Processing", - "uid": "discourse-jobs", - "version": 0 + "title": "Discourse jobs processing", + "uid": "discourse-jobs" } \ No newline at end of file diff --git a/discourse-mixin/dashboards_out/discourse-overview.json b/discourse-mixin/dashboards_out/discourse-overview.json index 261198525..9106176d3 100644 --- a/discourse-mixin/dashboards_out/discourse-overview.json +++ b/discourse-mixin/dashboards_out/discourse-overview.json @@ -1,27 +1,15 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "editable": false, - "gnetId": null, + "description": "Overview of Discourse application performance and traffic.", "graphTooltip": 1, - "hideControls": false, - "id": null, "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": true, "keepTime": true, - "tags": [ - "discourse-mixin" - ], - "targetBlank": false, - "title": "Other discourse dashboards", - "type": "dashboards", - "url": "" + "title": "Discourse jobs", + "type": "link", + "url": "/d/discourse-jobs" } ], "panels": [ @@ -29,71 +17,25 @@ "collapsed": false, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, "y": 0 }, - "id": 2, + "id": 1, + "panels": [ ], "title": "Overview", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Rate of HTTP traffic over time for the entire application. Grouped by response code.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, @@ -101,28 +43,19 @@ "x": 0, "y": 1 }, - "id": 3, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "id": 2, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum(rate(discourse_http_requests{instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])) by (status)", + "expr": "sum by (job,status) (\n rate(discourse_http_requests{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{status}}" + "instant": false, + "legendFormat": "{{status}}", + "refId": "HTTP requests" } ], "title": "Traffic by Response Code", @@ -130,61 +63,14 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Active web requests for the entire application", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, @@ -192,28 +78,19 @@ "x": 12, "y": 1 }, - "id": 4, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "id": 3, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "discourse_active_app_reqs{instance=~\"$instance\",job=~\"$job\"}", + "expr": "discourse_active_app_reqs{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Active requests" } ], "title": "Active Requests", @@ -221,61 +98,14 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Queued web requests for the entire application.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, @@ -283,28 +113,19 @@ "x": 0, "y": 7 }, - "id": 5, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "id": 4, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "discourse_queued_app_reqs{instance=~\"$instance\",job=~\"$job\"}", + "expr": "discourse_queued_app_reqs{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Queued requests" } ], "title": "Queued Requests", @@ -312,44 +133,12 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Rate of pageviews for the entire application. Grouped by type and service.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, "mappings": [ { "options": { @@ -361,22 +150,8 @@ "type": "special" } ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "views/sec" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, @@ -384,28 +159,19 @@ "x": 12, "y": 7 }, - "id": 6, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "id": 5, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(discourse_page_views{instance=~\"$instance\",job=~\"$job\"}[$__rate_interval])", + "expr": "rate(discourse_page_views{job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Page views" } ], "title": "Page Views", @@ -415,100 +181,45 @@ "collapsed": false, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 12 + "y": 13 }, - "id": 7, + "id": 6, + "panels": [ ], "title": "Latency", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The median amount of time for “latest” page requests for the selected site.", + "description": "The median amount of time for \"latest\" page requests for the selected site.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 12, "x": 0, - "y": 13 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 14 }, + "id": 7, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum(discourse_http_duration_seconds{quantile=\"0.5\",action=\"latest\",instance=~\"$instance\",job=~\"$job\"}) by (controller)", + "expr": "sum by (job,controller) (\n discourse_http_duration_seconds{quantile=\"0.5\",action=\"latest\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{controller}}" + "instant": false, + "legendFormat": "{{controller}}", + "refId": "Latest median request time" } ], "title": "Latest Median Request Time", @@ -516,90 +227,34 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The median amount of time for “topics show” requests for the selected site.", + "description": "The median amount of time for \"topics show\" requests for the selected site.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 12, "x": 12, - "y": 13 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 14 }, + "id": 8, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum(discourse_http_duration_seconds{quantile=\"0.5\",controller=\"topics\",instance=~\"$instance\",job=~\"$job\"}) by (controller)", + "expr": "sum by (job,controller) (\n discourse_http_duration_seconds{quantile=\"0.5\",controller=\"topics\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{controller}}" + "instant": false, + "legendFormat": "{{controller}}", + "refId": "Topic median request time" } ], "title": "Topic Show Median Request Time", @@ -607,90 +262,34 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The 99th percentile amount of time for “latest” page requests for the selected site.", + "description": "The 99th percentile amount of time for \"latest\" page requests for the selected site.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 12, "x": 0, - "y": 18 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 20 }, + "id": 9, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum(discourse_http_duration_seconds{quantile=\"0.99\",action=\"latest\",instance=~\"$instance\",job=~\"$job\"}) by (controller)", + "expr": "sum by (job,controller) (\n discourse_http_duration_seconds{quantile=\"0.99\",action=\"latest\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{controller}}" + "instant": false, + "legendFormat": "{{controller}}", + "refId": "Latest 99th percentile request time" } ], "title": "Latest 99th percentile Request Time", @@ -698,90 +297,34 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The 99th percentile amount of time for “topics show” requests for the selected site.", + "description": "The 99th percentile amount of time for \"topics show\" requests for the selected site.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 6, "w": 12, "x": 12, - "y": 18 - }, - "id": 11, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 20 }, + "id": 10, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "discourse_http_duration_seconds{quantile=\"0.99\",controller=\"topics\",instance=~\"$instance\",job=~\"$job\"}", + "expr": "sum by (job,controller) (\n discourse_http_duration_seconds{quantile=\"0.99\",controller=\"topics\",job=~\"$job\", instance=~\"$instance\",job=~\"$job\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{controller}}" + "instant": false, + "legendFormat": "{{controller}}", + "refId": "Topic 99th percentile request time" } ], "title": "Topic Show 99th percentile Request Time", @@ -789,67 +332,48 @@ } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "discourse-mixin" ], "templating": { "list": [ { - "hide": 0, "label": "Data source", - "name": "prometheus_datasource", + "name": "datasource", "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": ".+", - "current": { - "text": "", - "value": "" + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "datasource": "$prometheus_datasource", - "hide": 0, "includeAll": true, - "label": "instance", + "label": "Job", "multi": true, - "name": "instance", - "options": [ ], - "query": "label_values(discourse_page_views{}, instance)", + "name": "job", + "query": "label_values(discourse_page_views{job=~\"$job\", instance=~\"$instance\"}, job)", "refresh": 2, - "regex": "", "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Job", + "label": "Instance", "multi": true, - "name": "job", - "options": [ ], - "query": "label_values(discourse_page_views{}, job)", + "name": "instance", + "query": "label_values(discourse_page_views{job=~\"$job\", instance=~\"$instance\",job=~\"$job\"}, instance)", "refresh": 2, - "regex": "", "sort": 1, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "type": "query" } ] }, @@ -857,33 +381,7 @@ "from": "now-1h", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", - "title": "Discourse Overview", - "uid": "discourse-overview", - "version": 0 + "title": "Discourse overview", + "uid": "discourse-overview" } \ No newline at end of file diff --git a/discourse-mixin/g.libsonnet b/discourse-mixin/g.libsonnet new file mode 100644 index 000000000..ba90fd9b0 --- /dev/null +++ b/discourse-mixin/g.libsonnet @@ -0,0 +1,3 @@ +// grafonnet must be imported with "g" alias +local g = import './vendor/grafonnet-v11.0.0/main.libsonnet'; +g diff --git a/discourse-mixin/jsonnetfile.json b/discourse-mixin/jsonnetfile.json index 64258d167..46d5af011 100644 --- a/discourse-mixin/jsonnetfile.json +++ b/discourse-mixin/jsonnetfile.json @@ -1,15 +1,24 @@ { - "version": 1, - "dependencies": [ - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" - } - }, - "version": "master" + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet", + "subdir": "gen/grafonnet-v11.0.0" } - ], - "legacyImports": true -} \ No newline at end of file + }, + "version": "main" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" + } + }, + "version": "master" + } + ], + "legacyImports": true +} diff --git a/discourse-mixin/links.libsonnet b/discourse-mixin/links.libsonnet new file mode 100644 index 000000000..8108fea20 --- /dev/null +++ b/discourse-mixin/links.libsonnet @@ -0,0 +1,13 @@ +local g = import './g.libsonnet'; + +{ + new(this): { + overview: + g.dashboard.link.link.new('Discourse overview', '/d/' + this.config.uid + '-overview') + + g.dashboard.link.link.options.withKeepTime(true), + + jobs: + g.dashboard.link.link.new('Discourse jobs', '/d/' + this.config.uid + '-jobs') + + g.dashboard.link.link.options.withKeepTime(true), + }, +} diff --git a/discourse-mixin/main.libsonnet b/discourse-mixin/main.libsonnet new file mode 100644 index 000000000..32c94c6ff --- /dev/null +++ b/discourse-mixin/main.libsonnet @@ -0,0 +1,36 @@ +local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; +local dashboards = import './dashboards.libsonnet'; +local g = import './g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + withConfigMixin(config): { + config+: config, + }, + + new(): { + local this = self, + config: config, + + signals: + { + [sig]: commonlib.signals.unmarshallJsonMulti( + this.config.signals[sig], + type=this.config.metricsSource + ) + for sig in std.objectFields(this.config.signals) + }, + + grafana: { + links: (import './links.libsonnet').new(this), + panels: (import './panels.libsonnet').new(this), + rows: (import './rows.libsonnet').new(this), + dashboards: dashboards.new(this), + }, + + prometheus: { + alerts: alerts.new(this), + }, + }, +} diff --git a/discourse-mixin/mixin.libsonnet b/discourse-mixin/mixin.libsonnet index 119d2cdde..b28c41414 100644 --- a/discourse-mixin/mixin.libsonnet +++ b/discourse-mixin/mixin.libsonnet @@ -1,3 +1,15 @@ -(import 'alerts/alerts.libsonnet') + -(import 'dashboards/dashboards.libsonnet') + -(import 'config.libsonnet') +local lib = import './main.libsonnet'; + +local discourse = + lib.new() + + lib.withConfigMixin({ + // Override defaults if needed + }); + +{ + grafanaDashboards+:: discourse.grafana.dashboards, + prometheusAlerts+:: discourse.prometheus.alerts, + prometheusRules+:: { + groups+: [], + }, +} diff --git a/discourse-mixin/panels.libsonnet b/discourse-mixin/panels.libsonnet new file mode 100644 index 000000000..210f887d4 --- /dev/null +++ b/discourse-mixin/panels.libsonnet @@ -0,0 +1,194 @@ +local g = import './g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + new(this): { + local signals = this.signals, + + // Overview dashboard panels + trafficByResponseCode: + g.panel.timeSeries.new('Traffic by Response Code') + + g.panel.timeSeries.panelOptions.withDescription('Rate of HTTP traffic over time for the entire application. Grouped by response code.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.http.httpRequests.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + activeRequests: + g.panel.timeSeries.new('Active Requests') + + g.panel.timeSeries.panelOptions.withDescription('Active web requests for the entire application') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.requests.activeRequests.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + queuedRequests: + g.panel.timeSeries.new('Queued Requests') + + g.panel.timeSeries.panelOptions.withDescription('Queued web requests for the entire application.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.requests.queuedRequests.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + pageViews: + g.panel.timeSeries.new('Page Views') + + g.panel.timeSeries.panelOptions.withDescription('Rate of pageviews for the entire application. Grouped by type and service.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.requests.pageViews.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('views/sec') + + g.panel.timeSeries.standardOptions.withMappings([ + g.panel.timeSeries.standardOptions.mapping.SpecialValueMap.withType() + + g.panel.timeSeries.standardOptions.mapping.SpecialValueMap.withOptions({ + match: 'null', + result: { text: 'N/A' }, + }), + ]), + + latestMedianRequestTime: + g.panel.timeSeries.new('Latest Median Request Time') + + g.panel.timeSeries.panelOptions.withDescription('The median amount of time for "latest" page requests for the selected site.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.http.latestMedianRequestTime.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + topicMedianRequestTime: + g.panel.timeSeries.new('Topic Show Median Request Time') + + g.panel.timeSeries.panelOptions.withDescription('The median amount of time for "topics show" requests for the selected site.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.http.topicMedianRequestTime.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + latest99thPercentileRequestTime: + g.panel.timeSeries.new('Latest 99th percentile Request Time') + + g.panel.timeSeries.panelOptions.withDescription('The 99th percentile amount of time for "latest" page requests for the selected site.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.http.latest99thPercentileRequestTime.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + topic99thPercentileRequestTime: + g.panel.timeSeries.new('Topic Show 99th percentile Request Time') + + g.panel.timeSeries.panelOptions.withDescription('The 99th percentile amount of time for "topics show" requests for the selected site.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.http.topic99thPercentileRequestTime.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + // Jobs dashboard panels + sidekiqJobDuration: + g.panel.timeSeries.new('Sidekiq Job Duration') + + g.panel.timeSeries.panelOptions.withDescription('Time spent in Sidekiq jobs broken out by job name.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.jobs.sidekiqJobDuration.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(30) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + scheduledJobDuration: + g.panel.timeSeries.new('Scheduled Job Duration') + + g.panel.timeSeries.panelOptions.withDescription('Time spent in scheduled jobs broken out by job name.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.jobs.scheduledJobDuration.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(30) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + scheduledJobCount: + g.panel.timeSeries.new('Scheduled Jobs') + + g.panel.timeSeries.panelOptions.withDescription('The number of scheduled jobs ran over an interval.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.jobs.scheduledJobCount.asTarget(), + ]), + + sidekiqJobCount: + g.panel.timeSeries.new('Sidekiq Jobs') + + g.panel.timeSeries.panelOptions.withDescription('The amount of sidekiq jobs ran over an interval.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.jobs.sidekiqJobCount.asTarget(), + ]), + + usedRSSMemory: + g.panel.timeSeries.new('Used RSS Memory') + + g.panel.timeSeries.panelOptions.withDescription('Total RSS Memory used by process. Broken up by pid.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.memory.rssMemory.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes'), + + v8HeapSize: + g.panel.timeSeries.new('V8 Heap Size') + + g.panel.timeSeries.panelOptions.withDescription('Current heap size of V8 engine. Broken up by process type') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.memory.v8HeapSize.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes'), + + sidekiqWorkers: + g.panel.stat.new('Sidekiq Workers') + + g.panel.stat.panelOptions.withDescription('Current number of Sidekiq Workers.') + + g.panel.stat.queryOptions.withTargets([ + signals.jobs.sidekiqWorkerCount.asTarget(), + ]) + + g.panel.stat.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.withMappings([ + g.panel.stat.standardOptions.mapping.SpecialValueMap.withType() + + g.panel.stat.standardOptions.mapping.SpecialValueMap.withOptions({ + match: 'null', + result: { text: 'N/A' }, + }), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.stat.options.withGraphMode('none') + + g.panel.stat.options.withTextMode('auto'), + + webWorkers: + g.panel.stat.new('Web Workers') + + g.panel.stat.panelOptions.withDescription('Current number of Web Workers.') + + g.panel.stat.queryOptions.withTargets([ + signals.jobs.webWorkerCount.asTarget(), + ]) + + g.panel.stat.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.withMappings([ + g.panel.stat.standardOptions.mapping.SpecialValueMap.withType() + + g.panel.stat.standardOptions.mapping.SpecialValueMap.withOptions({ + match: 'null', + result: { text: 'N/A' }, + }), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.stat.options.withGraphMode('none') + + g.panel.stat.options.withTextMode('auto'), + + sidekiqQueued: + g.panel.stat.new('Sidekiq Queued') + + g.panel.stat.panelOptions.withDescription('Current number of jobs in Sidekiq queue.') + + g.panel.stat.queryOptions.withTargets([ + signals.jobs.sidekiqJobsEnqueued.asTarget(), + ]) + + g.panel.stat.standardOptions.withUnit('none') + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.withMappings([ + g.panel.stat.standardOptions.mapping.SpecialValueMap.withType() + + g.panel.stat.standardOptions.mapping.SpecialValueMap.withOptions({ + match: 'null', + result: { text: 'N/A' }, + }), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.stat.options.withGraphMode('none') + + g.panel.stat.options.withTextMode('auto'), + }, +} diff --git a/discourse-mixin/prometheus_rules_out/prometheus_alerts.yaml b/discourse-mixin/prometheus_rules_out/prometheus_alerts.yaml index 93fff9a6d..059a4b97b 100644 --- a/discourse-mixin/prometheus_rules_out/prometheus_alerts.yaml +++ b/discourse-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -1,7 +1,7 @@ groups: - - name: DiscourseAlerts + - name: discourse-alerts rules: - - alert: DiscourseRequestsHigh5xxErrors + - alert: DiscourseHigh5xxErrors annotations: description: '{{ printf "%.2f" $value }}% of all requests are resulting in 500 status codes, which is above the threshold 10%, indicating a potentially larger issue for {{$labels.instance}}' summary: More than 10% of all requests result in a 5XX. @@ -10,7 +10,7 @@ groups: for: 5m labels: severity: critical - - alert: DiscourseRequestsHigh4xxErrors + - alert: DiscourseHigh4xxErrors annotations: description: '{{ printf "%.2f" $value }}% of all requests are resulting in 400 status code, which is above the threshold 30%, indicating a potentially larger issue for {{$labels.instance}}' summary: More than 30% of all requests result in a 4XX. diff --git a/discourse-mixin/prometheus_rules_out/prometheus_rules.yaml b/discourse-mixin/prometheus_rules_out/prometheus_rules.yaml index e69de29bb..2ae22208b 100644 --- a/discourse-mixin/prometheus_rules_out/prometheus_rules.yaml +++ b/discourse-mixin/prometheus_rules_out/prometheus_rules.yaml @@ -0,0 +1 @@ +groups: [] diff --git a/discourse-mixin/rows.libsonnet b/discourse-mixin/rows.libsonnet new file mode 100644 index 000000000..721217eb7 --- /dev/null +++ b/discourse-mixin/rows.libsonnet @@ -0,0 +1,56 @@ +local g = import './g.libsonnet'; + +{ + new(this): { + local panels = this.grafana.panels, + + // discourse-overview rows + overviewRow: + g.panel.row.new('Overview') + + g.panel.row.withPanels([ + panels.trafficByResponseCode { gridPos: { h: 6, w: 12, x: 0, y: 0 } }, + panels.activeRequests { gridPos: { h: 6, w: 12, x: 12, y: 0 } }, + panels.queuedRequests { gridPos: { h: 6, w: 12, x: 0, y: 6 } }, + panels.pageViews { gridPos: { h: 6, w: 12, x: 12, y: 6 } }, + ]), + + latencyRow: + g.panel.row.new('Latency') + + g.panel.row.withPanels([ + panels.latestMedianRequestTime { gridPos: { h: 6, w: 12, x: 0, y: 0 } }, + panels.topicMedianRequestTime { gridPos: { h: 6, w: 12, x: 12, y: 0 } }, + panels.latest99thPercentileRequestTime { gridPos: { h: 6, w: 12, x: 0, y: 6 } }, + panels.topic99thPercentileRequestTime { gridPos: { h: 6, w: 12, x: 12, y: 6 } }, + ]), + + // discourse-jobs rows + jobStatsRow: + g.panel.row.new('Job Statistics') + + g.panel.row.withPanels([ + panels.sidekiqWorkers { gridPos: { h: 5, w: 8, x: 0, y: 0 } }, + panels.webWorkers { gridPos: { h: 5, w: 8, x: 8, y: 0 } }, + panels.sidekiqQueued { gridPos: { h: 5, w: 8, x: 16, y: 0 } }, + ]), + + jobCountsRow: + g.panel.row.new('Job Counts') + + g.panel.row.withPanels([ + panels.scheduledJobCount { gridPos: { h: 6, w: 12, x: 0, y: 0 } }, + panels.sidekiqJobCount { gridPos: { h: 6, w: 12, x: 12, y: 0 } }, + ]), + + jobDurationRow: + g.panel.row.new('Job Duration') + + g.panel.row.withPanels([ + panels.scheduledJobDuration { gridPos: { h: 6, w: 12, x: 0, y: 0 } }, + panels.sidekiqJobDuration { gridPos: { h: 6, w: 12, x: 12, y: 0 } }, + ]), + + memoryRow: + g.panel.row.new('Memory') + + g.panel.row.withPanels([ + panels.usedRSSMemory { gridPos: { h: 6, w: 12, x: 0, y: 0 } }, + panels.v8HeapSize { gridPos: { h: 6, w: 12, x: 12, y: 0 } }, + ]), + }, +} diff --git a/discourse-mixin/signals/http.libsonnet b/discourse-mixin/signals/http.libsonnet new file mode 100644 index 000000000..833cddd12 --- /dev/null +++ b/discourse-mixin/signals/http.libsonnet @@ -0,0 +1,79 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + httpRequests: { + name: 'HTTP requests', + type: 'counter', + unit: 'reqps', + description: 'Rate of HTTP requests by status code.', + sources: { + prometheus: { + expr: 'discourse_http_requests{%(queriesSelector)s}', + aggKeepLabels: ['status'], + legendCustomTemplate: '{{status}}', + }, + }, + }, + + latestMedianRequestTime: { + name: 'Latest median request time', + type: 'gauge', + unit: 's', + description: 'The median amount of time for "latest" page requests.', + sources: { + prometheus: { + expr: 'discourse_http_duration_seconds{quantile="0.5",action="latest",%(queriesSelector)s}', + aggKeepLabels: ['controller'], + legendCustomTemplate: '{{controller}}', + }, + }, + }, + + topicMedianRequestTime: { + name: 'Topic median request time', + type: 'gauge', + unit: 's', + description: 'The median amount of time for "topics show" requests.', + sources: { + prometheus: { + expr: 'discourse_http_duration_seconds{quantile="0.5",controller="topics",%(queriesSelector)s}', + aggKeepLabels: ['controller'], + legendCustomTemplate: '{{controller}}', + }, + }, + }, + + latest99thPercentileRequestTime: { + name: 'Latest 99th percentile request time', + type: 'gauge', + unit: 's', + description: 'The 99th percentile amount of time for "latest" page requests.', + sources: { + prometheus: { + expr: 'discourse_http_duration_seconds{quantile="0.99",action="latest",%(queriesSelector)s}', + aggKeepLabels: ['controller'], + legendCustomTemplate: '{{controller}}', + }, + }, + }, + + topic99thPercentileRequestTime: { + name: 'Topic 99th percentile request time', + type: 'gauge', + unit: 's', + description: 'The 99th percentile amount of time for "topics show" requests.', + sources: { + prometheus: { + expr: 'discourse_http_duration_seconds{quantile="0.99",controller="topics",%(queriesSelector)s}', + aggKeepLabels: ['controller'], + legendCustomTemplate: '{{controller}}', + }, + }, + }, + }, + } diff --git a/discourse-mixin/signals/jobs.libsonnet b/discourse-mixin/signals/jobs.libsonnet new file mode 100644 index 000000000..2a4c6d45c --- /dev/null +++ b/discourse-mixin/signals/jobs.libsonnet @@ -0,0 +1,107 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + sidekiqJobDuration: { + name: 'Sidekiq job duration', + type: 'counter', + unit: 's', + description: 'Time spent in Sidekiq jobs broken out by job name.', + sources: { + prometheus: { + expr: 'discourse_sidekiq_job_duration_seconds{%(queriesSelector)s}', + aggKeepLabels: ['job_name'], + legendCustomTemplate: '{{job_name}}', + }, + }, + }, + + scheduledJobDuration: { + name: 'Scheduled job duration', + type: 'counter', + unit: 's', + description: 'Time spent in scheduled jobs broken out by job name.', + sources: { + prometheus: { + expr: 'discourse_scheduled_job_duration_seconds{%(queriesSelector)s}', + aggKeepLabels: ['job_name'], + legendCustomTemplate: '{{job_name}}', + }, + }, + }, + + sidekiqJobCount: { + name: 'Sidekiq job count', + type: 'counter', + rangeFunction: 'increase', + unit: 'none', + description: 'The amount of sidekiq jobs ran over an interval.', + sources: { + prometheus: { + expr: 'discourse_sidekiq_job_count{%(queriesSelector)s}', + aggKeepLabels: ['job_name'], + legendCustomTemplate: '{{job_name}}', + }, + }, + }, + + scheduledJobCount: { + name: 'Scheduled job count', + type: 'counter', + rangeFunction: 'increase', + unit: 'none', + description: 'The number of scheduled jobs ran over an interval.', + sources: { + prometheus: { + expr: 'discourse_scheduled_job_count{%(queriesSelector)s}', + aggKeepLabels: ['job_name'], + legendCustomTemplate: '{{job_name}}', + }, + }, + }, + + sidekiqJobsEnqueued: { + name: 'Sidekiq jobs enqueued', + type: 'gauge', + aggFunction: 'max', + unit: 'none', + description: 'Current number of jobs in Sidekiq queue.', + sources: { + prometheus: { + expr: 'discourse_sidekiq_jobs_enqueued{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + sidekiqWorkerCount: { + name: 'Sidekiq worker count', + type: 'raw', + unit: 'none', + description: 'Current number of Sidekiq workers.', + sources: { + prometheus: { + expr: 'count(discourse_rss{type="sidekiq",%(queriesSelector)s})', + legendCustomTemplate: '', + }, + }, + }, + + webWorkerCount: { + name: 'Web worker count', + type: 'raw', + unit: 'none', + description: 'Current number of web workers.', + sources: { + prometheus: { + expr: 'count(discourse_rss{type="web",%(queriesSelector)s})', + legendCustomTemplate: '', + }, + }, + }, + }, + } diff --git a/discourse-mixin/signals/memory.libsonnet b/discourse-mixin/signals/memory.libsonnet new file mode 100644 index 000000000..bf20bd894 --- /dev/null +++ b/discourse-mixin/signals/memory.libsonnet @@ -0,0 +1,37 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + rssMemory: { + name: 'RSS memory', + type: 'gauge', + unit: 'bytes', + description: 'Total RSS memory used by process.', + sources: { + prometheus: { + expr: 'discourse_rss{%(queriesSelector)s}', + aggKeepLabels: ['pid'], + legendCustomTemplate: 'pid: {{pid}}', + }, + }, + }, + + v8HeapSize: { + name: 'V8 heap size', + type: 'gauge', + unit: 'bytes', + description: 'Current heap size of V8 engine broken up by process type.', + sources: { + prometheus: { + expr: 'discourse_v8_used_heap_size{%(queriesSelector)s}', + aggKeepLabels: ['type'], + legendCustomTemplate: '{{type}}', + }, + }, + }, + }, + } diff --git a/discourse-mixin/signals/requests.libsonnet b/discourse-mixin/signals/requests.libsonnet new file mode 100644 index 000000000..bbe3aa95a --- /dev/null +++ b/discourse-mixin/signals/requests.libsonnet @@ -0,0 +1,47 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'none', + signals: { + activeRequests: { + name: 'Active requests', + type: 'gauge', + unit: 'reqps', + description: 'Active web requests for the entire application.', + sources: { + prometheus: { + expr: 'discourse_active_app_reqs{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + queuedRequests: { + name: 'Queued requests', + type: 'gauge', + unit: 'reqps', + description: 'Queued web requests for the entire application.', + sources: { + prometheus: { + expr: 'discourse_queued_app_reqs{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + pageViews: { + name: 'Page views', + type: 'counter', + unit: 'views/sec', + description: 'Rate of pageviews for the entire application.', + sources: { + prometheus: { + expr: 'discourse_page_views{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + }, + }