diff --git a/.gitignore b/.gitignore index d68c86c04..1a57d51f0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ vendor jsonnetfile.lock.json *.zip +.worktrees diff --git a/mongodb-atlas-mixin/alerts/alerts.libsonnet b/mongodb-atlas-mixin/alerts.libsonnet similarity index 83% rename from mongodb-atlas-mixin/alerts/alerts.libsonnet rename to mongodb-atlas-mixin/alerts.libsonnet index 9cf658b9d..0c85b0ed5 100644 --- a/mongodb-atlas-mixin/alerts/alerts.libsonnet +++ b/mongodb-atlas-mixin/alerts.libsonnet @@ -1,14 +1,14 @@ { - prometheusAlerts+:: { - groups+: [ + new(this): { + groups: [ { - name: 'mongodb-atlas-alerts', + name: this.config.uid + '-alerts', rules: [ { - alert: 'MongoDBAtlasHighNumberOfCollectionExclusiveDeadlocks', + alert: 'MongoDBAtlasCollExclusiveDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Collection_deadlockCount_W[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -18,14 +18,14 @@ description: ( 'The number of collection exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfCollectionIntentExclusiveDeadlocks', + alert: 'MongoDBAtlasCollIntentExclDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Collection_deadlockCount_w[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -35,14 +35,14 @@ description: ( 'The number of collection intent-exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' 
- ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfCollectionSharedDeadlocks', + alert: 'MongoDBAtlasCollSharedDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Collection_deadlockCount_R[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -52,14 +52,14 @@ description: ( 'The number of collection shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfCollectionIntentSharedDeadlocks', + alert: 'MongoDBAtlasCollIntentSharedDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Collection_deadlockCount_r[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -69,14 +69,14 @@ description: ( 'The number of collection intent-shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfDatabaseExclusiveDeadlocks', + alert: 'MongoDBAtlasDBExclusiveDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Database_deadlockCount_W[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -86,14 +86,14 @@ description: ( 'The number of database exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' 
- ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfDatabaseIntentExclusiveDeadlocks', + alert: 'MongoDBAtlasDBIntentExclDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Database_deadlockCount_w[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -103,14 +103,14 @@ description: ( 'The number of database intent-exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfDatabaseSharedDeadlocks', + alert: 'MongoDBAtlasDBSharedDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Database_deadlockCount_R[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -120,14 +120,14 @@ description: ( 'The number of database shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfDatabaseIntentSharedDeadlocks', + alert: 'MongoDBAtlasDBIntentSharedDeadlocks', expr: ||| sum without(cl_role,process_port,rs_nm,rs_state) (increase(mongodb_locks_Database_deadlockCount_r[5m])) > %(alertsDeadlocks)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -137,14 +137,14 @@ description: ( 'The number of database intent-shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsDeadlocks)s.' 
- ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfSlowNetworkRequests', + alert: 'MongoDBAtlasSlowNetworkRequests', expr: ||| sum without (cl_role,rs_nm,rs_state,process_port) (increase(mongodb_network_numSlowSSLOperations[5m])) + sum without (cl_role,rs_nm,rs_state,process_port) (increase(mongodb_network_numSlowDNSOperations[5m])) > %(alertsSlowNetworkRequests)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -154,14 +154,14 @@ description: ( 'The number of DNS and SSL operations taking more than 1 second to complete on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsSlowNetworkRequests)s.' - ) % $._config, + ) % this.config, }, }, { alert: 'MongoDBAtlasDiskSpaceLow', expr: ||| 100 * ((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes)) / clamp_min((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes)) + (sum without (disk_name) (hardware_disk_metrics_disk_space_free_bytes)), 1)) > %(alertsHighDiskUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -171,14 +171,14 @@ description: ( 'The amount of hardware disk space being used on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}}%% which is above the threshold of %(alertsHighDiskUsage)s%%.' 
- ) % $._config, + ) % this.config, }, }, { alert: 'MongoDBAtlasSlowHardwareIO', expr: ||| (sum without (disk_name) (increase(hardware_disk_metrics_read_time_milliseconds[5m])) + sum without (disk_name) (increase(hardware_disk_metrics_write_time_milliseconds[5m]))) / 1000 > %(alertsSlowHardwareIO)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -188,14 +188,14 @@ description: ( 'The latency time for read and write I/Os on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} seconds which is above the threshold of %(alertsSlowHardwareIO)s seconds.' - ) % $._config, + ) % this.config, }, }, { - alert: 'MongoDBAtlasHighNumberOfTimeoutElections', + alert: 'MongoDBAtlasElectionTimeouts', expr: ||| sum without (cl_role,process_port,instance,rs_state) (increase(mongodb_electionMetrics_electionTimeout_called[5m])) > %(alertsHighTimeoutElections)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -204,8 +204,8 @@ summary: 'There is a high number of elections being called due to the primary node timing out.', description: ( - 'The number of elections being called due to the primary node timing out in replica set {{$labels.rs_m}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsHighTimeoutElections)s.' - ) % $._config, + 'The number of elections being called due to the primary node timing out in replica set {{$labels.rs_nm}} in cluster {{$labels.cl_name}} is {{printf "%%.0f" $value}} which is above the threshold of %(alertsHighTimeoutElections)s.' 
+ ) % this.config, }, }, ], diff --git a/mongodb-atlas-mixin/config.libsonnet b/mongodb-atlas-mixin/config.libsonnet index a7b92c965..4a05b5a62 100644 --- a/mongodb-atlas-mixin/config.libsonnet +++ b/mongodb-atlas-mixin/config.libsonnet @@ -1,18 +1,51 @@ { - _config+:: { - // sharding dashboard flag - enableShardingOverview: false, - - dashboardTags: ['mongodb-atlas-mixin'], - dashboardPeriod: 'now-30m', - dashboardTimezone: 'default', - dashboardRefresh: '1m', - - // alerts thresholds - alertsDeadlocks: 10, // count - alertsSlowNetworkRequests: 10, // count - alertsHighDiskUsage: 90, // percentage: 0-100 - alertsSlowHardwareIO: 3, // seconds - alertsHighTimeoutElections: 10, // count + local this = self, + + // Basic filtering - MongoDB Atlas uses job and cl_name (cluster name) as primary filters + filteringSelector: 'job=~"$job", cl_name=~"$cl_name"', + groupLabels: ['job', 'cl_name'], + instanceLabels: ['instance'], + + // Dashboard settings + dashboardTags: ['mongodb-atlas-mixin'], + uid: 'mongodb-atlas', + dashboardNamePrefix: 'MongoDB Atlas', + dashboardRefresh: '1m', + dashboardPeriod: 'now-30m', + dashboardTimezone: 'default', + + // Sharding dashboard flag + enableShardingOverview: false, + + // Logs configuration (MongoDB Atlas does not have Loki logs by default) + enableLokiLogs: false, + logLabels: [], + extraLogLabels: [], + logsVolumeGroupBy: 'level', + showLogsVolume: false, + + // Alert thresholds with units + alertsDeadlocks: 10, // count + alertsSlowNetworkRequests: 10, // count + alertsHighDiskUsage: 90, // % + alertsSlowHardwareIO: 3, // seconds + alertsHighTimeoutElections: 10, // count + + // Metrics source + metricsSource: 'prometheus', + + // Legend template for instance labels + legendCustomTemplate: std.join(' ', std.map(function(label) '{{' + label + '}}', this.instanceLabels)), + + // Import signal definitions + signals+: { + hardware: (import './signals/hardware.libsonnet')(this), + memory: (import './signals/memory.libsonnet')(this), + 
network: (import './signals/network.libsonnet')(this), + connections: (import './signals/connections.libsonnet')(this), + operations: (import './signals/operations.libsonnet')(this), + locks: (import './signals/locks.libsonnet')(this), + elections: (import './signals/elections.libsonnet')(this), + sharding: (import './signals/sharding.libsonnet')(this), }, } diff --git a/mongodb-atlas-mixin/dashboards.libsonnet b/mongodb-atlas-mixin/dashboards.libsonnet new file mode 100644 index 000000000..fb6964c69 --- /dev/null +++ b/mongodb-atlas-mixin/dashboards.libsonnet @@ -0,0 +1,166 @@ +local g = import './g.libsonnet'; + +{ + local root = self, + new(this): + local prefix = this.config.dashboardNamePrefix; + local links = this.grafana.links; + local tags = this.config.dashboardTags; + local uid = this.config.uid; + local vars = this.grafana.variables; + local annotations = this.grafana.annotations; + local refresh = this.config.dashboardRefresh; + local period = this.config.dashboardPeriod; + local timezone = this.config.dashboardTimezone; + + { + 'mongodb-atlas-cluster-overview.json': + g.dashboard.new(prefix + ' cluster overview') + + g.dashboard.withDescription('Overview of MongoDB Atlas cluster metrics.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.clusterOverviewHardwareRow, + this.grafana.rows.clusterOverviewDiskRow, + this.grafana.rows.clusterOverviewNetworkRow, + this.grafana.rows.clusterOverviewConnectionsRow, + this.grafana.rows.clusterOverviewOperationsRow, + this.grafana.rows.clusterOverviewLocksRow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance, + uid + '-cluster-overview', + tags, + links { clusterOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + 'mongodb-atlas-elections-overview.json': + g.dashboard.new(prefix + ' elections overview') + + g.dashboard.withDescription('Overview of MongoDB Atlas election metrics.') + + g.dashboard.withPanels( + 
g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.electionsStepUpRow, + this.grafana.rows.electionsPriorityTakeoverRow, + this.grafana.rows.electionsCatchUpTakeoverRow, + this.grafana.rows.electionsTimeoutRow, + this.grafana.rows.electionsCatchUpsRow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance, + uid + '-elections-overview', + tags, + links { electionsOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + 'mongodb-atlas-operations-overview.json': + g.dashboard.new(prefix + ' operations overview') + + g.dashboard.withDescription('Overview of MongoDB Atlas operation metrics.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.operationsCountersClusterRow, + this.grafana.rows.operationsCountersInstanceRow, + this.grafana.rows.operationsLatenciesClusterRow, + this.grafana.rows.operationsLatenciesInstanceRow, + this.grafana.rows.operationsAvgLatenciesRow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance, + uid + '-operations-overview', + tags, + links { operationsOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + + 'mongodb-atlas-performance-overview.json': + g.dashboard.new(prefix + ' performance overview') + + g.dashboard.withDescription('Overview of MongoDB Atlas performance metrics.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.performanceConnectionsRow, + this.grafana.rows.performanceDbLocksClusterRow, + this.grafana.rows.performanceDbLocksInstanceRow, + this.grafana.rows.performanceDbWaitCountsClusterRow, + this.grafana.rows.performanceDbWaitCountsInstanceRow, + this.grafana.rows.performanceDbAcqTimeRow, + this.grafana.rows.performanceCollLocksRow, + this.grafana.rows.performanceCollWaitCountsRow, + this.grafana.rows.performanceCollAcqTimeRow, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance, + uid + 
'-performance-overview', + tags, + links { performanceOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + } + + + if this.config.enableShardingOverview then + { + 'mongodb-atlas-sharding-overview.json': + g.dashboard.new(prefix + ' sharding overview') + + g.dashboard.withDescription('Overview of MongoDB Atlas sharding metrics.') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels( + [ + this.grafana.rows.overview, + ] + ) + ) + ) + + root.applyCommon( + vars.multiInstance, + uid + '-sharding-overview', + tags, + links { shardingOverview+:: {} }, + annotations, + timezone, + refresh, + period + ), + } else {}, + + applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period): + g.dashboard.withTags(tags) + + g.dashboard.withUid(uid) + + g.dashboard.withLinks(std.objectValues(links)) + + g.dashboard.withTimezone(timezone) + + g.dashboard.withRefresh(refresh) + + g.dashboard.time.withFrom(period) + + g.dashboard.withVariables(vars) + + g.dashboard.withAnnotations(std.objectValues(annotations)), +} diff --git a/mongodb-atlas-mixin/dashboards/dashboards.libsonnet b/mongodb-atlas-mixin/dashboards/dashboards.libsonnet deleted file mode 100644 index e7028638a..000000000 --- a/mongodb-atlas-mixin/dashboards/dashboards.libsonnet +++ /dev/null @@ -1,5 +0,0 @@ -(import 'mongodb-atlas-cluster-overview.libsonnet') + -(import 'mongodb-atlas-operations-overview.libsonnet') + -(import 'mongodb-atlas-performance-overview.libsonnet') + -(import 'mongodb-atlas-elections-overview.libsonnet') + -(import 'mongodb-atlas-sharding-overview.libsonnet') diff --git a/mongodb-atlas-mixin/dashboards/mongodb-atlas-cluster-overview.libsonnet b/mongodb-atlas-mixin/dashboards/mongodb-atlas-cluster-overview.libsonnet deleted file mode 100644 index 5520d34cb..000000000 --- a/mongodb-atlas-mixin/dashboards/mongodb-atlas-cluster-overview.libsonnet +++ /dev/null @@ -1,2159 +0,0 @@ -local g = (import 
'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'mongodb-atlas-cluster-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local shardRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Shard', - collapsed: false, -}; - -local shardNodesPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'table', - title: 'Shard nodes', - description: 'An inventory table for shard nodes in the environment.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - custom: { - align: 'center', - cellOptions: { - type: 'auto', - }, - filterable: false, - inspect: false, - }, - mappings: [ - { - options: { - '1': { - index: 0, - text: 'Primary', - }, - '2': { - index: 1, - text: 'Secondary', - }, - }, - type: 'value', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - }, - overrides: [ - { - matcher: { - id: 'byName', - options: 'cl_role', - }, - properties: [ - { - id: 'custom.width', - value: 150, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_state', - }, - properties: [ - { - id: 'custom.width', - value: 100, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_nm', - }, - properties: [ - { - id: 'custom.width', - value: 250, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'cl_name', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 
'group_id', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'State', - }, - properties: [ - { - id: 'custom.cellOptions', - value: { - type: 'color-text', - }, - }, - { - id: 'mappings', - value: [ - { - options: { - '1': { - color: 'green', - index: 0, - text: 'Primary', - }, - '2': { - color: 'yellow', - index: 1, - text: 'Secondary', - }, - }, - type: 'value', - }, - ], - }, - ], - }, - ], - }, - options: { - cellHeight: 'md', - footer: { - countRows: false, - enablePagination: false, - fields: '', - reducer: [ - 'sum', - ], - show: false, - }, - showHeader: true, - }, - pluginVersion: '10.2.0-59981', - transformations: [ - { - id: 'reduce', - options: { - labelsToFields: true, - reducers: [ - 'lastNotNull', - ], - }, - }, - { - id: 'organize', - options: { - excludeByName: { - Field: true, - 'Last *': true, - __name__: true, - job: true, - org_id: true, - process_port: true, - }, - indexByName: { - Field: 6, - 'Last *': 11, - __name__: 7, - cl_name: 1, - cl_role: 2, - group_id: 0, - instance: 3, - job: 8, - org_id: 9, - process_port: 10, - rs_nm: 4, - rs_state: 5, - }, - renameByName: { - cl_name: 'Cluster', - cl_role: 'Role', - group_id: 'Group', - instance: 'Node', - rs_nm: 'Replica set', - rs_state: 'State', - }, - }, - }, - { - id: 'filterByValue', - options: { - filters: [ - { - config: { - id: 'equal', - options: { - value: 'shardsvr', - }, - }, - fieldName: 'Role', - }, - ], - match: 'all', - type: 'include', - }, - }, - ], -}; - -local configRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Config', - collapsed: false, -}; - -local configNodesPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}', - datasource=promDatasource, - legendFormat='', - format='time_series', - 
), - ], - type: 'table', - title: 'Config nodes', - description: 'An inventory table for config nodes in the environment.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - custom: { - align: 'center', - cellOptions: { - type: 'auto', - }, - filterable: false, - inspect: false, - }, - mappings: [ - { - options: { - '1': { - index: 0, - text: 'Primary', - }, - '2': { - index: 1, - text: 'Secondary', - }, - }, - type: 'value', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - }, - overrides: [ - { - matcher: { - id: 'byName', - options: 'cl_role', - }, - properties: [ - { - id: 'custom.width', - value: 150, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_state', - }, - properties: [ - { - id: 'custom.width', - value: 100, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_nm', - }, - properties: [ - { - id: 'custom.width', - value: 250, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'cl_name', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'group_id', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'State', - }, - properties: [ - { - id: 'custom.cellOptions', - value: { - type: 'color-text', - }, - }, - { - id: 'mappings', - value: [ - { - options: { - '1': { - color: 'green', - index: 0, - text: 'Primary', - }, - '2': { - color: 'yellow', - index: 1, - text: 'Secondary', - }, - }, - type: 'value', - }, - ], - }, - ], - }, - ], - }, - options: { - cellHeight: 'md', - footer: { - countRows: false, - enablePagination: false, - fields: '', - reducer: [ - 'sum', - ], - show: false, - }, - showHeader: true, - }, - pluginVersion: '10.2.0-59981', - transformations: [ - { - id: 'reduce', - options: { - labelsToFields: true, - reducers: [ - 'lastNotNull', - ], - }, - }, - { - id: 'organize', - options: { - 
excludeByName: { - Field: true, - 'Last *': true, - __name__: true, - job: true, - org_id: true, - process_port: true, - }, - indexByName: { - Field: 6, - 'Last *': 11, - __name__: 7, - cl_name: 1, - cl_role: 2, - group_id: 0, - instance: 3, - job: 8, - org_id: 9, - process_port: 10, - rs_nm: 4, - rs_state: 5, - }, - renameByName: { - cl_name: 'Cluster', - cl_role: 'Role', - group_id: 'Group', - instance: 'Node', - rs_nm: 'Replica set', - rs_state: 'State', - }, - }, - }, - { - id: 'filterByValue', - options: { - filters: [ - { - config: { - id: 'equal', - options: { - value: 'configsvr', - }, - }, - fieldName: 'Role', - }, - ], - match: 'all', - type: 'include', - }, - }, - ], -}; - -local mongosRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'mongos', - collapsed: false, -}; - -local mongosNodesPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'table', - title: 'mongos nodes', - description: 'An inventory table for mongos nodes in the environment.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - custom: { - align: 'center', - cellOptions: { - type: 'auto', - }, - filterable: false, - inspect: false, - }, - mappings: [ - { - options: { - '1': { - index: 0, - text: 'Primary', - }, - '2': { - index: 1, - text: 'Secondary', - }, - }, - type: 'value', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - }, - overrides: [ - { - matcher: { - id: 'byName', - options: 'cl_role', - }, - properties: [ - { - id: 'custom.width', - value: 150, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'rs_state', - }, - properties: [ - { - id: 'custom.width', - value: 100, - }, - ], - }, - 
{ - matcher: { - id: 'byName', - options: 'rs_nm', - }, - properties: [ - { - id: 'custom.width', - value: 250, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'cl_name', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - { - matcher: { - id: 'byName', - options: 'group_id', - }, - properties: [ - { - id: 'custom.width', - value: 300, - }, - ], - }, - ], - }, - options: { - cellHeight: 'md', - footer: { - countRows: false, - enablePagination: false, - fields: '', - reducer: [ - 'sum', - ], - show: false, - }, - showHeader: true, - }, - pluginVersion: '10.2.0-59981', - transformations: [ - { - id: 'reduce', - options: { - labelsToFields: true, - reducers: [ - 'lastNotNull', - ], - }, - }, - { - id: 'organize', - options: { - excludeByName: { - Field: true, - 'Last *': true, - __name__: true, - job: true, - org_id: true, - process_port: true, - rs_state: true, - }, - indexByName: { - Field: 6, - 'Last *': 11, - __name__: 7, - cl_name: 1, - cl_role: 2, - group_id: 0, - instance: 3, - job: 8, - org_id: 9, - process_port: 10, - rs_nm: 4, - rs_state: 5, - }, - renameByName: { - cl_name: 'Cluster', - cl_role: 'Role', - group_id: 'Group', - instance: 'Node', - rs_nm: 'Replica set', - }, - }, - }, - { - id: 'filterByValue', - options: { - filters: [ - { - config: { - id: 'equal', - options: { - value: 'mongos', - }, - }, - fieldName: 'Role', - }, - ], - match: 'all', - type: 'include', - }, - }, - ], -}; - -local performanceRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Performance', - collapsed: false, -}; - -local hardwareIOPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(hardware_disk_metrics_read_count{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - 
), - prometheus.target( - 'sum (rate(hardware_disk_metrics_write_count{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Hardware I/O', - description: "The number of read and write I/O's processed.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'iops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local hardwareIOWaitTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (increase(hardware_disk_metrics_read_time_milliseconds{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(hardware_disk_metrics_write_time_milliseconds{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Hardware I/O wait time / $__interval', - description: 'The amount of time spent waiting for 
I/O requests.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local hardwareCPUInterruptServiceTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (increase(hardware_system_cpu_irq_milliseconds{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Hardware CPU interrupt service time / $__interval', - description: 'The amount of time spent servicing CPU interrupts.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - 
mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local memoryUsedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (mongodb_mem_resident{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - RAM', - format='time_series', - ), - prometheus.target( - 'sum (mongodb_mem_virtual{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - virtual', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Memory used', - description: 'The amount of RAM and virtual memory being used.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'mbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local diskSpaceUsagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - '(sum 
(hardware_disk_metrics_disk_space_used_bytes{job=~"$job",cl_name=~"$cl_name"}) by(cl_name)) / (clamp_min(sum (hardware_disk_metrics_disk_space_free_bytes{job=~"$job",cl_name=~"$cl_name"}) by(cl_name) + sum (hardware_disk_metrics_disk_space_used_bytes{job=~"$job",cl_name=~"$cl_name"}) by(cl_name),0.1))', - datasource=promDatasource, - legendFormat='{{cl_name}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Disk space usage', - description: 'The percentage of hardware space used.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 1, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local networkRequestsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(mongodb_network_numRequests{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Network requests', - description: 'The number of distinct requests that the server has received.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - 
axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local networkThroughputPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - received', - format='time_series', - ), - prometheus.target( - 'sum (rate(mongodb_network_bytesOut{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - sent', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Network throughput', - description: 'The number of bytes sent and received over network connections.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - 
mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'Bps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, - transformations: [], -}; - -local slowRequestsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(mongodb_network_numSlowDNSOperations{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - DNS', - format='time_series', - ), - prometheus.target( - 'sum (rate(mongodb_network_numSlowSSLOperations{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - SSL', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Slow requests', - description: 'The rate of DNS and SSL operations that took longer than 1 second.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - 
-local operationsRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Operations', - collapsed: false, -}; - -local connectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(mongodb_connections_totalCreated{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Connections', - description: 'The rate of incoming connections to the cluster created.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'conns/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local readwriteOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (rate(mongodb_opLatencies_reads_ops{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - ), - prometheus.target( - 'sum 
(rate(mongodb_opLatencies_writes_ops{job=~"$job",cl_name=~"$cl_name"}[$__rate_interval])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Read/Write operations', - description: 'The number of read and write operations.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local operationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (increase(mongodb_opcounters_insert{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - insert', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_opcounters_query{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - query', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_opcounters_update{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - update', - 
format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_opcounters_delete{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - delete', - format='time_series', - interval='1m', - ), - ], - type: 'piechart', - title: 'Operations', - description: 'The number of insert, query, update, and delete operations.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - }, - mappings: [], - unit: 'none', - }, - overrides: [], - }, - options: { - displayLabels: [], - legend: { - displayMode: 'table', - placement: 'bottom', - showLegend: true, - values: [ - 'value', - ], - }, - pieType: 'pie', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local readwriteLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (increase(mongodb_opLatencies_reads_latency{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_opLatencies_writes_latency{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Read/Write latency / $__interval', - description: 'The latency for read and write operations.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: 
false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'µs', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local locksRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Locks', - collapsed: false, -}; - -local currentQueuePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (mongodb_globalLock_currentQueue_readers{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - ), - prometheus.target( - 'sum (mongodb_globalLock_currentQueue_writers{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Current queue', - description: 'The number of reads and writes queued because of a lock.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - 
thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local activeClientOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (mongodb_globalLock_activeClients_readers{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - reads', - format='time_series', - ), - prometheus.target( - 'sum (mongodb_globalLock_activeClients_writers{job=~"$job",cl_name=~"$cl_name"}) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Active client operations', - description: 'The number of reads and writes being actively performed by connected clients.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local databaseDeadlocksPanel = { - datasource: 
promDatasource, - targets: [ - prometheus.target( - 'sum (increase(mongodb_locks_Database_deadlockCount_W{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_deadlockCount_w{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_deadlockCount_R{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_deadlockCount_r{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Database deadlocks / $__interval', - description: 'The number of deadlocks for database level locks.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - 
displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local databaseWaitsAcquiringLockPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum (increase(mongodb_locks_Database_acquireWaitCount_W{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_acquireWaitCount_w{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_acquireWaitCount_R{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'sum (increase(mongodb_locks_Database_acquireWaitCount_r{job=~"$job",cl_name=~"$cl_name"}[$__interval:])) by (cl_name)', - datasource=promDatasource, - legendFormat='{{cl_name}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Database waits acquiring lock / $__interval', - description: 'The number of times lock acquisitions encounter waits for database level locks.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', 
- }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'mongodb-atlas-cluster-overview.json': - dashboard.new( - 'MongoDB Atlas cluster overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(mongodb_network_bytesIn,job)', - label='Job', - refresh=2, - multi=true, - sort=0 - ), - template.new( - 'cl_name', - promDatasource, - 'label_values(mongodb_network_bytesIn{job=~"$job"},cl_name)', - label='Atlas cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='MongoDB Atlas dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addPanels( - [ - shardRow { gridPos: { h: 1, w: 24, x: 0, y: 0 } }, - shardNodesPanel { gridPos: { h: 6, w: 24, x: 0, y: 1 } }, - configRow { gridPos: { h: 1, w: 24, x: 0, y: 7 } }, - configNodesPanel { gridPos: { h: 6, w: 24, x: 0, y: 8 } }, - mongosRow { gridPos: { h: 1, w: 24, x: 0, y: 14 } }, - mongosNodesPanel { gridPos: { h: 6, w: 24, x: 0, y: 15 } }, - performanceRow { gridPos: { h: 1, w: 24, x: 0, y: 21 } }, - hardwareIOPanel { gridPos: { h: 6, w: 6, x: 0, y: 22 } }, - hardwareIOWaitTimePanel { gridPos: { h: 6, w: 6, x: 6, y: 22 } }, - hardwareCPUInterruptServiceTimePanel { gridPos: { h: 6, w: 6, x: 12, y: 
22 } }, - memoryUsedPanel { gridPos: { h: 6, w: 6, x: 18, y: 22 } }, - diskSpaceUsagePanel { gridPos: { h: 6, w: 6, x: 0, y: 28 } }, - networkRequestsPanel { gridPos: { h: 6, w: 6, x: 6, y: 28 } }, - networkThroughputPanel { gridPos: { h: 6, w: 6, x: 12, y: 28 } }, - slowRequestsPanel { gridPos: { h: 6, w: 6, x: 18, y: 28 } }, - operationsRow { gridPos: { h: 1, w: 24, x: 0, y: 34 } }, - connectionsPanel { gridPos: { h: 6, w: 12, x: 0, y: 35 } }, - readwriteOperationsPanel { gridPos: { h: 12, w: 6, x: 12, y: 35 } }, - operationsPanel { gridPos: { h: 12, w: 6, x: 18, y: 35 } }, - readwriteLatencyPanel { gridPos: { h: 6, w: 12, x: 0, y: 41 } }, - locksRow { gridPos: { h: 1, w: 24, x: 0, y: 47 } }, - currentQueuePanel { gridPos: { h: 6, w: 12, x: 0, y: 48 } }, - activeClientOperationsPanel { gridPos: { h: 6, w: 12, x: 12, y: 48 } }, - databaseDeadlocksPanel { gridPos: { h: 6, w: 12, x: 0, y: 54 } }, - databaseWaitsAcquiringLockPanel { gridPos: { h: 6, w: 12, x: 12, y: 54 } }, - ] - ), - }, -} diff --git a/mongodb-atlas-mixin/dashboards/mongodb-atlas-elections-overview.libsonnet b/mongodb-atlas-mixin/dashboards/mongodb-atlas-elections-overview.libsonnet deleted file mode 100644 index eb26cc605..000000000 --- a/mongodb-atlas-mixin/dashboards/mongodb-atlas-elections-overview.libsonnet +++ /dev/null @@ -1,920 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'mongodb-atlas-elections-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local stepupElectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_stepUpCmd_called{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - 
datasource=promDatasource, - legendFormat='{{instance}} - called', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_electionMetrics_stepUpCmd_successful{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - successful', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Step-up elections / $__interval', - description: 'The number of elections called and elections won by the node when the primary stepped down.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local priorityElectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_priorityTakeover_called{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - called', - format='time_series', - interval='1m', - ), - prometheus.target( - 
'increase(mongodb_electionMetrics_priorityTakeover_successful{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - successful', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Priority elections / $__interval', - description: 'The number of elections called and elections won by the node when it had a higher priority than the primary node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local takeoverElectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_catchUpTakeover_called{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - called', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_electionMetrics_catchUpTakeover_successful{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - successful', - 
format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Takeover elections / $__interval', - description: 'The number of elections called and elections won by the node when it was more current than the primary node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local timeoutElectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_electionTimeout_called{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - called', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_electionMetrics_electionTimeout_successful{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - successful', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Timeout elections / $__interval', - description: 'The number of elections called and elections won by the node when the time it took to reach the 
primary node exceeded the election timeout limit.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catchupsRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - ), - ], - type: 'row', - title: 'Catch-ups', -}; - -local catchupsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_numCatchUps{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Catch-ups / $__interval', - description: 'The number of times the node had to catch up to the highest known oplog entry.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - 
insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catchupsSkippedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_numCatchUpsSkipped{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Catch-ups skipped / $__interval', - description: 'The number of times the node skipped the catch up process when it was the newly elected primary.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - 
tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catchupsSucceededPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_numCatchUpsSucceeded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Catch-ups succeeded / $__interval', - description: 'The number of times the node succeeded in catching up when it was the newly elected primary.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catchupsFailedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_numCatchUpsFailedWithError{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Catch-ups failed / $__interval', - description: 'The number of times the node failed in catching up when it 
was the newly elected primary.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catchupTimeoutsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_electionMetrics_numCatchUpsTimedOut{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Catch-up timeouts / $__interval', - description: 'The number of times the node timed out during the catch-up process when it was the newly elected primary.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - 
stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local averageCatchupOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_electionMetrics_averageCatchUpOps{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Average catch-up operations', - description: 'The average number of operations done during the catch-up process when this node is the newly elected primary.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'mongodb-atlas-elections-overview.json': - dashboard.new( - 'MongoDB Atlas election overview', - time_from='%s' % $._config.dashboardPeriod, - 
tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(mongodb_network_bytesIn,job)', - label='Job', - refresh=2, - multi=true, - sort=0 - ), - template.new( - 'cl_name', - promDatasource, - 'label_values(mongodb_network_bytesIn{job=~"$job"},cl_name)', - label='Atlas cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'rs', - promDatasource, - 'label_values(mongodb_network_bytesIn{cl_name=~"$cl_name"},rs_nm)', - label='Replica set', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(mongodb_network_bytesIn{rs_nm=~"$rs"},instance)', - label='Node', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='MongoDB Atlas dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addPanels( - [ - stepupElectionsPanel { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - priorityElectionsPanel { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - takeoverElectionsPanel { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, - timeoutElectionsPanel { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, - catchupsRow { gridPos: { h: 1, w: 24, x: 0, y: 16 } }, - catchupsPanel { gridPos: { h: 8, w: 12, x: 0, y: 17 } }, - catchupsSkippedPanel { gridPos: { h: 8, w: 12, x: 12, y: 17 } }, - catchupsSucceededPanel { gridPos: { h: 8, w: 12, x: 0, y: 25 } }, - catchupsFailedPanel { gridPos: { h: 8, w: 12, x: 12, y: 25 } }, - catchupTimeoutsPanel { gridPos: { h: 8, w: 12, x: 0, y: 33 } }, - averageCatchupOperationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 33 
} }, - ] - ), - }, -} diff --git a/mongodb-atlas-mixin/dashboards/mongodb-atlas-operations-overview.libsonnet b/mongodb-atlas-mixin/dashboards/mongodb-atlas-operations-overview.libsonnet deleted file mode 100644 index 617e3a6e5..000000000 --- a/mongodb-atlas-mixin/dashboards/mongodb-atlas-operations-overview.libsonnet +++ /dev/null @@ -1,1338 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'mongodb-atlas-operations-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local queryOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_opcounters_query{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Query operations', - description: 'The rate of query operations the node has received.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - 
displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local insertOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_opcounters_insert{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Insert operations', - description: 'The rate of insert operations the node has received.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local updateOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_opcounters_update{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Update operations', - description: 'The rate of update operations this node has received.', - fieldConfig: { - defaults: { - color: { - mode: 
'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local deleteOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_opcounters_delete{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Delete operations', - description: 'The rate of delete operations this node has received.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 
'green', - value: null, - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local currentConnectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_connections_current{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Current connections', - description: 'The number of incoming connections from clients to the node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local activeConnectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_connections_active{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Active connections', - description: 'The number of connections that 
currently have operations in progress.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local readAndWriteOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_opLatencies_reads_ops{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - reads', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_opLatencies_writes_ops{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Read and write operations', - description: 'The rate of read and write operations performed by the node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, 
- viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local readAndWriteLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_opLatencies_reads_latency{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - reads', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_opLatencies_writes_latency{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - writes', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Read and write latency / $__interval', - description: 'The latency time for read and write operations performed by this node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], 
- thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'µs', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local locksRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Locks', -}; - -local databaseDeadlocksPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_locks_Database_deadlockCount_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_deadlockCount_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_deadlockCount_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_deadlockCount_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Database deadlocks / $__interval', - description: 'The number of deadlocks that have occurred for the database lock.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: 
'', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local databaseWaitCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_locks_Database_acquireWaitCount_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_acquireWaitCount_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_acquireWaitCount_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_acquireWaitCount_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 
'Database wait count / $__interval', - description: 'The number of database lock acquisitions that had to wait.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local databaseWaitTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_locks_Database_timeAcquiringMicros_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_timeAcquiringMicros_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Database_timeAcquiringMicros_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 
'increase(mongodb_locks_Database_timeAcquiringMicros_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Database wait time / $__interval', - description: 'The time spent waiting for the database lock acquisition.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'µs', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local collectionDeadlocksPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_locks_Collection_deadlockCount_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_deadlockCount_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 
'increase(mongodb_locks_Collection_deadlockCount_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_deadlockCount_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Collection deadlocks / $__interval', - description: 'The number of deadlocks that have occurred for the collection lock.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local collectionWaitCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_locks_Collection_acquireWaitCount_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 
'increase(mongodb_locks_Collection_acquireWaitCount_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_acquireWaitCount_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_acquireWaitCount_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Collection wait count / $__interval', - description: 'The number of collection lock acquisitions that had to wait.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local collectionWaitTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 
'increase(mongodb_locks_Collection_timeAcquiringMicros_W{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_timeAcquiringMicros_w{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent exclusive', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_timeAcquiringMicros_R{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - shared', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_locks_Collection_timeAcquiringMicros_r{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - intent shared', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Collection wait time / $__interval', - description: 'The time spent waiting for the collection lock acquisition.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'µs', - }, - 
overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'mongodb-atlas-operations-overview.json': - dashboard.new( - 'MongoDB Atlas operations overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(mongodb_network_bytesIn,job)', - label='Job', - refresh=2, - multi=true, - sort=0 - ), - template.new( - 'cl_name', - promDatasource, - 'label_values(mongodb_network_bytesIn{job=~"$job"},cl_name)', - label='Atlas cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'rs', - promDatasource, - 'label_values(mongodb_network_bytesIn{cl_name=~"$cl_name"},rs_nm)', - label='Replica set', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(mongodb_network_bytesIn{rs_nm=~"$rs"},instance)', - label='Node', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='MongoDB Atlas dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addPanels( - [ - queryOperationsPanel { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - insertOperationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - updateOperationsPanel { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, - deleteOperationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, - currentConnectionsPanel { gridPos: { h: 8, w: 12, x: 0, y: 16 } }, - 
activeConnectionsPanel { gridPos: { h: 8, w: 12, x: 12, y: 16 } }, - readAndWriteOperationsPanel { gridPos: { h: 8, w: 12, x: 0, y: 24 } }, - readAndWriteLatencyPanel { gridPos: { h: 8, w: 12, x: 12, y: 24 } }, - locksRow { gridPos: { h: 1, w: 24, x: 0, y: 32 } }, - databaseDeadlocksPanel { gridPos: { h: 8, w: 8, x: 0, y: 33 } }, - databaseWaitCountPanel { gridPos: { h: 8, w: 8, x: 8, y: 33 } }, - databaseWaitTimePanel { gridPos: { h: 8, w: 8, x: 16, y: 33 } }, - collectionDeadlocksPanel { gridPos: { h: 8, w: 8, x: 0, y: 41 } }, - collectionWaitCountPanel { gridPos: { h: 8, w: 8, x: 8, y: 41 } }, - collectionWaitTimePanel { gridPos: { h: 8, w: 8, x: 16, y: 41 } }, - ] - ), - }, -} diff --git a/mongodb-atlas-mixin/dashboards/mongodb-atlas-performance-overview.libsonnet b/mongodb-atlas-mixin/dashboards/mongodb-atlas-performance-overview.libsonnet deleted file mode 100644 index e49eb0afd..000000000 --- a/mongodb-atlas-mixin/dashboards/mongodb-atlas-performance-overview.libsonnet +++ /dev/null @@ -1,833 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'mongodb-atlas-performance-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local memoryPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'mongodb_mem_resident{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}} - RAM', - format='time_series', - ), - prometheus.target( - 'mongodb_mem_virtual{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}} - virtual', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Memory', - description: 'The 
amount of RAM and virtual memory being used by the database process.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'mbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local hardwareCPUInterruptServiceTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(hardware_system_cpu_irq_milliseconds{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Hardware CPU interrupt service time / $__interval', - description: 'The amount of time spent servicing CPU interrupts.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: 
false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local diskSpacePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'hardware_disk_metrics_disk_space_free_bytes{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}} - free', - format='time_series', - ), - prometheus.target( - 'hardware_disk_metrics_disk_space_used_bytes{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}', - datasource=promDatasource, - legendFormat='{{instance}} - used', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Disk space', - description: "The amount of free and used disk space on this node's hardware.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'decbytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, 
-}; - -local diskSpaceUtilizationPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - '(hardware_disk_metrics_disk_space_used_bytes{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}) / clamp_min((hardware_disk_metrics_disk_space_free_bytes{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}) + (hardware_disk_metrics_disk_space_used_bytes{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}), 1)', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Disk space utilization', - description: "The disk space utilization for this node's hardware.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - max: 1, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'percentunit', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local networkRequestsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_network_numRequests{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Network requests', - description: 'The 
rate of distinct requests the node has received.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local slowNetworkRequestsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_network_numSlowDNSOperations{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - DNS', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_network_numSlowSSLOperations{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - SSL', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Slow network requests', - description: 'The rate of slow DNS and SSL operations received by this node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - 
tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local networkThroughputPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - received', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_network_bytesOut{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - sent', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Network throughput', - description: 'The rate of bytes sent and received by the node over a network connection.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - 
steps: [ - { - color: 'green', - }, - ], - }, - unit: 'Bps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local hardwareIOPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(hardware_disk_metrics_read_count{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - reads', - format='time_series', - ), - prometheus.target( - 'rate(hardware_disk_metrics_write_count{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - writes', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Hardware I/O', - description: "The rate of read and write I/O's processed by this node.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'iops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local hardwareIOWaitTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 
'increase(hardware_disk_metrics_read_time_milliseconds{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - read', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(hardware_disk_metrics_write_time_milliseconds{job=~"$job",cl_name=~"$cl_name",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - write', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Hardware I/O wait time / $__interval', - description: "The amount of time the node has spent waiting for read and write I/O's to process.", - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: { - 'mongodb-atlas-performance-overview.json': - dashboard.new( - 'MongoDB Atlas performance overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - - .addTemplates( - [ - template.datasource( - 
promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(mongodb_network_bytesIn,job)', - label='Job', - refresh=2, - multi=true, - sort=0 - ), - template.new( - 'cl_name', - promDatasource, - 'label_values(mongodb_network_bytesIn{job=~"$job"},cl_name)', - label='Atlas cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'rs', - promDatasource, - 'label_values(mongodb_network_bytesIn{cl_name=~"$cl_name"},rs_nm)', - label='Replica set', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(mongodb_network_bytesIn{rs_nm=~"$rs"},instance)', - label='Node', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='MongoDB Atlas dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addPanels( - [ - memoryPanel { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - hardwareCPUInterruptServiceTimePanel { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - diskSpacePanel { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, - diskSpaceUtilizationPanel { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, - networkRequestsPanel { gridPos: { h: 8, w: 12, x: 0, y: 16 } }, - slowNetworkRequestsPanel { gridPos: { h: 8, w: 12, x: 12, y: 16 } }, - networkThroughputPanel { gridPos: { h: 8, w: 24, x: 0, y: 24 } }, - hardwareIOPanel { gridPos: { h: 8, w: 24, x: 0, y: 32 } }, - hardwareIOWaitTimePanel { gridPos: { h: 8, w: 24, x: 0, y: 40 } }, - ] - ), - }, -} diff --git a/mongodb-atlas-mixin/dashboards/mongodb-atlas-sharding-overview.libsonnet b/mongodb-atlas-mixin/dashboards/mongodb-atlas-sharding-overview.libsonnet deleted file mode 100644 index ee66a02da..000000000 --- a/mongodb-atlas-mixin/dashboards/mongodb-atlas-sharding-overview.libsonnet +++ /dev/null @@ 
-1,1335 +0,0 @@ -local g = (import 'grafana-builder/grafana.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local template = grafana.template; -local prometheus = grafana.prometheus; - -local dashboardUid = 'mongodb-atlas-sharding-overview'; - -local promDatasourceName = 'prometheus_datasource'; - -local promDatasource = { - uid: '${%s}' % promDatasourceName, -}; - -local staleConfigsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_countStaleConfigErrors{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Stale configs / $__interval', - description: 'Number of times that a thread hit a stale config exception and triggered a metadata refresh.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local chunkMigrationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 
'increase(mongodb_shardingStatistics_countRecipientMoveChunkStarted{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Chunk migrations / $__interval', - description: 'Chunk migration frequency for this node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local docsClonedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_countDocsClonedOnDonor{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - donor', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_shardingStatistics_countDocsClonedOnRecipient{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - recipient', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 
'Docs cloned / $__interval', - description: 'The number of documents cloned on this node when it acted as primary for the donor and acted as primary for the recipient.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local criticalSectionTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_totalCriticalSectionTimeMillis{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Critical section time / $__interval', - description: 'The time taken by the catch-up and update metadata phases of a range migration, by this node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: 
false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'ms', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local catalogCacheRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Catalog cache', - collapsed: false, -}; - -local refreshesStartedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_countIncrementalRefreshesStarted{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - incremental', - format='time_series', - interval='1m', - ), - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_countFullRefreshesStarted{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - full', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Refreshes started / $__interval', - description: 'The number of incremental and full refreshes that have started.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - 
insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'normal', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local refreshesFailedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_countFailedRefreshes{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Refreshes failed / $__interval', - description: 'The number of full and incremental refreshes that have failed.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 
'multi', - sort: 'desc', - }, - }, -}; - -local cacheStaleConfigsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_countStaleConfigErrors{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Cache stale configs / $__interval', - description: 'The number of times that a thread hit a stale config exception for the catalog cache and triggered a metadata refresh.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local cacheEntriesPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_numDatabaseEntries{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - database', - format='time_series', - interval='1m', - ), - prometheus.target( - 
'increase(mongodb_shardingStatistics_catalogCache_numCollectionEntries{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}} - collection', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Cache entries / $__interval', - description: 'The number of database and collection entries that are currently in the catalog cache.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'none', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local cacheRefreshTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'increase(mongodb_shardingStatistics_catalogCache_totalRefreshWaitTimeMicros{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__interval:])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Cache refresh time / $__interval', - description: 'The amount of time that threads had to wait for a refresh of the metadata.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: 
false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'µs', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local cacheOperationsBlockedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_shardingStatistics_catalogCache_operationsBlockedByRefresh_countAllOperations{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}}', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Cache operations blocked', - description: 'The rate of operations that are blocked by a refresh of the catalog cache. 
Specific to mongos nodes found under replica set "none".', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local shardOperationsRow = { - datasource: promDatasource, - targets: [ - prometheus.target( - '', - datasource=promDatasource, - legendFormat='', - format='time_series', - ), - ], - type: 'row', - title: 'Shard operations', -}; - -local allShardsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_find_allShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - find', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_insert_allShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - insert', - format='time_series', - ), - prometheus.target( - 
'rate(mongodb_shardingStatistics_numHostsTargeted_update_allShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - update', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_delete_allShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - delete', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_allShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - aggregate', - format='time_series', - ), - ], - type: 'timeseries', - title: 'All shards', - description: 'The rate of CRUD operations and aggregation commands run that targeted all shards. Specific to mongos nodes found under replica set "none".', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local manyShardsPanel = { - datasource: promDatasource, - 
targets: [ - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_find_manyShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - find', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_insert_manyShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - insert', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_update_manyShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - update', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_delete_manyShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - delete', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_manyShards{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - aggregate', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Many shards', - description: 'The rate of CRUD operations and aggregation commands run that targeted more than 1 shard. 
Specific to mongos nodes found under replica set "none".', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local oneShardPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_find_oneShard{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - find', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_insert_oneShard{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - insert', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_update_oneShard{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - update', - format='time_series', - ), - prometheus.target( - 
'rate(mongodb_shardingStatistics_numHostsTargeted_delete_oneShard{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - delete', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_oneShard{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - aggregate', - format='time_series', - ), - ], - type: 'timeseries', - title: 'One shard', - description: 'The rate of CRUD operations and aggregation commands run that targeted 1 shard. Specific to mongos nodes found under replica set "none".', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -local unshardedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_find_unsharded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - find', - format='time_series', - ), - 
prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_insert_unsharded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - insert', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_update_unsharded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - update', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_delete_unsharded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - delete', - format='time_series', - ), - prometheus.target( - 'rate(mongodb_shardingStatistics_numHostsTargeted_aggregate_unsharded{job=~"$job",cl_name=~"$cl_name",rs_nm=~"$rs",instance=~"$instance"}[$__rate_interval])', - datasource=promDatasource, - legendFormat='{{instance}} - aggregate', - format='time_series', - ), - ], - type: 'timeseries', - title: 'Unsharded', - description: 'The rate of CRUD operations and aggregation commands run on an unsharded collection. 
Specific to mongos nodes found under replica set "none".', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 10, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - insertNulls: false, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'never', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - ], - }, - unit: 'ops', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'right', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'desc', - }, - }, -}; - -{ - grafanaDashboards+:: - if $._config.enableShardingOverview then { - 'mongodb-atlas-sharding-overview.json': - dashboard.new( - 'MongoDB Atlas sharding overview', - time_from='%s' % $._config.dashboardPeriod, - tags=($._config.dashboardTags), - timezone='%s' % $._config.dashboardTimezone, - refresh='%s' % $._config.dashboardRefresh, - description='', - uid=dashboardUid, - ) - - .addTemplates( - [ - template.datasource( - promDatasourceName, - 'prometheus', - null, - label='Data Source', - refresh='load' - ), - template.new( - 'job', - promDatasource, - 'label_values(mongodb_network_bytesIn,job)', - label='Job', - refresh=2, - multi=true, - sort=0 - ), - template.new( - 'cl_name', - promDatasource, - 'label_values(mongodb_network_bytesIn{job=~"$job"},cl_name)', - label='Atlas cluster', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'rs', - promDatasource, - 'label_values(mongodb_network_bytesIn{cl_name=~"$cl_name"},rs_nm)', - label='Replica set', - refresh=2, - 
includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - template.new( - 'instance', - promDatasource, - 'label_values(mongodb_network_bytesIn{rs_nm=~"$rs"},instance)', - label='Node', - refresh=2, - includeAll=true, - multi=true, - allValues='.+', - sort=0 - ), - ] - ) - .addLink(grafana.link.dashboards( - asDropdown=false, - title='MongoDB Atlas dashboards', - includeVars=true, - keepTime=true, - tags=($._config.dashboardTags), - )) - .addPanels( - [ - staleConfigsPanel { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, - chunkMigrationsPanel { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, - docsClonedPanel { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, - criticalSectionTimePanel { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, - catalogCacheRow { gridPos: { h: 1, w: 24, x: 0, y: 16 } }, - refreshesStartedPanel { gridPos: { h: 8, w: 12, x: 0, y: 17 } }, - refreshesFailedPanel { gridPos: { h: 8, w: 12, x: 12, y: 17 } }, - cacheStaleConfigsPanel { gridPos: { h: 8, w: 6, x: 0, y: 25 } }, - cacheEntriesPanel { gridPos: { h: 8, w: 6, x: 6, y: 25 } }, - cacheRefreshTimePanel { gridPos: { h: 8, w: 6, x: 12, y: 25 } }, - cacheOperationsBlockedPanel { gridPos: { h: 8, w: 6, x: 18, y: 25 } }, - shardOperationsRow { gridPos: { h: 1, w: 24, x: 0, y: 33 } }, - allShardsPanel { gridPos: { h: 8, w: 12, x: 0, y: 34 } }, - manyShardsPanel { gridPos: { h: 8, w: 12, x: 12, y: 34 } }, - oneShardPanel { gridPos: { h: 8, w: 12, x: 0, y: 42 } }, - unshardedPanel { gridPos: { h: 8, w: 12, x: 12, y: 42 } }, - ] - ), - } else {}, -} diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json index 8e7cfba03..0b7f4b498 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-cluster-overview.json @@ -1,909 +1,101 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, 
- "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, + "description": "Overview of MongoDB Atlas cluster metrics.", "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": true, "keepTime": true, - "tags": [ - "mongodb-atlas-mixin" - ], - "targetBlank": false, - "title": "MongoDB Atlas dashboards", - "type": "dashboards", - "url": "" - } - ], - "panels": [ - { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "Shard", - "type": "row" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "An inventory table for shard nodes in the environment.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "center", - "cellOptions": { - "type": "auto" - }, - "filterable": false, - "inspect": false - }, - "mappings": [ - { - "options": { - "1": { - "index": 0, - "text": "Primary" - }, - "2": { - "index": 1, - "text": "Secondary" - } - }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "cl_role" - }, - "properties": [ - { - "id": "custom.width", - "value": 150 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "rs_state" - }, - "properties": [ - { - "id": "custom.width", - "value": 100 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "rs_nm" - }, - "properties": [ - { - "id": "custom.width", - "value": 250 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cl_name" - }, - "properties": [ - { - "id": "custom.width", - "value": 300 - } - ] - }, - { - "matcher": { - "id": "byName", - 
"options": "group_id" - }, - "properties": [ - { - "id": "custom.width", - "value": 300 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "State" - }, - "properties": [ - { - "id": "custom.cellOptions", - "value": { - "type": "color-text" - } - }, - { - "id": "mappings", - "value": [ - { - "options": { - "1": { - "color": "green", - "index": 0, - "text": "Primary" - }, - "2": { - "color": "yellow", - "index": 1, - "text": "Secondary" - } - }, - "type": "value" - } - ] - } - ] - } - ] - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 3, - "options": { - "cellHeight": "md", - "footer": { - "countRows": false, - "enablePagination": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "10.2.0-59981", - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "mongodb_network_bytesIn{job=~\"$job\",cl_name=~\"$cl_name\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "Shard nodes", - "transformations": [ - { - "id": "reduce", - "options": { - "labelsToFields": true, - "reducers": [ - "lastNotNull" - ] - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Field": true, - "Last *": true, - "__name__": true, - "job": true, - "org_id": true, - "process_port": true - }, - "indexByName": { - "Field": 6, - "Last *": 11, - "__name__": 7, - "cl_name": 1, - "cl_role": 2, - "group_id": 0, - "instance": 3, - "job": 8, - "org_id": 9, - "process_port": 10, - "rs_nm": 4, - "rs_state": 5 - }, - "renameByName": { - "cl_name": "Cluster", - "cl_role": "Role", - "group_id": "Group", - "instance": "Node", - "rs_nm": "Replica set", - "rs_state": "State" - } - } - }, - { - "id": "filterByValue", - "options": { - "filters": [ - { - "config": { - "id": "equal", - "options": { - "value": "shardsvr" - } - }, - "fieldName": "Role" - } - ], - "match": "all", - "type": "include" - } - } - ], - "type": "table" - 
}, - { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 7 - }, - "id": 4, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "Config", - "type": "row" + "title": "MongoDB Atlas elections overview", + "type": "link", + "url": "/d/mongodb-atlas-elections-overview" }, { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "An inventory table for config nodes in the environment.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "center", - "cellOptions": { - "type": "auto" - }, - "filterable": false, - "inspect": false - }, - "mappings": [ - { - "options": { - "1": { - "index": 0, - "text": "Primary" - }, - "2": { - "index": 1, - "text": "Secondary" - } - }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "cl_role" - }, - "properties": [ - { - "id": "custom.width", - "value": 150 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "rs_state" - }, - "properties": [ - { - "id": "custom.width", - "value": 100 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "rs_nm" - }, - "properties": [ - { - "id": "custom.width", - "value": 250 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cl_name" - }, - "properties": [ - { - "id": "custom.width", - "value": 300 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "group_id" - }, - "properties": [ - { - "id": "custom.width", - "value": 300 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "State" - }, - "properties": [ - { - "id": "custom.cellOptions", - "value": { - "type": "color-text" - } - }, - { - "id": "mappings", - "value": 
[ - { - "options": { - "1": { - "color": "green", - "index": 0, - "text": "Primary" - }, - "2": { - "color": "yellow", - "index": 1, - "text": "Secondary" - } - }, - "type": "value" - } - ] - } - ] - } - ] - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 5, - "options": { - "cellHeight": "md", - "footer": { - "countRows": false, - "enablePagination": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "10.2.0-59981", - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "mongodb_network_bytesIn{job=~\"$job\",cl_name=~\"$cl_name\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "Config nodes", - "transformations": [ - { - "id": "reduce", - "options": { - "labelsToFields": true, - "reducers": [ - "lastNotNull" - ] - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Field": true, - "Last *": true, - "__name__": true, - "job": true, - "org_id": true, - "process_port": true - }, - "indexByName": { - "Field": 6, - "Last *": 11, - "__name__": 7, - "cl_name": 1, - "cl_role": 2, - "group_id": 0, - "instance": 3, - "job": 8, - "org_id": 9, - "process_port": 10, - "rs_nm": 4, - "rs_state": 5 - }, - "renameByName": { - "cl_name": "Cluster", - "cl_role": "Role", - "group_id": "Group", - "instance": "Node", - "rs_nm": "Replica set", - "rs_state": "State" - } - } - }, - { - "id": "filterByValue", - "options": { - "filters": [ - { - "config": { - "id": "equal", - "options": { - "value": "configsvr" - } - }, - "fieldName": "Role" - } - ], - "match": "all", - "type": "include" - } - } - ], - "type": "table" + "keepTime": true, + "title": "MongoDB Atlas operations overview", + "type": "link", + "url": "/d/mongodb-atlas-operations-overview" }, { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 14 - }, - "id": 6, - 
"targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "mongos", - "type": "row" + "keepTime": true, + "title": "MongoDB Atlas performance overview", + "type": "link", + "url": "/d/mongodb-atlas-performance-overview" }, { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "An inventory table for mongos nodes in the environment.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "center", - "cellOptions": { - "type": "auto" - }, - "filterable": false, - "inspect": false - }, - "mappings": [ - { - "options": { - "1": { - "index": 0, - "text": "Primary" - }, - "2": { - "index": 1, - "text": "Secondary" - } - }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "cl_role" - }, - "properties": [ - { - "id": "custom.width", - "value": 150 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "rs_state" - }, - "properties": [ - { - "id": "custom.width", - "value": 100 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "rs_nm" - }, - "properties": [ - { - "id": "custom.width", - "value": 250 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cl_name" - }, - "properties": [ - { - "id": "custom.width", - "value": 300 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "group_id" - }, - "properties": [ - { - "id": "custom.width", - "value": 300 - } - ] - } - ] - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 15 - }, - "id": 7, - "options": { - "cellHeight": "md", - "footer": { - "countRows": false, - "enablePagination": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "10.2.0-59981", - "targets": [ - { - 
"datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "mongodb_network_bytesIn{job=~\"$job\",cl_name=~\"$cl_name\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "mongos nodes", - "transformations": [ - { - "id": "reduce", - "options": { - "labelsToFields": true, - "reducers": [ - "lastNotNull" - ] - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Field": true, - "Last *": true, - "__name__": true, - "job": true, - "org_id": true, - "process_port": true, - "rs_state": true - }, - "indexByName": { - "Field": 6, - "Last *": 11, - "__name__": 7, - "cl_name": 1, - "cl_role": 2, - "group_id": 0, - "instance": 3, - "job": 8, - "org_id": 9, - "process_port": 10, - "rs_nm": 4, - "rs_state": 5 - }, - "renameByName": { - "cl_name": "Cluster", - "cl_role": "Role", - "group_id": "Group", - "instance": "Node", - "rs_nm": "Replica set" - } - } - }, - { - "id": "filterByValue", - "options": { - "filters": [ - { - "config": { - "id": "equal", - "options": { - "value": "mongos" - } - }, - "fieldName": "Role" - } - ], - "match": "all", - "type": "include" - } - } - ], - "type": "table" - }, + "keepTime": true, + "title": "MongoDB Atlas sharding overview", + "type": "link", + "url": "/d/mongodb-atlas-sharding-overview" + } + ], + "panels": [ { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 21 + "y": 0 }, - "id": 8, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "Performance", + "id": 1, + "panels": [ ], + "title": "Hardware", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of read and write I/O's processed.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, 
"custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "iops" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, + "h": 12, + "w": 12, "x": 0, - "y": 22 + "y": 1 }, - "id": 9, + "id": 2, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(hardware_disk_metrics_read_count{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(hardware_disk_metrics_read_count{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + "instant": false, + "legendFormat": "{{cl_name}} - reads", + "refId": "Disk read operations" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(hardware_disk_metrics_write_count{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(hardware_disk_metrics_write_count{job=~\"$job\", 
cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "instant": false, + "legendFormat": "{{cl_name}} - writes", + "refId": "Disk write operations" } ], "title": "Hardware I/O", @@ -911,97 +103,59 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The amount of time spent waiting for I/O requests.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "ms" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 22 + "h": 12, + "w": 12, + "x": 12, + "y": 1 }, - "id": 10, + "id": 3, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(hardware_disk_metrics_read_time_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(hardware_disk_metrics_read_time_milliseconds{job=~\"$job\", 
cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + "legendFormat": "{{cl_name}} - reads", + "refId": "Disk read I/O time" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(hardware_disk_metrics_write_time_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(hardware_disk_metrics_write_time_milliseconds{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "legendFormat": "{{cl_name}} - writes", + "refId": "Disk write I/O time" } ], "title": "Hardware I/O wait time / $__interval", @@ -1009,87 +163,44 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The amount of time spent servicing CPU interrupts.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 10 }, "unit": 
"ms" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 22 + "h": 8, + "w": 12, + "x": 0, + "y": 9 }, - "id": 11, + "id": 4, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(hardware_system_cpu_irq_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(hardware_system_cpu_irq_milliseconds{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}}" + "legendFormat": "{{cl_name}}", + "refId": "CPU interrupt service time" } ], "title": "Hardware CPU interrupt service time / $__interval", @@ -1097,271 +208,170 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The amount of RAM and virtual memory being used.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": 
"green" - } - ] + "fillOpacity": 10 }, "unit": "mbytes" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 22 + "h": 8, + "w": 12, + "x": 12, + "y": 9 }, - "id": 12, + "id": 5, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_mem_resident{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum by (job,cl_name) (\n mongodb_mem_resident{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - RAM" + "instant": false, + "legendFormat": "{{cl_name}} - RAM", + "refId": "Memory resident (RAM)" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_mem_virtual{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum by (job,cl_name) (\n mongodb_mem_virtual{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - virtual" + "instant": false, + "legendFormat": "{{cl_name}} - virtual", + "refId": "Memory virtual" } ], "title": "Memory used", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 21 + }, + "id": 6, + "panels": [ ], + "title": "Disk", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The percentage of hardware space used.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - 
"axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "fillOpacity": 10 }, - "mappings": [ ], "max": 1, "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "percentunit" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, + "h": 8, + "w": 24, "x": 0, - "y": 28 + "y": 22 }, - "id": 13, + "id": 7, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "(sum (hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\",cl_name=~\"$cl_name\"}) by(cl_name)) / (clamp_min(sum (hardware_disk_metrics_disk_space_free_bytes{job=~\"$job\",cl_name=~\"$cl_name\"}) by(cl_name) + sum (hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\",cl_name=~\"$cl_name\"}) by(cl_name),0.1))", + "expr": "100 * ((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"})) / clamp_min((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"})) + (sum without (disk_name) (hardware_disk_metrics_disk_space_free_bytes{job=~\"$job\", 
cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"})), 1))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}}" + "instant": false, + "legendFormat": "{{cl_name}}", + "refId": "Disk space utilization" } ], "title": "Disk space usage", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 30 + }, + "id": 8, + "panels": [ ], + "title": "Network", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of distinct requests that the server has received.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 10 }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 28 + "h": 8, + "w": 12, + "x": 0, + "y": 31 }, - "id": 14, + "id": 9, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum 
(rate(mongodb_network_numRequests{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_network_numRequests{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}}" + "instant": false, + "legendFormat": "{{cl_name}}", + "refId": "Network requests" } ], "title": "Network requests", @@ -1369,192 +379,112 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of bytes sent and received over network connections.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "Bps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, + "h": 8, + "w": 12, "x": 12, - "y": 28 + "y": 31 }, - "id": 15, + "id": 10, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum 
(rate(mongodb_network_bytesIn{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - received" + "instant": false, + "legendFormat": "{{cl_name}} - received", + "refId": "Network bytes received" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_network_bytesOut{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_network_bytesOut{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - sent" + "instant": false, + "legendFormat": "{{cl_name}} - sent", + "refId": "Network bytes sent" } ], "title": "Network throughput", - "transformations": [ ], "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The rate of DNS and SSL operations that took longer than 1 second.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - 
"thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 10 }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 28 + "h": 8, + "w": 24, + "x": 0, + "y": 39 }, - "id": 16, + "id": 11, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_network_numSlowDNSOperations{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_network_numSlowDNSOperations{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - DNS" + "instant": false, + "legendFormat": "{{cl_name}} - DNS", + "refId": "Slow DNS operations" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_network_numSlowSSLOperations{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_network_numSlowSSLOperations{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - SSL" + "instant": false, + "legendFormat": "{{cl_name}} - SSL", + "refId": "Slow SSL operations" } ], "title": "Slow requests", @@ -1562,208 +492,127 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, 
"x": 0, - "y": 34 + "y": 47 }, - "id": 17, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "title": "Operations", + "id": 12, + "panels": [ ], + "title": "Connections", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The rate of incoming connections to the cluster created.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 10 }, "unit": "conns/s" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 12, + "h": 8, + "w": 24, "x": 0, - "y": 35 + "y": 48 }, - "id": 18, + "id": 13, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_connections_totalCreated{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_connections_totalCreated{job=~\"$job\", 
cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}}" + "instant": false, + "legendFormat": "{{cl_name}}", + "refId": "Connections created" } ], "title": "Connections", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 56 + }, + "id": 14, + "panels": [ ], + "title": "Operations", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of read and write operations.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "ops" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 12, - "w": 6, - "x": 12, - "y": 35 + "h": 8, + "w": 12, + "x": 0, + "y": 57 }, - "id": 19, + "id": 15, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_opLatencies_reads_ops{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by 
(cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opLatencies_reads_ops{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + "instant": false, + "legendFormat": "{{cl_name}} - reads", + "refId": "Read operation count" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (rate(mongodb_opLatencies_writes_ops{job=~\"$job\",cl_name=~\"$cl_name\"}[$__rate_interval])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opLatencies_writes_ops{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "instant": false, + "legendFormat": "{{cl_name}} - writes", + "refId": "Write operation count" } ], "title": "Read/Write operations", @@ -1771,96 +620,84 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of insert, query, update, and delete operations.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [ ], - "unit": "none" - }, - "overrides": [ ] - }, "gridPos": { - "h": 12, - "w": 6, - "x": 18, - "y": 35 + "h": 8, + "w": 12, + "x": 12, + "y": 57 }, - "id": 20, + "id": 16, "options": { - "displayLabels": [ ], "legend": { "displayMode": "table", "placement": "bottom", - "showLegend": true, "values": [ "value" ] }, - "pieType": "pie", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false + ] }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { 
"datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_opcounters_insert{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opcounters_insert{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - insert" + "legendFormat": "{{cl_name}} - insert", + "refId": "Insert operations" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_opcounters_query{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opcounters_query{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - query" + "legendFormat": "{{cl_name}} - query", + "refId": "Query operations" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_opcounters_update{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opcounters_update{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - update" + "legendFormat": "{{cl_name}} - update", + "refId": "Update operations" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - 
"expr": "sum (increase(mongodb_opcounters_delete{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opcounters_delete{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - delete" + "legendFormat": "{{cl_name}} - delete", + "refId": "Delete operations" } ], "title": "Operations", @@ -1868,97 +705,59 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The latency for read and write operations.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "µs" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 6, - "w": 12, + "h": 8, + "w": 24, "x": 0, - "y": 41 + "y": 65 }, - "id": 21, + "id": 17, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum 
(increase(mongodb_opLatencies_reads_latency{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opLatencies_reads_latency{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + "legendFormat": "{{cl_name}} - reads", + "refId": "Read operation latency" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_opLatencies_writes_latency{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opLatencies_writes_latency{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "legendFormat": "{{cl_name}} - writes", + "refId": "Write operation latency" } ], "title": "Read/Write latency / $__interval", @@ -1966,121 +765,69 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 47 + "y": 73 }, - "id": 22, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], + "id": 18, + "panels": [ ], "title": "Locks", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of reads and writes queued because of a lock.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - 
"axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 48 + "y": 74 }, - "id": 23, + "id": 19, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_globalLock_currentQueue_readers{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum by (job,cl_name) (\n mongodb_globalLock_currentQueue_readers{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + "instant": false, + "legendFormat": "{{cl_name}} - reads", + "refId": "Global lock queue - readers" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_globalLock_currentQueue_writers{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum by (job,cl_name) (\n mongodb_globalLock_currentQueue_writers{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "{{cl_name}} - writes" + "instant": false, + "legendFormat": "{{cl_name}} - writes", + "refId": "Global lock queue - writers" } ], "title": "Current queue", @@ -2088,95 +835,56 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of reads and writes being actively performed by connected clients.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 48 + "y": 74 }, - "id": 24, + "id": 20, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_globalLock_activeClients_readers{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum by (job,cl_name) (\n mongodb_globalLock_activeClients_readers{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - reads" + 
"instant": false, + "legendFormat": "{{cl_name}} - reads", + "refId": "Global lock active clients - readers" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (mongodb_globalLock_activeClients_writers{job=~\"$job\",cl_name=~\"$cl_name\"}) by (cl_name)", + "expr": "sum by (job,cl_name) (\n mongodb_globalLock_activeClients_writers{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - writes" + "instant": false, + "legendFormat": "{{cl_name}} - writes", + "refId": "Global lock active clients - writers" } ], "title": "Active client operations", @@ -2184,117 +892,85 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of deadlocks for database level locks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 0, - "y": 54 + "y": 82 }, - "id": 25, + "id": 21, "options": { "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { 
"mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_deadlockCount_W{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_deadlockCount_W{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - exclusive" + "legendFormat": "{{cl_name}} - exclusive", + "refId": "Database exclusive lock deadlocks" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_deadlockCount_w{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_deadlockCount_w{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - intent exclusive" + "legendFormat": "{{cl_name}} - intent exclusive", + "refId": "Database intent exclusive lock deadlocks" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_deadlockCount_R{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_deadlockCount_R{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - 
"intervalFactor": 2, - "legendFormat": "{{cl_name}} - shared" + "legendFormat": "{{cl_name}} - shared", + "refId": "Database shared lock deadlocks" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_deadlockCount_r{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_deadlockCount_r{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - intent shared" + "legendFormat": "{{cl_name}} - intent shared", + "refId": "Database intent shared lock deadlocks" } ], "title": "Database deadlocks / $__interval", @@ -2302,117 +978,85 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of times lock acquisitions encounter waits for database level locks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + } + } }, "gridPos": { - "h": 6, + "h": 8, "w": 12, "x": 12, - "y": 54 + "y": 82 }, - 
"id": 26, + "id": 22, "options": { "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "right" }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_acquireWaitCount_W{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_acquireWaitCount_W{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - exclusive" + "legendFormat": "{{cl_name}} - exclusive", + "refId": "Database exclusive lock wait count" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_acquireWaitCount_w{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_acquireWaitCount_w{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - intent exclusive" + "legendFormat": "{{cl_name}} - intent exclusive", + "refId": "Database intent exclusive lock wait count" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_acquireWaitCount_R{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_acquireWaitCount_R{job=~\"$job\", 
cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - shared" + "legendFormat": "{{cl_name}} - shared", + "refId": "Database shared lock wait count" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "sum (increase(mongodb_locks_Database_acquireWaitCount_r{job=~\"$job\",cl_name=~\"$cl_name\"}[$__interval:])) by (cl_name)", + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_acquireWaitCount_r{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{cl_name}} - intent shared" + "legendFormat": "{{cl_name}} - intent shared", + "refId": "Database intent shared lock wait count" } ], "title": "Database waits acquiring lock / $__interval", @@ -2420,68 +1064,63 @@ } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "mongodb-atlas-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", - "name": "prometheus_datasource", - "options": [ ], + "label": "Data source", + "name": "datasource", "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "allValue": null, - "current": { }, + "allValue": ".+", "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn,job)", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\"}, job)", "refresh": 2, - "regex": "", - "sort": 
0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Atlas cluster", + "label": "Cl_name", "multi": true, "name": "cl_name", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{job=~\"$job\"},cl_name)", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\"}, cl_name)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" } ] }, @@ -2489,33 +1128,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "MongoDB Atlas cluster overview", - "uid": "mongodb-atlas-cluster-overview", - "version": 0 + "uid": "mongodb-atlas-cluster-overview" } \ No newline at end of file diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json index 1a76b3003..15232cb29 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-elections-overview.json @@ -1,536 +1,368 
@@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, + "description": "Overview of MongoDB Atlas election metrics.", "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": true, "keepTime": true, - "tags": [ - "mongodb-atlas-mixin" - ], - "targetBlank": false, - "title": "MongoDB Atlas dashboards", - "type": "dashboards", - "url": "" + "title": "MongoDB Atlas cluster overview", + "type": "link", + "url": "/d/mongodb-atlas-cluster-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas operations overview", + "type": "link", + "url": "/d/mongodb-atlas-operations-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas performance overview", + "type": "link", + "url": "/d/mongodb-atlas-performance-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas sharding overview", + "type": "link", + "url": "/d/mongodb-atlas-sharding-overview" } ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Step-up elections", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of elections called and elections won by the node when the primary stepped down.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { 
- "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 10 }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_stepUpCmd_called{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_stepUpCmd_called{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - called" + "legendFormat": "{{instance}} - called", + "refId": "Step-up elections called" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_stepUpCmd_successful{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_stepUpCmd_successful{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - successful" + "legendFormat": "{{instance}} - successful", + "refId": "Step-up elections successful" } ], "title": "Step-up elections / $__interval", "type": "timeseries" }, + { + "collapsed": false, 
+ "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 9 + }, + "id": 3, + "panels": [ ], + "title": "Priority takeover elections", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of elections called and elections won by the node when it had a higher priority than the primary node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 10 }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 0 + "x": 0, + "y": 10 }, - "id": 3, + "id": 4, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_priorityTakeover_called{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_priorityTakeover_called{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", 
"format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - called" + "legendFormat": "{{instance}} - called", + "refId": "Priority takeover elections called" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_priorityTakeover_successful{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_priorityTakeover_successful{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - successful" + "legendFormat": "{{instance}} - successful", + "refId": "Priority takeover elections successful" } ], "title": "Priority elections / $__interval", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 18 + }, + "id": 5, + "panels": [ ], + "title": "Catch-up takeover elections", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of elections called and elections won by the node when it was more current than the primary node.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": 
"none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 10 }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 19 }, - "id": 4, + "id": 6, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_catchUpTakeover_called{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_catchUpTakeover_called{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - called" + "legendFormat": "{{instance}} - called", + "refId": "Catch-up takeover elections called" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_catchUpTakeover_successful{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_catchUpTakeover_successful{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - successful" + "legendFormat": "{{instance}} - successful", + "refId": "Catch-up takeover elections successful" } ], "title": "Takeover elections / $__interval", "type": 
"timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 27 + }, + "id": 7, + "panels": [ ], + "title": "Election timeout", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of elections called and elections won by the node when the time it took to reach the primary node exceeded the election timeout limit.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 10 }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 8 + "x": 0, + "y": 28 }, - "id": 5, + "id": 8, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_electionTimeout_called{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_electionTimeout_called{job=~\"$job\", 
cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - called" + "legendFormat": "{{instance}} - called", + "refId": "Election timeout elections called" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_electionTimeout_successful{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_electionTimeout_successful{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - successful" + "legendFormat": "{{instance}} - successful", + "refId": "Election timeout elections successful" } ], "title": "Timeout elections / $__interval", "type": "timeseries" }, { - "datasource": { - "uid": "${prometheus_datasource}" - }, + "collapsed": false, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 16 + "y": 36 }, - "id": 6, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], + "id": 9, + "panels": [ ], "title": "Catch-ups", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of times the node had to catch up to the highest known oplog entry.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": 
"none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 10 }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, - "w": 12, + "w": 8, "x": 0, - "y": 17 + "y": 37 }, - "id": 7, + "id": 10, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_numCatchUps{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_numCatchUps{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "legendFormat": "{{instance}}", + "refId": "Number of catch-ups" } ], "title": "Catch-ups / $__interval", @@ -538,88 +370,44 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of times the node skipped the catch up process when it was the newly elected primary.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 10 }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, - "w": 12, - "x": 12, - "y": 17 + "w": 8, + "x": 8, + "y": 37 }, - "id": 8, + "id": 11, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_numCatchUpsSkipped{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_numCatchUpsSkipped{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "legendFormat": "{{instance}}", + "refId": "Number of catch-ups skipped" } ], "title": "Catch-ups skipped / $__interval", @@ -627,87 +415,44 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of times the node succeeded in catching up when it was the newly elected primary.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": 
false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 10 }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, - "w": 12, - "x": 0, - "y": 25 + "w": 8, + "x": 16, + "y": 37 }, - "id": 9, + "id": 12, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_numCatchUpsSucceeded{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_numCatchUpsSucceeded{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "legendFormat": "{{instance}}", + "refId": "Number of catch-ups succeeded" } ], "title": "Catch-ups succeeded / $__interval", @@ -715,87 +460,44 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of times the node failed in catching up when it was the newly elected primary.", "fieldConfig": { 
"defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 10 }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, - "w": 12, - "x": 12, - "y": 25 + "w": 8, + "x": 0, + "y": 45 }, - "id": 10, + "id": 13, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_numCatchUpsFailedWithError{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_numCatchUpsFailedWithError{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "legendFormat": "{{instance}}", + "refId": "Number of catch-ups failed with error" } ], "title": "Catch-ups failed / $__interval", @@ -803,87 +505,44 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The 
number of times the node timed out during the catch-up process when it was the newly elected primary.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 10 }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, - "w": 12, - "x": 0, - "y": 33 + "w": 8, + "x": 8, + "y": 45 }, - "id": 11, + "id": 14, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_electionMetrics_numCatchUpsTimedOut{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_electionMetrics_numCatchUpsTimedOut{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "legendFormat": "{{instance}}", + "refId": "Number of catch-up timeouts" } ], "title": "Catch-up timeouts / $__interval", @@ -891,86 +550,43 @@ }, { "datasource": { - "uid": 
"${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The average number of operations done during the catch-up process when this node is the newly elected primary.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 10 }, "unit": "none" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, - "w": 12, - "x": 12, - "y": 33 + "w": 8, + "x": 16, + "y": 45 }, - "id": 12, + "id": 15, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "mongodb_electionMetrics_averageCatchUpOps{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}", + "expr": "mongodb_electionMetrics_averageCatchUpOps{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Average catch-up operations" } ], "title": "Average catch-up operations", @@ -978,112 +594,63 @@ 
} ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "mongodb-atlas-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", - "name": "prometheus_datasource", - "options": [ ], + "label": "Data source", + "name": "datasource", "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "allValue": null, - "current": { }, + "allValue": ".+", "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn,job)", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\"}, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Atlas cluster", + "label": "Cl_name", "multi": true, "name": "cl_name", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{job=~\"$job\"},cl_name)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": true, - "label": "Replica set", - "multi": true, - "name": "rs", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{cl_name=~\"$cl_name\"},rs_nm)", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\"}, cl_name)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - 
"tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Node", + "label": "Instance", "multi": true, "name": "instance", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{rs_nm=~\"$rs\"},instance)", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" } ] }, @@ -1091,33 +658,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", - "title": "MongoDB Atlas election overview", - "uid": "mongodb-atlas-elections-overview", - "version": 0 + "title": "MongoDB Atlas elections overview", + "uid": "mongodb-atlas-elections-overview" } \ No newline at end of file diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json index b1c6bdb91..3a6d27bba 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json +++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-operations-overview.json @@ -1,290 +1,178 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, + "description": "Overview of MongoDB Atlas operation metrics.", "links": [ { - "asDropdown": false, - "icon": "external link", - 
"includeVars": true, "keepTime": true, - "tags": [ - "mongodb-atlas-mixin" - ], - "targetBlank": false, - "title": "MongoDB Atlas dashboards", - "type": "dashboards", - "url": "" + "title": "MongoDB Atlas cluster overview", + "type": "link", + "url": "/d/mongodb-atlas-cluster-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas elections overview", + "type": "link", + "url": "/d/mongodb-atlas-elections-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas performance overview", + "type": "link", + "url": "/d/mongodb-atlas-performance-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas sharding overview", + "type": "link", + "url": "/d/mongodb-atlas-sharding-overview" } ], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Operation counters - cluster", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The rate of query operations the node has received.", + "description": "The number of insert operations.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 10 }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "none" + } }, 
"gridPos": { "h": 8, "w": 12, "x": 0, - "y": 0 + "y": 1 }, "id": 2, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opcounters_query{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opcounters_insert{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - insert", + "refId": "Insert operations" } ], - "title": "Query operations", + "title": "Insert operations", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The rate of insert operations the node has received.", + "description": "The number of query operations.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "fillOpacity": 10 }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": 
"green", - "value": null - } - ] - }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 0 + "y": 1 }, "id": 3, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opcounters_insert{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opcounters_query{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - query", + "refId": "Query operations" } ], - "title": "Insert operations", + "title": "Query operations", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The rate of update operations this node has received.", + "description": "The number of update operations.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - 
"mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 10 }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 9 }, "id": 4, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opcounters_update{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opcounters_update{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - update", + "refId": "Update operations" } ], "title": "Update operations", @@ -292,1303 +180,896 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The rate of delete operations this node has received.", + "description": "The number of delete operations.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "fillOpacity": 10 }, - "unit": "ops" - }, - "overrides": [ ] + "unit": "none" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 8 + "y": 9 }, "id": 5, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opcounters_delete{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opcounters_delete{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - delete", + "refId": "Delete operations" } ], "title": "Delete operations", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 17 + }, + "id": 6, + "panels": [ ], + "title": "Operation counters - instance", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The number of incoming connections from clients to the node.", + "description": "The rate of insert operations the node has received.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": 
{ - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "mode": "normal" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "ops" + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 18 }, - "id": 6, + "id": 7, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "mongodb_connections_current{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}", + "expr": "rate(mongodb_opcounters_insert{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Insert operations by instance" } ], - "title": "Current connections", + "title": "Insert operations", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The number of connections that currently have operations in progress.", + "description": "The rate of query operations the node has received.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, 
- "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "mode": "normal" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "ops" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 18 }, - "id": 7, + "id": 8, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "mongodb_connections_active{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}", + "expr": "rate(mongodb_opcounters_query{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Query operations by instance" } ], - "title": "Active connections", + "title": "Query operations", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The rate of read and write operations performed by the node.", + "description": "The rate of update operations this node has received.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": 
"text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, "unit": "ops" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 24 + "y": 26 }, - "id": 8, + "id": 9, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_opLatencies_reads_ops{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_opcounters_update{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - reads" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "rate(mongodb_opLatencies_writes_ops{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - writes" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Update operations by instance" } ], - "title": "Read and write operations", + "title": "Update operations", "type": "timeseries" }, { "datasource": { - "uid": 
"${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The latency time for read and write operations performed by this node.", + "description": "The rate of delete operations this node has received.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "µs" - }, - "overrides": [ ] + "unit": "ops" + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 24 + "y": 26 }, - "id": 9, + "id": 10, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "increase(mongodb_opLatencies_reads_latency{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - reads" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_opLatencies_writes_latency{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_opcounters_delete{job=~\"$job\", 
cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - writes" + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Delete operations by instance" } ], - "title": "Read and write latency / $__interval", + "title": "Delete operations", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 34 + }, + "id": 11, + "panels": [ ], + "title": "Operation latencies - cluster", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of read operations.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "ops" + } }, "gridPos": { - "h": 1, - "w": 24, + "h": 8, + "w": 12, "x": 0, - "y": 32 + "y": 35 }, - "id": 10, + "id": 12, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opLatencies_reads_ops{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" + "instant": false, + "legendFormat": "{{cl_name}} - reads", + "refId": "Read operation count" } ], - "title": "Locks", - "type": "row" + "title": "Read operation count", + "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The number of deadlocks that have occurred for the database lock.", + "description": "The number of write operations.", "fieldConfig": { "defaults": { - "color": { - "mode": 
"palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "fillOpacity": 10 }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "ops" + } }, "gridPos": { "h": 8, - "w": 8, - "x": 0, - "y": 33 + "w": 12, + "x": 12, + "y": 35 }, - "id": 11, + "id": 13, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_deadlockCount_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opLatencies_writes_ops{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" + "instant": false, + "legendFormat": "{{cl_name}} - writes", + "refId": "Write operation count" + } + ], + "title": "Write operation count", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + 
"description": "The latency time for read operations performed by this node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 }, - "expr": "increase(mongodb_locks_Database_deadlockCount_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" - }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 14, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_deadlockCount_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opLatencies_reads_latency{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" - }, + "legendFormat": "{{cl_name}} - reads", + "refId": "Read operation latency" + } + ], + "title": "Read operation latency / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The latency time for write operations performed by this node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 15, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": 
"increase(mongodb_locks_Database_deadlockCount_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "sum by (job,cl_name) (\n rate(mongodb_opLatencies_writes_latency{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "legendFormat": "{{cl_name}} - writes", + "refId": "Write operation latency" } ], - "title": "Database deadlocks / $__interval", + "title": "Write operation latency / $__interval", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 51 + }, + "id": 16, + "panels": [ ], + "title": "Operation latencies - instance", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The number of database lock acquisitions that had to wait.", + "description": "The number of read operations per instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "ops" + } }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 33 + "w": 12, 
+ "x": 0, + "y": 52 }, - "id": 12, + "id": 17, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "increase(mongodb_locks_Database_acquireWaitCount_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_acquireWaitCount_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_opLatencies_reads_ops{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "increase(mongodb_locks_Database_acquireWaitCount_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "increase(mongodb_locks_Database_acquireWaitCount_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "instant": false, + "legendFormat": "{{instance}} - reads", + "refId": "Read operation count by instance" } ], - "title": "Database wait count / $__interval", + 
"title": "Read operation count", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The time spent waiting for the database lock acquisition.", + "description": "The number of write operations per instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "µs" - }, - "overrides": [ ] + "unit": "ops" + } }, "gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 33 + "w": 12, + "x": 12, + "y": 52 }, - "id": 13, + "id": 18, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - 
"format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_opLatencies_writes_ops{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "increase(mongodb_locks_Database_timeAcquiringMicros_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "instant": false, + "legendFormat": "{{instance}} - writes", + "refId": "Write operation count by instance" } ], - "title": "Database wait time / $__interval", + "title": "Write operation count", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The number of deadlocks that have occurred for the collection lock.", + "description": "The latency time for read operations performed per instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - 
"type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "µs" + } }, "gridPos": { "h": 8, - "w": 8, + "w": 12, "x": 0, - "y": 41 + "y": 60 }, - "id": 14, + "id": 19, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "increase(mongodb_locks_Collection_deadlockCount_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "increase(mongodb_locks_Collection_deadlockCount_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_deadlockCount_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_opLatencies_reads_latency{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": 
"increase(mongodb_locks_Collection_deadlockCount_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "legendFormat": "{{instance}} - reads", + "refId": "Read operation latency by instance" } ], - "title": "Collection deadlocks / $__interval", + "title": "Read operation latency / $__interval", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The number of collection lock acquisitions that had to wait.", + "description": "The latency time for write operations performed per instance.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [ ] + "unit": "µs" + } }, "gridPos": { "h": 8, - "w": 8, - "x": 8, - "y": 41 + "w": 12, + "x": 12, + "y": 60 }, - "id": 15, + "id": 20, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": 
"increase(mongodb_locks_Collection_acquireWaitCount_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_opLatencies_writes_latency{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "increase(mongodb_locks_Collection_acquireWaitCount_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" + "legendFormat": "{{instance}} - writes", + "refId": "Write operation latency by instance" + } + ], + "title": "Write operation latency / $__interval", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 68 + }, + "id": 21, + "panels": [ ], + "title": "Average latencies", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Average latency per read operation.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 }, - "expr": "increase(mongodb_locks_Collection_acquireWaitCount_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" - }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 69 + }, + "id": 22, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + 
"uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_acquireWaitCount_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "sum (increase(mongodb_opLatencies_reads_latency{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__interval:])) by (job, cl_name) / clamp_min(sum (increase(mongodb_opLatencies_reads_ops{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__interval:])) by (job, cl_name), 1)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "legendFormat": "{{cl_name}} - reads", + "refId": "Average read latency" } ], - "title": "Collection wait count / $__interval", + "title": "Average read latency / $__interval", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The time spent waiting for the collection lock acquisition.", + "description": "Average latency per write operation.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "fillOpacity": 10 }, "unit": "µs" - }, - "overrides": [ ] + } }, 
"gridPos": { "h": 8, - "w": 8, - "x": 16, - "y": 41 + "w": 12, + "x": 12, + "y": 69 }, - "id": 16, + "id": 23, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_W{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "sum (increase(mongodb_opLatencies_writes_latency{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__interval:])) by (job, cl_name) / clamp_min(sum (increase(mongodb_opLatencies_writes_ops{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__interval:])) by (job, cl_name), 1)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - exclusive" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" + "legendFormat": "{{cl_name}} - writes", + "refId": "Average write latency" + } + ], + "title": "Average write latency / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Average latency per read operation by instance.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 }, - "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_w{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent exclusive" - }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 77 + }, + "id": 24, + "options": { + "tooltip": { + 
"mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_R{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_opLatencies_reads_latency{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__interval:]) / clamp_min(increase(mongodb_opLatencies_reads_ops{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__interval:]), 1)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - shared" - }, + "legendFormat": "{{instance}} - reads", + "refId": "Average read latency by instance" + } + ], + "title": "Average read latency / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Average latency per write operation by instance.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 77 + }, + "id": 25, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(mongodb_locks_Collection_timeAcquiringMicros_r{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__interval:])", + "expr": "increase(mongodb_opLatencies_writes_latency{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__interval:]) / clamp_min(increase(mongodb_opLatencies_writes_ops{job=~\"$job\", 
cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__interval:]), 1)", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - intent shared" + "legendFormat": "{{instance}} - writes", + "refId": "Average write latency by instance" } ], - "title": "Collection wait time / $__interval", + "title": "Average write latency / $__interval", "type": "timeseries" } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "mongodb-atlas-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", - "name": "prometheus_datasource", - "options": [ ], + "label": "Data source", + "name": "datasource", "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "allValue": null, - "current": { }, + "allValue": ".+", "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn,job)", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\"}, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Atlas cluster", + "label": "Cl_name", "multi": true, "name": "cl_name", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{job=~\"$job\"},cl_name)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": 
".+", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": true, - "label": "Replica set", - "multi": true, - "name": "rs", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{cl_name=~\"$cl_name\"},rs_nm)", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\"}, cl_name)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Node", + "label": "Instance", "multi": true, "name": "instance", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{rs_nm=~\"$rs\"},instance)", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" } ] }, @@ -1596,33 +1077,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "MongoDB Atlas operations overview", - "uid": "mongodb-atlas-operations-overview", - "version": 0 + "uid": "mongodb-atlas-operations-overview" } \ No newline at end of file diff --git a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json index f722decee..276ea8594 100644 --- a/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json 
+++ b/mongodb-atlas-mixin/dashboards_out/mongodb-atlas-performance-overview.json @@ -1,987 +1,1785 @@ { - "__inputs": [ ], - "__requires": [ ], "annotations": { "list": [ ] }, - "description": "", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, + "description": "Overview of MongoDB Atlas performance metrics.", "links": [ { - "asDropdown": false, - "icon": "external link", - "includeVars": true, "keepTime": true, - "tags": [ - "mongodb-atlas-mixin" + "title": "MongoDB Atlas cluster overview", + "type": "link", + "url": "/d/mongodb-atlas-cluster-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas elections overview", + "type": "link", + "url": "/d/mongodb-atlas-elections-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas operations overview", + "type": "link", + "url": "/d/mongodb-atlas-operations-overview" + }, + { + "keepTime": true, + "title": "MongoDB Atlas sharding overview", + "type": "link", + "url": "/d/mongodb-atlas-sharding-overview" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Connections", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The current number of active connections.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "mongodb_connections_current{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Current connections" + } + ], + "title": "Current 
connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The current number of connections with operations in progress.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 3, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "mongodb_connections_active{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{instance}}", + "refId": "Active connections" + } + ], + "title": "Active connections", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 9 + }, + "id": 4, + "panels": [ ], + "title": "Database lock deadlocks - cluster", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of database exclusive lock deadlocks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 5, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_deadlockCount_W{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - exclusive", + "refId": "Database exclusive lock deadlocks" + } + ], + "title": "Database exclusive lock deadlocks / 
$__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of database intent-exclusive lock deadlocks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 6, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_deadlockCount_w{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - intent exclusive", + "refId": "Database intent exclusive lock deadlocks" + } + ], + "title": "Database intent-exclusive lock deadlocks / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of database shared lock deadlocks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 7, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_deadlockCount_R{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - shared", + "refId": "Database shared lock deadlocks" + } + ], + "title": "Database shared lock deadlocks / $__interval", + "type": "timeseries" + }, + 
{ + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of database intent-shared lock deadlocks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 8, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_deadlockCount_r{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - intent shared", + "refId": "Database intent shared lock deadlocks" + } + ], + "title": "Database intent-shared lock deadlocks / $__interval", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 26 + }, + "id": 9, + "panels": [ ], + "title": "Database lock deadlocks - instance", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of database exclusive lock deadlocks per instance.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 10, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_deadlockCount_W{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": 
"{{instance}} - exclusive", + "refId": "Database exclusive lock deadlocks by instance" + } + ], + "title": "Database exclusive lock deadlocks / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of database intent-exclusive lock deadlocks per instance.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 11, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_deadlockCount_w{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - intent exclusive", + "refId": "Database intent exclusive lock deadlocks by instance" + } + ], + "title": "Database intent-exclusive lock deadlocks / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of database shared lock deadlocks per instance.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 12, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_deadlockCount_R{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": 
false, + "interval": "1m", + "legendFormat": "{{instance}} - shared", + "refId": "Database shared lock deadlocks by instance" + } + ], + "title": "Database shared lock deadlocks / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of database intent-shared lock deadlocks per instance.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 13, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_deadlockCount_r{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Database intent shared lock deadlocks by instance" + } + ], + "title": "Database intent-shared lock deadlocks / $__interval", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 43 + }, + "id": 14, + "panels": [ ], + "title": "Database lock wait counts - cluster", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of times lock acquisitions encounter waits for database exclusive locks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 44 + }, + "id": 15, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (job,cl_name) (\n 
rate(mongodb_locks_Database_acquireWaitCount_W{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - exclusive", + "refId": "Database exclusive lock wait count" + } + ], + "title": "Database exclusive lock wait count / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of times lock acquisitions encounter waits for database intent-exclusive locks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 44 + }, + "id": 16, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_acquireWaitCount_w{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - intent exclusive", + "refId": "Database intent exclusive lock wait count" + } + ], + "title": "Database intent-exclusive lock wait count / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of times lock acquisitions encounter waits for database shared locks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 17, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_acquireWaitCount_R{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - shared", + "refId": "Database shared lock wait count" + } + ], + "title": "Database shared lock wait count / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of times lock acquisitions encounter waits for database intent-shared locks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 18, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (job,cl_name) (\n rate(mongodb_locks_Database_acquireWaitCount_r{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])\n)", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{cl_name}} - intent shared", + "refId": "Database intent shared lock wait count" + } + ], + "title": "Database intent-shared lock wait count / $__interval", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 60 + }, + "id": 19, + "panels": [ ], + "title": "Database lock wait counts - instance", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of times lock acquisitions encounter waits for database exclusive locks per instance.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": 
"normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 61 + }, + "id": 20, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_acquireWaitCount_W{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Database exclusive lock wait count by instance" + } + ], + "title": "Database exclusive lock wait count / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of times lock acquisitions encounter waits for database intent-exclusive locks per instance.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 61 + }, + "id": 21, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_acquireWaitCount_w{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - intent exclusive", + "refId": "Database intent exclusive lock wait count by instance" + } + ], + "title": "Database intent-exclusive lock wait count / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of times lock acquisitions encounter waits for database 
shared locks per instance.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 69 + }, + "id": 22, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_acquireWaitCount_R{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - shared", + "refId": "Database shared lock wait count by instance" + } + ], + "title": "Database shared lock wait count / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of times lock acquisitions encounter waits for database intent-shared locks per instance.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 69 + }, + "id": 23, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_acquireWaitCount_r{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Database intent shared lock wait count by instance" + } + ], + "title": "Database intent-shared lock wait count / $__interval", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 
0, + "x": 0, + "y": 77 + }, + "id": 24, + "panels": [ ], + "title": "Database lock acquisition time", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The time spent acquiring database exclusive locks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 78 + }, + "id": 25, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_timeAcquiringMicros_W{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Database exclusive lock acquisition time" + } + ], + "title": "Database exclusive lock acquisition time / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The time spent acquiring database intent-exclusive locks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 78 + }, + "id": 26, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_timeAcquiringMicros_w{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} 
- intent exclusive", + "refId": "Database intent exclusive lock acquisition time" + } + ], + "title": "Database intent-exclusive lock acquisition time / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The time spent acquiring database shared locks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 86 + }, + "id": 27, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(mongodb_locks_Database_timeAcquiringMicros_R{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - shared", + "refId": "Database shared lock acquisition time" + } ], - "targetBlank": false, - "title": "MongoDB Atlas dashboards", - "type": "dashboards", - "url": "" - } - ], - "panels": [ + "title": "Database shared lock acquisition time / $__interval", + "type": "timeseries" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The amount of RAM and virtual memory being used by the database process.", + "description": "The time spent acquiring database intent-shared locks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": 
"linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "mode": "normal" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "mbytes" - }, - "overrides": [ ] + "unit": "µs" + } }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 86 }, - "id": 2, + "id": 28, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "mongodb_mem_resident{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}", + "expr": "rate(mongodb_locks_Database_timeAcquiringMicros_r{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - RAM" - }, + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Database intent shared lock acquisition time" + } + ], + "title": "Database intent-shared lock acquisition time / $__interval", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 94 + }, + "id": 29, + "panels": [ ], + "title": "Collection lock deadlocks", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of collection exclusive lock deadlocks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + 
"x": 0, + "y": 95 + }, + "id": 30, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "mongodb_mem_virtual{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}", + "expr": "rate(mongodb_locks_Collection_deadlockCount_W{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - virtual" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Collection exclusive lock deadlocks" } ], - "title": "Memory", + "title": "Collection exclusive lock deadlocks / $__interval", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The amount of time spent servicing CPU interrupts.", + "description": "The number of collection intent-exclusive lock deadlocks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "mode": "normal" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "ms" - }, - "overrides": [ ] + } + } }, 
"gridPos": { "h": 8, "w": 12, "x": 12, - "y": 0 + "y": 95 }, - "id": 3, + "id": 31, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(hardware_system_cpu_irq_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_locks_Collection_deadlockCount_w{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "legendFormat": "{{instance}} - intent exclusive", + "refId": "Collection intent exclusive lock deadlocks" } ], - "title": "Hardware CPU interrupt service time / $__interval", + "title": "Collection intent-exclusive lock deadlocks / $__interval", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The amount of free and used disk space on this node's hardware.", + "description": "The number of collection shared lock deadlocks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - 
"mode": "off" + "mode": "normal" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "decbytes" - }, - "overrides": [ ] + } + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 103 }, - "id": 4, + "id": 32, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "hardware_disk_metrics_disk_space_free_bytes{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}", + "expr": "rate(mongodb_locks_Collection_deadlockCount_R{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - free" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - used" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - shared", + "refId": "Collection shared lock deadlocks" } ], - "title": "Disk space", + "title": "Collection shared lock deadlocks / $__interval", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The disk space utilization for this node's hardware.", + "description": "The number of collection intent-shared lock deadlocks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "mode": "normal" } - }, - "mappings": [ ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ ] + } + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 8 + "y": 103 }, - "id": 5, + "id": 33, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "(hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) / clamp_min((hardware_disk_metrics_disk_space_free_bytes{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}) + (hardware_disk_metrics_disk_space_used_bytes{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}), 1)", + "expr": "rate(mongodb_locks_Collection_deadlockCount_r{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Collection intent shared lock deadlocks" } ], - "title": "Disk space utilization", + "title": "Collection intent-shared lock deadlocks / $__interval", "type": "timeseries" }, + { + "collapsed": 
false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 111 + }, + "id": 34, + "panels": [ ], + "title": "Collection lock wait counts", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The rate of distinct requests the node has received.", + "description": "The number of times lock acquisitions encounter waits for collection exclusive locks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "mode": "normal" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "reqps" - }, - "overrides": [ ] + } + } }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 112 }, - "id": 6, + "id": 35, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_network_numRequests{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_locks_Collection_acquireWaitCount_W{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", 
rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Collection exclusive lock wait count" } ], - "title": "Network requests", + "title": "Collection exclusive lock wait count / $__interval", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The rate of slow DNS and SSL operations received by this node.", + "description": "The number of times lock acquisitions encounter waits for collection intent-exclusive locks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "mode": "normal" } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "reqps" - }, - "overrides": [ ] + } + } }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 112 }, - "id": 7, + "id": 36, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": 
"rate(mongodb_network_numSlowDNSOperations{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - DNS" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_network_numSlowSSLOperations{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_locks_Collection_acquireWaitCount_w{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - SSL" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - intent exclusive", + "refId": "Collection intent exclusive lock wait count" } ], - "title": "Slow network requests", + "title": "Collection intent-exclusive lock wait count / $__interval", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The rate of bytes sent and received by the node over a network connection.", + "description": "The number of times lock acquisitions encounter waits for collection shared locks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" 
} - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "Bps" - }, - "overrides": [ ] + } + } }, "gridPos": { "h": 8, - "w": 24, + "w": 12, "x": 0, - "y": 24 + "y": 120 }, - "id": 8, + "id": 37, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_network_bytesIn{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_locks_Collection_acquireWaitCount_R{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - received" - }, + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - shared", + "refId": "Collection shared lock wait count" + } + ], + "title": "Collection shared lock wait count / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The number of times lock acquisitions encounter waits for collection intent-shared locks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 120 + }, + "id": 38, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(mongodb_network_bytesOut{job=~\"$job\",cl_name=~\"$cl_name\",rs_nm=~\"$rs\",instance=~\"$instance\"}[$__rate_interval])", + "expr": 
"rate(mongodb_locks_Collection_acquireWaitCount_r{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - sent" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - intent shared", + "refId": "Collection intent shared lock wait count" } ], - "title": "Network throughput", + "title": "Collection intent-shared lock wait count / $__interval", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 128 + }, + "id": 39, + "panels": [ ], + "title": "Collection lock acquisition time", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The rate of read and write I/O's processed by this node.", + "description": "The time spent acquiring collection exclusive locks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "iops" - }, - "overrides": [ ] + "unit": "µs" + } }, "gridPos": { "h": 8, - "w": 24, + "w": 12, "x": 0, - "y": 32 + "y": 129 }, - "id": 9, + "id": 40, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - 
"showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(hardware_disk_metrics_read_count{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_locks_Collection_timeAcquiringMicros_W{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - reads" - }, + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - exclusive", + "refId": "Collection exclusive lock acquisition time" + } + ], + "title": "Collection exclusive lock acquisition time / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The time spent acquiring collection intent-exclusive locks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 129 + }, + "id": 41, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "rate(hardware_disk_metrics_write_count{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(mongodb_locks_Collection_timeAcquiringMicros_w{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} - writes" + "instant": false, + "interval": "1m", + "legendFormat": "{{instance}} - intent 
exclusive", + "refId": "Collection intent exclusive lock acquisition time" } ], - "title": "Hardware I/O", + "title": "Collection intent-exclusive lock acquisition time / $__interval", "type": "timeseries" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "The amount of time the node has spent waiting for read and write I/O's to process.", + "description": "The time spent acquiring collection shared locks.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, "stacking": { - "group": "A", "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "ms" - }, - "overrides": [ ] + "unit": "µs" + } }, "gridPos": { "h": 8, - "w": 24, + "w": 12, "x": 0, - "y": 40 + "y": 137 }, - "id": 10, + "id": 42, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(hardware_disk_metrics_read_time_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_locks_Collection_timeAcquiringMicros_R{job=~\"$job\", 
cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - read" - }, + "legendFormat": "{{instance}} - shared", + "refId": "Collection shared lock acquisition time" + } + ], + "title": "Collection shared lock acquisition time / $__interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The time spent acquiring collection intent-shared locks.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "stacking": { + "mode": "normal" + } + }, + "unit": "µs" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 137 + }, + "id": 43, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ { "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "expr": "increase(hardware_disk_metrics_write_time_milliseconds{job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\"}[$__interval:])", + "expr": "rate(mongodb_locks_Collection_timeAcquiringMicros_r{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\",instance=~\"$instance\", rs_nm=~\"$rs\"}[$__rate_interval])", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{instance}} - write" + "legendFormat": "{{instance}} - intent shared", + "refId": "Collection intent shared lock acquisition time" } ], - "title": "Hardware I/O wait time / $__interval", + "title": "Collection intent-shared lock acquisition time / $__interval", "type": "timeseries" } ], "refresh": "1m", - "rows": [ ], - "schemaVersion": 14, - "style": "dark", + "schemaVersion": 39, "tags": [ "mongodb-atlas-mixin" ], "templating": { "list": [ { - "current": { }, - "hide": 0, - "label": "Data Source", - 
"name": "prometheus_datasource", - "options": [ ], + "label": "Data source", + "name": "datasource", "query": "prometheus", - "refresh": 1, "regex": "", "type": "datasource" }, { - "allValue": null, - "current": { }, + "allValue": ".+", "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Job", "multi": true, "name": "job", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn,job)", + "query": "label_values(mongodb_network_bytesIn, job)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid": "${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Atlas cluster", + "label": "Cl_name", "multi": true, "name": "cl_name", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{job=~\"$job\"},cl_name)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { }, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "hide": 0, - "includeAll": true, - "label": "Replica set", - "multi": true, - "name": "rs", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{cl_name=~\"$cl_name\"},rs_nm)", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\"}, cl_name)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" }, { "allValue": ".+", - "current": { }, "datasource": { - "uid": "${prometheus_datasource}" + "type": "prometheus", + "uid":
"${datasource}" }, - "hide": 0, "includeAll": true, - "label": "Node", + "label": "Instance", "multi": true, "name": "instance", - "options": [ ], - "query": "label_values(mongodb_network_bytesIn{rs_nm=~\"$rs\"},instance)", + "query": "label_values(mongodb_network_bytesIn{job=~\"$job\", cl_name=~\"$cl_name\",job=~\"$job\",cl_name=~\"$cl_name\"}, instance)", "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 1, + "type": "query" } ] }, @@ -989,33 +1787,7 @@ "from": "now-30m", "to": "now" }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, "timezone": "default", "title": "MongoDB Atlas performance overview", - "uid": "mongodb-atlas-performance-overview", - "version": 0 + "uid": "mongodb-atlas-performance-overview" } \ No newline at end of file diff --git a/mongodb-atlas-mixin/g.libsonnet b/mongodb-atlas-mixin/g.libsonnet new file mode 100644 index 000000000..ba90fd9b0 --- /dev/null +++ b/mongodb-atlas-mixin/g.libsonnet @@ -0,0 +1,3 @@ +// grafonnet must be imported with "g" alias +local g = import './vendor/grafonnet-v11.0.0/main.libsonnet'; +g diff --git a/mongodb-atlas-mixin/jsonnetfile.json b/mongodb-atlas-mixin/jsonnetfile.json index 65cebf84b..46d5af011 100644 --- a/mongodb-atlas-mixin/jsonnetfile.json +++ b/mongodb-atlas-mixin/jsonnetfile.json @@ -1,15 +1,24 @@ { - "version": 1, - "dependencies": [ - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" - } - }, - "version": "master" + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet", + "subdir": "gen/grafonnet-v11.0.0" } - ], - "legacyImports": true + }, + "version": "main" + }, + { + "source": { + "git": { + 
"remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" + } + }, + "version": "master" + } + ], + "legacyImports": true } diff --git a/mongodb-atlas-mixin/links.libsonnet b/mongodb-atlas-mixin/links.libsonnet new file mode 100644 index 000000000..72f0f4293 --- /dev/null +++ b/mongodb-atlas-mixin/links.libsonnet @@ -0,0 +1,22 @@ +local g = import './g.libsonnet'; + +{ + new(this): { + local link = g.dashboard.link, + clusterOverview: + link.link.new('MongoDB Atlas cluster overview', '/d/' + this.config.uid + '-cluster-overview') + + link.link.options.withKeepTime(true), + electionsOverview: + link.link.new('MongoDB Atlas elections overview', '/d/' + this.config.uid + '-elections-overview') + + link.link.options.withKeepTime(true), + operationsOverview: + link.link.new('MongoDB Atlas operations overview', '/d/' + this.config.uid + '-operations-overview') + + link.link.options.withKeepTime(true), + performanceOverview: + link.link.new('MongoDB Atlas performance overview', '/d/' + this.config.uid + '-performance-overview') + + link.link.options.withKeepTime(true), + shardingOverview: + link.link.new('MongoDB Atlas sharding overview', '/d/' + this.config.uid + '-sharding-overview') + + link.link.options.withKeepTime(true), + }, +} diff --git a/mongodb-atlas-mixin/main.libsonnet b/mongodb-atlas-mixin/main.libsonnet new file mode 100644 index 000000000..a60bfd42c --- /dev/null +++ b/mongodb-atlas-mixin/main.libsonnet @@ -0,0 +1,49 @@ +local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; +local dashboards = import './dashboards.libsonnet'; +local links = import './links.libsonnet'; +local panels = import './panels.libsonnet'; +local rows = import './rows.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + withConfigMixin(config): { + config+: config, + }, + + new(): { + + local this = self, + config: config, + + signals: + { + [sig]: commonlib.signals.unmarshallJsonMulti( + 
this.config.signals[sig], + type=this.config.metricsSource + ) + for sig in std.objectFields(this.config.signals) + }, + + grafana: { + variables: commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='mongodb_network_bytesIn', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ), + annotations: {}, + links: links.new(this), + panels: panels.new(this), + dashboards: dashboards.new(this), + rows: rows.new(this), + }, + + prometheus: { + alerts: alerts.new(this), + recordingRules: {}, + }, + }, +} diff --git a/mongodb-atlas-mixin/mixin.libsonnet b/mongodb-atlas-mixin/mixin.libsonnet index 4d987cf31..de54de64b 100644 --- a/mongodb-atlas-mixin/mixin.libsonnet +++ b/mongodb-atlas-mixin/mixin.libsonnet @@ -1,3 +1,12 @@ -(import 'dashboards/dashboards.libsonnet') + -(import 'alerts/alerts.libsonnet') + -(import 'config.libsonnet') +local mongodbAtlaslib = import './main.libsonnet'; + +local mongodbAtlas = + mongodbAtlaslib.new() + + mongodbAtlaslib.withConfigMixin({}); + +// populate monitoring-mixin: +{ + grafanaDashboards+:: mongodbAtlas.grafana.dashboards, + prometheusAlerts+:: mongodbAtlas.prometheus.alerts, + prometheusRules+:: mongodbAtlas.prometheus.recordingRules, +} diff --git a/mongodb-atlas-mixin/panels.libsonnet b/mongodb-atlas-mixin/panels.libsonnet new file mode 100644 index 000000000..b1b5da130 --- /dev/null +++ b/mongodb-atlas-mixin/panels.libsonnet @@ -0,0 +1,1173 @@ +local g = import './g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + new(this): { + local signals = this.signals, + + // + // Inventory table panels (for shard, config, mongos nodes) + // + + shardNodesTable: + g.panel.table.new('Shard nodes') + + g.panel.table.panelOptions.withDescription('An inventory table for shard nodes in the environment.') + + g.panel.table.queryOptions.withTargets([ + g.query.prometheus.new( + 
'${datasource}', + 'mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}' + ) + + g.query.prometheus.withInstant(true), + ]) + + g.panel.table.queryOptions.withTransformations([ + { id: 'reduce', options: { labelsToFields: true, reducers: ['lastNotNull'] } }, + { id: 'organize', options: { + excludeByName: { Field: true, 'Last *': true, __name__: true, job: true, org_id: true, process_port: true }, + indexByName: { Field: 6, 'Last *': 11, __name__: 7, cl_name: 1, cl_role: 2, group_id: 0, instance: 3, job: 8, org_id: 9, process_port: 10, rs_nm: 4, rs_state: 5 }, + renameByName: { cl_name: 'Cluster', cl_role: 'Role', group_id: 'Group', instance: 'Node', rs_nm: 'Replica set', rs_state: 'State' }, + } }, + { id: 'filterByValue', options: { filters: [{ config: { id: 'equal', options: { value: 'shardsvr' } }, fieldName: 'Role' }], match: 'all', type: 'include' } }, + ]) + + g.panel.table.standardOptions.color.withMode('thresholds') + + g.panel.table.standardOptions.withMappings([ + g.panel.table.standardOptions.mapping.ValueMap.withType() + + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ + '1': { index: 0, text: 'Primary' }, + '2': { index: 1, text: 'Secondary' }, + }), + ]) + + g.panel.table.standardOptions.withOverrides([ + g.panel.table.fieldOverride.byName.new('cl_role') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 150), + g.panel.table.fieldOverride.byName.new('rs_state') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 100), + g.panel.table.fieldOverride.byName.new('rs_nm') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), + g.panel.table.fieldOverride.byName.new('cl_name') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + g.panel.table.fieldOverride.byName.new('group_id') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + g.panel.table.fieldOverride.byName.new('State') + +
g.panel.table.fieldOverride.byName.withProperty('custom.cellOptions', { type: 'color-text' }) + + g.panel.table.fieldOverride.byName.withProperty('mappings', [ + { options: { '1': { color: 'green', index: 0, text: 'Primary' }, '2': { color: 'yellow', index: 1, text: 'Secondary' } }, type: 'value' }, + ]), + ]), + + configNodesTable: + g.panel.table.new('Config nodes') + + g.panel.table.panelOptions.withDescription('An inventory table for config nodes in the environment.') + + g.panel.table.queryOptions.withTargets([ + g.query.prometheus.new( + '${datasource}', + 'mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}' + ) + + g.query.prometheus.withInstant(true), + ]) + + g.panel.table.queryOptions.withTransformations([ + { id: 'reduce', options: { labelsToFields: true, reducers: ['lastNotNull'] } }, + { id: 'organize', options: { + excludeByName: { Field: true, 'Last *': true, __name__: true, job: true, org_id: true, process_port: true }, + indexByName: { Field: 6, 'Last *': 11, __name__: 7, cl_name: 1, cl_role: 2, group_id: 0, instance: 3, job: 8, org_id: 9, process_port: 10, rs_nm: 4, rs_state: 5 }, + renameByName: { cl_name: 'Cluster', cl_role: 'Role', group_id: 'Group', instance: 'Node', rs_nm: 'Replica set', rs_state: 'State' }, + } }, + { id: 'filterByValue', options: { filters: [{ config: { id: 'equal', options: { value: 'configsvr' } }, fieldName: 'Role' }], match: 'all', type: 'include' } }, + ]) + + g.panel.table.standardOptions.color.withMode('thresholds') + + g.panel.table.standardOptions.withMappings([ + g.panel.table.standardOptions.mapping.ValueMap.withType() + + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ + '1': { index: 0, text: 'Primary' }, + '2': { index: 1, text: 'Secondary' }, + }), + ]) + + g.panel.table.standardOptions.withOverrides([ + g.panel.table.fieldOverride.byName.new('cl_role') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 150), + g.panel.table.fieldOverride.byName.new('rs_state') +
+ g.panel.table.fieldOverride.byName.withProperty('custom.width', 100), + g.panel.table.fieldOverride.byName.new('rs_nm') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), + g.panel.table.fieldOverride.byName.new('cl_name') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + g.panel.table.fieldOverride.byName.new('group_id') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + g.panel.table.fieldOverride.byName.new('State') + + g.panel.table.fieldOverride.byName.withProperty('custom.cellOptions', { type: 'color-text' }) + + g.panel.table.fieldOverride.byName.withProperty('mappings', [ + { options: { '1': { color: 'green', index: 0, text: 'Primary' }, '2': { color: 'yellow', index: 1, text: 'Secondary' } }, type: 'value' }, + ]), + ]), + + mongosNodesTable: + g.panel.table.new('mongos nodes') + + g.panel.table.panelOptions.withDescription('An inventory table for mongos nodes in the environment.') + + g.panel.table.queryOptions.withTargets([ + g.query.prometheus.new( + '${datasource}', + 'mongodb_network_bytesIn{job=~"$job",cl_name=~"$cl_name"}' + ) + + g.query.prometheus.withInstant(true), + ]) + + g.panel.table.queryOptions.withTransformations([ + { id: 'reduce', options: { labelsToFields: true, reducers: ['lastNotNull'] } }, + { id: 'organize', options: { + excludeByName: { Field: true, 'Last *': true, __name__: true, job: true, org_id: true, process_port: true, rs_state: true }, + indexByName: { Field: 6, 'Last *': 11, __name__: 7, cl_name: 1, cl_role: 2, group_id: 0, instance: 3, job: 8, org_id: 9, process_port: 10, rs_nm: 4, rs_state: 5 }, + renameByName: { cl_name: 'Cluster', cl_role: 'Role', group_id: 'Group', instance: 'Node', rs_nm: 'Replica set' }, + } }, + { id: 'filterByValue', options: { filters: [{ config: { id: 'equal', options: { value: 'mongos' } }, fieldName: 'Role' }], match: 'all', type: 'include' } }, + ]) + +
g.panel.table.standardOptions.color.withMode('thresholds') + + g.panel.table.standardOptions.withMappings([ + g.panel.table.standardOptions.mapping.ValueMap.withType() + + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ + '1': { index: 0, text: 'Primary' }, + '2': { index: 1, text: 'Secondary' }, + }), + ]) + + g.panel.table.standardOptions.withOverrides([ + g.panel.table.fieldOverride.byName.new('cl_role') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 150), + g.panel.table.fieldOverride.byName.new('rs_state') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 100), + g.panel.table.fieldOverride.byName.new('rs_nm') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 250), + g.panel.table.fieldOverride.byName.new('cl_name') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + g.panel.table.fieldOverride.byName.new('group_id') + + g.panel.table.fieldOverride.byName.withProperty('custom.width', 300), + ]), + + // + // Performance section panels + // + + hardwareIO: + g.panel.timeSeries.new('Hardware I/O') + + g.panel.timeSeries.panelOptions.withDescription("The number of read and write I/O's processed.") + + g.panel.timeSeries.queryOptions.withTargets([ + signals.hardware.diskReadCount.asTarget(), + signals.hardware.diskWriteCount.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('iops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + hardwareIOWaitTime: + g.panel.timeSeries.new('Hardware I/O wait time / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The amount of time spent waiting for I/O requests.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.hardware.diskReadTime.asTarget() + + 
g.query.prometheus.withInterval('1m'), + signals.hardware.diskWriteTime.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + hardwareCPUInterruptServiceTime: + g.panel.timeSeries.new('Hardware CPU interrupt service time / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The amount of time spent servicing CPU interrupts.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.hardware.cpuIrqTime.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + memoryUsed: + g.panel.timeSeries.new('Memory used') + + g.panel.timeSeries.panelOptions.withDescription('The amount of RAM and virtual memory being used.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.memory.memoryResident.asTarget(), + signals.memory.memoryVirtual.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('mbytes') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + diskSpaceUsage: + g.panel.timeSeries.new('Disk space usage') + + g.panel.timeSeries.panelOptions.withDescription('The percentage of hardware space used.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.hardware.diskSpaceUtilization.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('percentunit') + + g.panel.timeSeries.standardOptions.withMin(0) + + 
g.panel.timeSeries.standardOptions.withMax(1) + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + networkRequests: + g.panel.timeSeries.new('Network requests') + + g.panel.timeSeries.panelOptions.withDescription('The number of distinct requests that the server has received.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.network.networkRequests.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + networkThroughput: + g.panel.timeSeries.new('Network throughput') + + g.panel.timeSeries.panelOptions.withDescription('The number of bytes sent and received over network connections.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.network.networkBytesIn.asTarget(), + signals.network.networkBytesOut.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('Bps') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + slowRequests: + g.panel.timeSeries.new('Slow requests') + + g.panel.timeSeries.panelOptions.withDescription('The rate of DNS and SSL operations that took longer than 1 second.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.network.networkSlowDNS.asTarget(), + signals.network.networkSlowSSL.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + // + // 
Operations section panels + // + + connections: + g.panel.timeSeries.new('Connections') + + g.panel.timeSeries.panelOptions.withDescription('The rate of incoming connections to the cluster created.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.connections.connectionsCreated.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('conns/s') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + readwriteOperations: + g.panel.timeSeries.new('Read/Write operations') + + g.panel.timeSeries.panelOptions.withDescription('The number of read and write operations.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.operations.opLatenciesReadsOps.asTarget(), + signals.operations.opLatenciesWritesOps.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('ops') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + operations: + g.panel.pieChart.new('Operations') + + g.panel.pieChart.panelOptions.withDescription('The number of insert, query, update, and delete operations.') + + g.panel.pieChart.queryOptions.withTargets([ + signals.operations.opCountersInsert.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.operations.opCountersQuery.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.operations.opCountersUpdate.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.operations.opCountersDelete.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.pieChart.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.pieChart.options.legend.withDisplayMode('table') + + g.panel.pieChart.options.legend.withPlacement('bottom') + + 
g.panel.pieChart.options.legend.withValues(['value']) + + g.panel.pieChart.options.tooltip.withMode('multi') + + g.panel.pieChart.options.tooltip.withSort('desc'), + + readwriteLatency: + g.panel.timeSeries.new('Read/Write latency / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The latency for read and write operations.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.operations.opLatenciesReadsLatency.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.operations.opLatenciesWritesLatency.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('µs') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // + // Locks section panels + // + + currentQueue: + g.panel.timeSeries.new('Current queue') + + g.panel.timeSeries.panelOptions.withDescription('The number of reads and writes queued because of a lock.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.locks.globalLockQueueReaders.asTarget(), + signals.locks.globalLockQueueWriters.asTarget(), + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + activeClientOperations: + g.panel.timeSeries.new('Active client operations') + + g.panel.timeSeries.panelOptions.withDescription('The number of reads and writes being actively performed by connected clients.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.locks.globalLockActiveReaders.asTarget(), + signals.locks.globalLockActiveWriters.asTarget(), + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi') + + 
g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + databaseDeadlocks: + g.panel.timeSeries.new('Database deadlocks / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The number of deadlocks for database level locks.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.locks.dbDeadlockExclusive.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.locks.dbDeadlockIntentExclusive.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.locks.dbDeadlockShared.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.locks.dbDeadlockIntentShared.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + databaseWaitsAcquiringLock: + g.panel.timeSeries.new('Database waits acquiring lock / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The number of times lock acquisitions encounter waits for database level locks.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.locks.dbWaitCountExclusive.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.locks.dbWaitCountIntentExclusive.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.locks.dbWaitCountShared.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.locks.dbWaitCountIntentShared.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.options.legend.withPlacement('right') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), + + // + // Elections section panels + // + + stepUpElectionsCalled: + g.panel.timeSeries.new('Step-up elections / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when the primary stepped down.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.elections.stepUpCmdCalled.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.elections.stepUpCmdSuccessful.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + priorityTakeoverCalled: + g.panel.timeSeries.new('Priority elections / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when it had a higher priority than the primary node.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.elections.priorityTakeoverCalled.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.elections.priorityTakeoverSuccessful.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + catchUpTakeoverCalled: + g.panel.timeSeries.new('Takeover elections / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when it was more current than the primary node.') + + g.panel.timeSeries.queryOptions.withTargets([ + 
signals.elections.catchUpTakeoverCalled.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.elections.catchUpTakeoverSuccessful.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + electionTimeoutCalled: + g.panel.timeSeries.new('Timeout elections / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The number of elections called and elections won by the node when the time it took to reach the primary node exceeded the election timeout limit.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.elections.electionTimeoutCalled.asTarget() + + g.query.prometheus.withInterval('1m'), + signals.elections.electionTimeoutSuccessful.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + catchUpsTotal: + g.panel.timeSeries.new('Catch-ups / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The number of times the node had to catch up to the highest known oplog entry.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.elections.numCatchUps.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('none') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10), + + catchUpsSkipped: + g.panel.timeSeries.new('Catch-ups skipped / $__interval') + + g.panel.timeSeries.panelOptions.withDescription('The number of times the node skipped the catch up process 
when it was the newly elected primary.')
+      + g.panel.timeSeries.queryOptions.withTargets([
+        signals.elections.numCatchUpsSkipped.asTarget()
+        + g.query.prometheus.withInterval('1m'),
+      ])
+      + g.panel.timeSeries.standardOptions.withUnit('none')
+      + g.panel.timeSeries.options.tooltip.withMode('multi')
+      + g.panel.timeSeries.options.tooltip.withSort('desc')
+      + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10),
+
+    // Shared builder for the time series panels defined below.
+    //   title, description: passed to timeSeries.new() / withDescription().
+    //   signal:             object exposing asTarget(); it becomes the only target.
+    //   unit:               standardOptions unit; left unset when null.
+    //   interval:           query interval added to the target; left unset when null.
+    //   stacked:            when true, stacking mode 'normal' is appended last.
+    // Every panel gets the 'multi' tooltip sorted 'desc' and a fill opacity of 10.
+    // The mixins are composed in the same order the hand-written chains used.
+    local timeSeriesPanel(title, description, signal, unit=null, interval=null, stacked=false) =
+      local ts = g.panel.timeSeries;
+      local target =
+        signal.asTarget()
+        + (if interval == null then {} else g.query.prometheus.withInterval(interval));
+      ts.new(title)
+      + ts.panelOptions.withDescription(description)
+      + ts.queryOptions.withTargets([target])
+      + (if unit == null then {} else ts.standardOptions.withUnit(unit))
+      + ts.options.tooltip.withMode('multi')
+      + ts.options.tooltip.withSort('desc')
+      + ts.fieldConfig.defaults.custom.withFillOpacity(10)
+      + (if stacked then ts.fieldConfig.defaults.custom.stacking.withMode('normal') else {}),
+
+    catchUpsSucceeded:
+      timeSeriesPanel(
+        'Catch-ups succeeded / $__interval',
+        'The number of times the node succeeded in catching up when it was the newly elected primary.',
+        signals.elections.numCatchUpsSucceeded,
+        unit='none',
+        interval='1m',
+      ),
+
+    catchUpsFailed:
+      timeSeriesPanel(
+        'Catch-ups failed / $__interval',
+        'The number of times the node failed in catching up when it was the newly elected primary.',
+        signals.elections.numCatchUpsFailedWithError,
+        unit='none',
+        interval='1m',
+      ),
+
+    catchUpsTimedOut:
+      timeSeriesPanel(
+        'Catch-up timeouts / $__interval',
+        'The number of times the node timed out during the catch-up process when it was the newly elected primary.',
+        signals.elections.numCatchUpsTimedOut,
+        unit='none',
+        interval='1m',
+      ),
+
+    averageCatchUpOps:
+      timeSeriesPanel(
+        'Average catch-up operations',
+        'The average number of operations done during the catch-up process when this node is the newly elected primary.',
+        signals.elections.averageCatchUpOps,
+        unit='none',
+      ),
+
+    //
+    // Operations Overview dashboard panels
+    //
+
+    // Section 1: Operation Counters (by type) - cluster-level aggregated
+    insertOperations:
+      timeSeriesPanel(
+        'Insert operations',
+        'The number of insert operations.',
+        signals.operations.opCountersInsert,
+        unit='none',
+        interval='1m',
+      ),
+
+    queryOperations:
+      timeSeriesPanel(
+        'Query operations',
+        'The number of query operations.',
+        signals.operations.opCountersQuery,
+        unit='none',
+        interval='1m',
+      ),
+
+    updateOperations:
+      timeSeriesPanel(
+        'Update operations',
+        'The number of update operations.',
+        signals.operations.opCountersUpdate,
+        unit='none',
+        interval='1m',
+      ),
+
+    deleteOperations:
+      timeSeriesPanel(
+        'Delete operations',
+        'The number of delete operations.',
+        signals.operations.opCountersDelete,
+        unit='none',
+        interval='1m',
+      ),
+
+    // Section 2: Operation Counters (by instance)
+    insertOperationsByInstance:
+      timeSeriesPanel(
+        'Insert operations',
+        'The rate of insert operations the node has received.',
+        signals.operations.opCountersInsertByInstance,
+        unit='ops',
+        stacked=true,
+      ),
+
+    queryOperationsByInstance:
+      timeSeriesPanel(
+        'Query operations',
+        'The rate of query operations the node has received.',
+        signals.operations.opCountersQueryByInstance,
+        unit='ops',
+        stacked=true,
+      ),
+
+    updateOperationsByInstance:
+      timeSeriesPanel(
+        'Update operations',
+        'The rate of update operations this node has received.',
+        signals.operations.opCountersUpdateByInstance,
+        unit='ops',
+        stacked=true,
+      ),
+
+    deleteOperationsByInstance:
+      timeSeriesPanel(
+        'Delete operations',
+        'The rate of delete operations this node has received.',
+        signals.operations.opCountersDeleteByInstance,
+        unit='ops',
+        stacked=true,
+      ),
+
+    // Section 3: Operation Latencies (cluster)
+    readOperationCount:
+      timeSeriesPanel(
+        'Read operation count',
+        'The number of read operations.',
+        signals.operations.opLatenciesReadsOps,
+        unit='ops',
+      ),
+
+    writeOperationCount:
+      timeSeriesPanel(
+        'Write operation count',
+        'The number of write operations.',
+        signals.operations.opLatenciesWritesOps,
+        unit='ops',
+      ),
+
+    readOperationLatency:
+      timeSeriesPanel(
+        'Read operation latency / $__interval',
+        'The latency time for read operations performed by this node.',
+        signals.operations.opLatenciesReadsLatency,
+        unit='µs',
+        interval='1m',
+      ),
+
+    writeOperationLatency:
+      timeSeriesPanel(
+        'Write operation latency / $__interval',
+        'The latency time for write operations performed by this node.',
+        signals.operations.opLatenciesWritesLatency,
+        unit='µs',
+        interval='1m',
+      ),
+
+    // Section 4: Operation Latencies (by instance)
+    readOperationCountByInstance:
+      timeSeriesPanel(
+        'Read operation count',
+        'The number of read operations per instance.',
+        signals.operations.opLatenciesReadsOpsByInstance,
+        unit='ops',
+        stacked=true,
+      ),
+
+    writeOperationCountByInstance:
+      timeSeriesPanel(
+        'Write operation count',
+        'The number of write operations per instance.',
+        signals.operations.opLatenciesWritesOpsByInstance,
+        unit='ops',
+        stacked=true,
+      ),
+
+    readOperationLatencyByInstance:
+      timeSeriesPanel(
+        'Read operation latency / $__interval',
+        'The latency time for read operations performed per instance.',
+        signals.operations.opLatenciesReadsLatencyByInstance,
+        unit='µs',
+        interval='1m',
+        stacked=true,
+      ),
+
+    writeOperationLatencyByInstance:
+      timeSeriesPanel(
+        'Write operation latency / $__interval',
+        'The latency time for write operations performed per instance.',
+        signals.operations.opLatenciesWritesLatencyByInstance,
+        unit='µs',
+        interval='1m',
+        stacked=true,
+      ),
+
+    // Section 5: Average Latencies (calculated)
+    avgReadLatency:
+      timeSeriesPanel(
+        'Average read latency / $__interval',
+        'Average latency per read operation.',
+        signals.operations.avgReadLatency,
+        unit='µs',
+        interval='1m',
+      ),
+
+    avgWriteLatency:
+      timeSeriesPanel(
+        'Average write latency / $__interval',
+        'Average latency per write operation.',
+        signals.operations.avgWriteLatency,
+        unit='µs',
+        interval='1m',
+      ),
+
+    avgReadLatencyByInstance:
+      timeSeriesPanel(
+        'Average read latency / $__interval',
+        'Average latency per read operation by instance.',
+        signals.operations.avgReadLatencyByInstance,
+        unit='µs',
+        interval='1m',
+      ),
+
+    avgWriteLatencyByInstance:
+      timeSeriesPanel(
+        'Average write latency / $__interval',
+        'Average latency per write operation by instance.',
+        signals.operations.avgWriteLatencyByInstance,
+        unit='µs',
+        interval='1m',
+      ),
+
+    //
+    // Performance Overview dashboard panels
+    //
+
+    // Section 1: Connection Metrics
+    currentConnections:
+      timeSeriesPanel(
+        'Current connections',
+        'The current number of active connections.',
+        signals.connections.connectionsCurrent,
+      ),
+
+    activeConnections:
+      timeSeriesPanel(
+        'Active connections',
+        'The current number of connections with operations in progress.',
+        signals.connections.connectionsActive,
+      ),
+
+    // Section 2: Database Lock Deadlocks (Cluster)
+    dbLockDeadlocksExclusive:
+      timeSeriesPanel(
+        'Database exclusive lock deadlocks / $__interval',
+        'The number of database exclusive lock deadlocks.',
+        signals.locks.dbDeadlockExclusive,
+        interval='1m',
+      ),
+
+    dbLockDeadlocksIntentExclusive:
+      timeSeriesPanel(
+        'Database intent-exclusive lock deadlocks / $__interval',
+        'The number of database intent-exclusive lock deadlocks.',
+        signals.locks.dbDeadlockIntentExclusive,
+        interval='1m',
+      ),
+
+    dbLockDeadlocksShared:
+      timeSeriesPanel(
+        'Database shared lock deadlocks / $__interval',
+        'The number of database shared lock deadlocks.',
+        signals.locks.dbDeadlockShared,
+        interval='1m',
+      ),
+
+    dbLockDeadlocksIntentShared:
+      timeSeriesPanel(
+        'Database intent-shared lock deadlocks / $__interval',
+        'The number of database intent-shared lock deadlocks.',
+        signals.locks.dbDeadlockIntentShared,
+        interval='1m',
+      ),
+
+    // Section 3: Database Lock Deadlocks (By Instance)
+    dbLockDeadlocksExclusiveByInstance:
+      timeSeriesPanel(
+        'Database exclusive lock deadlocks / $__interval',
+        'The number of database exclusive lock deadlocks per instance.',
+        signals.locks.dbDeadlockExclusiveByInstance,
+        interval='1m',
+        stacked=true,
+      ),
+
+    dbLockDeadlocksIntentExclusiveByInstance:
+      timeSeriesPanel(
+        'Database intent-exclusive lock deadlocks / $__interval',
+        'The number of database intent-exclusive lock deadlocks per instance.',
+        signals.locks.dbDeadlockIntentExclusiveByInstance,
+        interval='1m',
+        stacked=true,
+      ),
+
+    dbLockDeadlocksSharedByInstance:
+      timeSeriesPanel(
+        'Database shared lock deadlocks / $__interval',
+        'The number of database shared lock deadlocks per instance.',
+        signals.locks.dbDeadlockSharedByInstance,
+        interval='1m',
+        stacked=true,
+      ),
+
+    dbLockDeadlocksIntentSharedByInstance:
+      timeSeriesPanel(
+        'Database intent-shared lock deadlocks / $__interval',
+        'The number of database intent-shared lock deadlocks per instance.',
+        signals.locks.dbDeadlockIntentSharedByInstance,
+        interval='1m',
+        stacked=true,
+      ),
+
+    // Section 4: Database Lock Wait Counts (Cluster)
+    dbLockWaitCountExclusive:
+      timeSeriesPanel(
+        'Database exclusive lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for database exclusive locks.',
+        signals.locks.dbWaitCountExclusive,
+        interval='1m',
+      ),
+
+    dbLockWaitCountIntentExclusive:
+      timeSeriesPanel(
+        'Database intent-exclusive lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for database intent-exclusive locks.',
+        signals.locks.dbWaitCountIntentExclusive,
+        interval='1m',
+      ),
+
+    dbLockWaitCountShared:
+      timeSeriesPanel(
+        'Database shared lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for database shared locks.',
+        signals.locks.dbWaitCountShared,
+        interval='1m',
+      ),
+
+    dbLockWaitCountIntentShared:
+      timeSeriesPanel(
+        'Database intent-shared lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for database intent-shared locks.',
+        signals.locks.dbWaitCountIntentShared,
+        interval='1m',
+      ),
+
+    // Section 5: Database Lock Wait Counts (By Instance)
+    dbLockWaitCountExclusiveByInstance:
+      timeSeriesPanel(
+        'Database exclusive lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for database exclusive locks per instance.',
+        signals.locks.dbWaitCountExclusiveByInstance,
+        interval='1m',
+        stacked=true,
+      ),
+
+    dbLockWaitCountIntentExclusiveByInstance:
+      timeSeriesPanel(
+        'Database intent-exclusive lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for database intent-exclusive locks per instance.',
+        signals.locks.dbWaitCountIntentExclusiveByInstance,
+        interval='1m',
+        stacked=true,
+      ),
+
+    dbLockWaitCountSharedByInstance:
+      timeSeriesPanel(
+        'Database shared lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for database shared locks per instance.',
+        signals.locks.dbWaitCountSharedByInstance,
+        interval='1m',
+        stacked=true,
+      ),
+
+    dbLockWaitCountIntentSharedByInstance:
+      timeSeriesPanel(
+        'Database intent-shared lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for database intent-shared locks per instance.',
+        signals.locks.dbWaitCountIntentSharedByInstance,
+        interval='1m',
+        stacked=true,
+      ),
+
+    // Section 6: Database Lock Acquisition Time
+    dbLockAcqTimeExclusive:
+      timeSeriesPanel(
+        'Database exclusive lock acquisition time / $__interval',
+        'The time spent acquiring database exclusive locks.',
+        signals.locks.dbAcqTimeExclusive,
+        unit='µs',
+        interval='1m',
+        stacked=true,
+      ),
+
+    dbLockAcqTimeIntentExclusive:
+      timeSeriesPanel(
+        'Database intent-exclusive lock acquisition time / $__interval',
+        'The time spent acquiring database intent-exclusive locks.',
+        signals.locks.dbAcqTimeIntentExclusive,
+        unit='µs',
+        interval='1m',
+        stacked=true,
+      ),
+
+    dbLockAcqTimeShared:
+      timeSeriesPanel(
+        'Database shared lock acquisition time / $__interval',
+        'The time spent acquiring database shared locks.',
+        signals.locks.dbAcqTimeShared,
+        unit='µs',
+        interval='1m',
+        stacked=true,
+      ),
+
+    dbLockAcqTimeIntentShared:
+      timeSeriesPanel(
+        'Database intent-shared lock acquisition time / $__interval',
+        'The time spent acquiring database intent-shared locks.',
+        signals.locks.dbAcqTimeIntentShared,
+        unit='µs',
+        interval='1m',
+        stacked=true,
+      ),
+
+    // Section 7: Collection Lock Deadlocks
+    collLockDeadlocksExclusive:
+      timeSeriesPanel(
+        'Collection exclusive lock deadlocks / $__interval',
+        'The number of collection exclusive lock deadlocks.',
+        signals.locks.collDeadlockExclusive,
+        interval='1m',
+        stacked=true,
+      ),
+
+    collLockDeadlocksIntentExclusive:
+      timeSeriesPanel(
+        'Collection intent-exclusive lock deadlocks / $__interval',
+        'The number of collection intent-exclusive lock deadlocks.',
+        signals.locks.collDeadlockIntentExclusive,
+        interval='1m',
+        stacked=true,
+      ),
+
+    collLockDeadlocksShared:
+      timeSeriesPanel(
+        'Collection shared lock deadlocks / $__interval',
+        'The number of collection shared lock deadlocks.',
+        signals.locks.collDeadlockShared,
+        interval='1m',
+        stacked=true,
+      ),
+
+    collLockDeadlocksIntentShared:
+      timeSeriesPanel(
+        'Collection intent-shared lock deadlocks / $__interval',
+        'The number of collection intent-shared lock deadlocks.',
+        signals.locks.collDeadlockIntentShared,
+        interval='1m',
+        stacked=true,
+      ),
+
+    // Section 8: Collection Lock Wait Counts
+    collLockWaitCountExclusive:
+      timeSeriesPanel(
+        'Collection exclusive lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for collection exclusive locks.',
+        signals.locks.collWaitCountExclusive,
+        interval='1m',
+        stacked=true,
+      ),
+
+    collLockWaitCountIntentExclusive:
+      timeSeriesPanel(
+        'Collection intent-exclusive lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for collection intent-exclusive locks.',
+        signals.locks.collWaitCountIntentExclusive,
+        interval='1m',
+        stacked=true,
+      ),
+
+    collLockWaitCountShared:
+      timeSeriesPanel(
+        'Collection shared lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for collection shared locks.',
+        signals.locks.collWaitCountShared,
+        interval='1m',
+        stacked=true,
+      ),
+
+    collLockWaitCountIntentShared:
+      timeSeriesPanel(
+        'Collection intent-shared lock wait count / $__interval',
+        'The number of times lock acquisitions encounter waits for collection intent-shared locks.',
+        signals.locks.collWaitCountIntentShared,
+        interval='1m',
+        stacked=true,
+      ),
+
+    // Section 9: Collection Lock Acquisition Time
+    collLockAcqTimeExclusive:
+      timeSeriesPanel(
+        'Collection exclusive lock acquisition time / $__interval',
+        'The time spent acquiring collection exclusive locks.',
+        signals.locks.collAcqTimeExclusive,
+        unit='µs',
+        interval='1m',
+        stacked=true,
+      ),
+
+    collLockAcqTimeIntentExclusive:
+      timeSeriesPanel(
+        'Collection intent-exclusive lock acquisition time / $__interval',
+        'The time spent acquiring collection intent-exclusive locks.',
+        signals.locks.collAcqTimeIntentExclusive,
+        unit='µs',
+        interval='1m',
+        stacked=true,
+      ),
+
+    collLockAcqTimeShared:
+      timeSeriesPanel(
+        'Collection shared lock acquisition time / $__interval',
+        'The time spent acquiring collection shared locks.',
+        signals.locks.collAcqTimeShared,
+        unit='µs',
+        interval='1m',
+        stacked=true,
+      ),
+
+    collLockAcqTimeIntentShared:
+      timeSeriesPanel(
+        'Collection intent-shared lock acquisition time / $__interval',
+        'The time spent acquiring collection intent-shared locks.',
+        signals.locks.collAcqTimeIntentShared,
+        unit='µs',
+        interval='1m',
+        stacked=true,
+      ),
+  },
+}
diff --git a/mongodb-atlas-mixin/prometheus_rules_out/prometheus_alerts.yaml b/mongodb-atlas-mixin/prometheus_rules_out/prometheus_alerts.yaml
index c71ae4633..5594c4fe5 100644
--- a/mongodb-atlas-mixin/prometheus_rules_out/prometheus_alerts.yaml
+++
b/mongodb-atlas-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -1,7 +1,7 @@ groups: - name: mongodb-atlas-alerts rules: - - alert: MongoDBAtlasHighNumberOfCollectionExclusiveDeadlocks + - alert: MongoDBAtlasCollExclusiveDeadlocks annotations: description: The number of collection exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of collection exclusive deadlocks occurring. @@ -10,7 +10,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfCollectionIntentExclusiveDeadlocks + - alert: MongoDBAtlasCollIntentExclDeadlocks annotations: description: The number of collection intent-exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of collection intent-exclusive deadlocks occurring. @@ -19,7 +19,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfCollectionSharedDeadlocks + - alert: MongoDBAtlasCollSharedDeadlocks annotations: description: The number of collection shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of collection shared deadlocks occurring. @@ -28,7 +28,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfCollectionIntentSharedDeadlocks + - alert: MongoDBAtlasCollIntentSharedDeadlocks annotations: description: The number of collection intent-shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of collection intent-shared deadlocks occurring. 
@@ -37,7 +37,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfDatabaseExclusiveDeadlocks + - alert: MongoDBAtlasDBExclusiveDeadlocks annotations: description: The number of database exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of database exclusive deadlocks occurring. @@ -46,7 +46,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfDatabaseIntentExclusiveDeadlocks + - alert: MongoDBAtlasDBIntentExclDeadlocks annotations: description: The number of database intent-exclusive-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of database intent-exclusive deadlocks occurring. @@ -55,7 +55,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfDatabaseSharedDeadlocks + - alert: MongoDBAtlasDBSharedDeadlocks annotations: description: The number of database shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of database shared deadlocks occurring. @@ -64,7 +64,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfDatabaseIntentSharedDeadlocks + - alert: MongoDBAtlasDBIntentSharedDeadlocks annotations: description: The number of database intent-shared-lock deadlocks occurring on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of database intent-shared deadlocks occurring. 
@@ -73,7 +73,7 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfSlowNetworkRequests + - alert: MongoDBAtlasSlowNetworkRequests annotations: description: The number of DNS and SSL operations taking more than 1 second to complete on node {{$labels.instance}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of slow network requests. @@ -100,9 +100,9 @@ groups: for: 5m labels: severity: warning - - alert: MongoDBAtlasHighNumberOfTimeoutElections + - alert: MongoDBAtlasElectionTimeouts annotations: - description: The number of elections being called due to the primary node timing out in replica set {{$labels.rs_m}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. + description: The number of elections being called due to the primary node timing out in replica set {{$labels.rs_nm}} in cluster {{$labels.cl_name}} is {{printf "%.0f" $value}} which is above the threshold of 10. summary: There is a high number of elections being called due to the primary node timing out. 
expr: | sum without (cl_role,process_port,instance,rs_state) (increase(mongodb_electionMetrics_electionTimeout_called[5m])) > 10 diff --git a/mongodb-atlas-mixin/rows.libsonnet b/mongodb-atlas-mixin/rows.libsonnet new file mode 100644 index 000000000..f272bb43d --- /dev/null +++ b/mongodb-atlas-mixin/rows.libsonnet @@ -0,0 +1,228 @@ +local g = import './g.libsonnet'; + +{ + new(this): { + local panels = this.grafana.panels, + + // + // Cluster Overview dashboard rows + // + + clusterOverviewHardwareRow: + g.panel.row.new('Hardware') + + g.panel.row.withPanels([ + panels.hardwareIO { gridPos: { h: 12, w: 12, x: 0, y: 0 } }, + panels.hardwareIOWaitTime { gridPos: { h: 12, w: 12, x: 12, y: 0 } }, + panels.hardwareCPUInterruptServiceTime { gridPos: { h: 8, w: 12, x: 0, y: 12 } }, + panels.memoryUsed { gridPos: { h: 8, w: 12, x: 12, y: 12 } }, + ]), + + clusterOverviewDiskRow: + g.panel.row.new('Disk') + + g.panel.row.withPanels([ + panels.diskSpaceUsage { gridPos: { h: 8, w: 24, x: 0, y: 0 } }, + ]), + + clusterOverviewNetworkRow: + g.panel.row.new('Network') + + g.panel.row.withPanels([ + panels.networkRequests { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.networkThroughput { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.slowRequests { gridPos: { h: 8, w: 24, x: 0, y: 8 } }, + ]), + + clusterOverviewConnectionsRow: + g.panel.row.new('Connections') + + g.panel.row.withPanels([ + panels.connections { gridPos: { h: 8, w: 24, x: 0, y: 0 } }, + ]), + + clusterOverviewOperationsRow: + g.panel.row.new('Operations') + + g.panel.row.withPanels([ + panels.readwriteOperations { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.operations { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.readwriteLatency { gridPos: { h: 8, w: 24, x: 0, y: 8 } }, + ]), + + clusterOverviewLocksRow: + g.panel.row.new('Locks') + + g.panel.row.withPanels([ + panels.currentQueue { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.activeClientOperations { gridPos: { h: 8, w: 12, x: 12, y: 0 } 
}, + panels.databaseDeadlocks { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.databaseWaitsAcquiringLock { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + // + // Elections Overview dashboard rows + // + + electionsStepUpRow: + g.panel.row.new('Step-up elections') + + g.panel.row.withPanels([ + panels.stepUpElectionsCalled { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + ]), + + electionsPriorityTakeoverRow: + g.panel.row.new('Priority takeover elections') + + g.panel.row.withPanels([ + panels.priorityTakeoverCalled { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + ]), + + electionsCatchUpTakeoverRow: + g.panel.row.new('Catch-up takeover elections') + + g.panel.row.withPanels([ + panels.catchUpTakeoverCalled { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + ]), + + electionsTimeoutRow: + g.panel.row.new('Election timeout') + + g.panel.row.withPanels([ + panels.electionTimeoutCalled { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + ]), + + electionsCatchUpsRow: + g.panel.row.new('Catch-ups') + + g.panel.row.withPanels([ + panels.catchUpsTotal { gridPos: { h: 8, w: 8, x: 0, y: 0 } }, + panels.catchUpsSkipped { gridPos: { h: 8, w: 8, x: 8, y: 0 } }, + panels.catchUpsSucceeded { gridPos: { h: 8, w: 8, x: 16, y: 0 } }, + panels.catchUpsFailed { gridPos: { h: 8, w: 8, x: 0, y: 8 } }, + panels.catchUpsTimedOut { gridPos: { h: 8, w: 8, x: 8, y: 8 } }, + panels.averageCatchUpOps { gridPos: { h: 8, w: 8, x: 16, y: 8 } }, + ]), + + // + // Operations Overview dashboard rows + // + + operationsCountersClusterRow: + g.panel.row.new('Operation counters - cluster') + + g.panel.row.withPanels([ + panels.insertOperations { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.queryOperations { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.updateOperations { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.deleteOperations { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + operationsCountersInstanceRow: + g.panel.row.new('Operation counters - instance') + + g.panel.row.withPanels([ + 
panels.insertOperationsByInstance { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.queryOperationsByInstance { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.updateOperationsByInstance { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.deleteOperationsByInstance { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + operationsLatenciesClusterRow: + g.panel.row.new('Operation latencies - cluster') + + g.panel.row.withPanels([ + panels.readOperationCount { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.writeOperationCount { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.readOperationLatency { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.writeOperationLatency { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + operationsLatenciesInstanceRow: + g.panel.row.new('Operation latencies - instance') + + g.panel.row.withPanels([ + panels.readOperationCountByInstance { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.writeOperationCountByInstance { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.readOperationLatencyByInstance { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.writeOperationLatencyByInstance { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + operationsAvgLatenciesRow: + g.panel.row.new('Average latencies') + + g.panel.row.withPanels([ + panels.avgReadLatency { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.avgWriteLatency { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.avgReadLatencyByInstance { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.avgWriteLatencyByInstance { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + // + // Performance Overview dashboard rows + // + + performanceConnectionsRow: + g.panel.row.new('Connections') + + g.panel.row.withPanels([ + panels.currentConnections { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.activeConnections { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + ]), + + performanceDbLocksClusterRow: + g.panel.row.new('Database lock deadlocks - cluster') + + g.panel.row.withPanels([ + 
panels.dbLockDeadlocksExclusive { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.dbLockDeadlocksIntentExclusive { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.dbLockDeadlocksShared { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.dbLockDeadlocksIntentShared { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + performanceDbLocksInstanceRow: + g.panel.row.new('Database lock deadlocks - instance') + + g.panel.row.withPanels([ + panels.dbLockDeadlocksExclusiveByInstance { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.dbLockDeadlocksIntentExclusiveByInstance { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.dbLockDeadlocksSharedByInstance { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.dbLockDeadlocksIntentSharedByInstance { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + performanceDbWaitCountsClusterRow: + g.panel.row.new('Database lock wait counts - cluster') + + g.panel.row.withPanels([ + panels.dbLockWaitCountExclusive { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.dbLockWaitCountIntentExclusive { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.dbLockWaitCountShared { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.dbLockWaitCountIntentShared { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + performanceDbWaitCountsInstanceRow: + g.panel.row.new('Database lock wait counts - instance') + + g.panel.row.withPanels([ + panels.dbLockWaitCountExclusiveByInstance { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.dbLockWaitCountIntentExclusiveByInstance { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.dbLockWaitCountSharedByInstance { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.dbLockWaitCountIntentSharedByInstance { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + performanceDbAcqTimeRow: + g.panel.row.new('Database lock acquisition time') + + g.panel.row.withPanels([ + panels.dbLockAcqTimeExclusive { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.dbLockAcqTimeIntentExclusive { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + 
panels.dbLockAcqTimeShared { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.dbLockAcqTimeIntentShared { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + performanceCollLocksRow: + g.panel.row.new('Collection lock deadlocks') + + g.panel.row.withPanels([ + panels.collLockDeadlocksExclusive { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.collLockDeadlocksIntentExclusive { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.collLockDeadlocksShared { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.collLockDeadlocksIntentShared { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + performanceCollWaitCountsRow: + g.panel.row.new('Collection lock wait counts') + + g.panel.row.withPanels([ + panels.collLockWaitCountExclusive { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.collLockWaitCountIntentExclusive { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.collLockWaitCountShared { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.collLockWaitCountIntentShared { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + + performanceCollAcqTimeRow: + g.panel.row.new('Collection lock acquisition time') + + g.panel.row.withPanels([ + panels.collLockAcqTimeExclusive { gridPos: { h: 8, w: 12, x: 0, y: 0 } }, + panels.collLockAcqTimeIntentExclusive { gridPos: { h: 8, w: 12, x: 12, y: 0 } }, + panels.collLockAcqTimeShared { gridPos: { h: 8, w: 12, x: 0, y: 8 } }, + panels.collLockAcqTimeIntentShared { gridPos: { h: 8, w: 12, x: 12, y: 8 } }, + ]), + }, +} diff --git a/mongodb-atlas-mixin/signals/connections.libsonnet b/mongodb-atlas-mixin/signals/connections.libsonnet new file mode 100644 index 000000000..18df0f9ff --- /dev/null +++ b/mongodb-atlas-mixin/signals/connections.libsonnet @@ -0,0 +1,51 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + + connectionsCreated: { + name: 'Connections created', + type: 'counter', + description: 
'Total connections created.', + unit: 'conns', + sources: { + prometheus: { + expr: 'mongodb_connections_totalCreated{%(queriesSelector)s}', + legendCustomTemplate: '{{cl_name}}', + }, + }, + }, + + connectionsCurrent: { + name: 'Current connections', + type: 'gauge', + aggLevel: 'none', + description: 'Current number of active connections.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_connections_current{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + connectionsActive: { + name: 'Active connections', + type: 'gauge', + aggLevel: 'none', + description: 'Current number of connections with operations in progress.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_connections_active{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + }, + } diff --git a/mongodb-atlas-mixin/signals/elections.libsonnet b/mongodb-atlas-mixin/signals/elections.libsonnet new file mode 100644 index 000000000..344877614 --- /dev/null +++ b/mongodb-atlas-mixin/signals/elections.libsonnet @@ -0,0 +1,206 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'none', + aggFunction: 'sum', + signals: { + + stepUpCmdCalled: { + name: 'Step-up elections called', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of step-up elections called.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_stepUpCmd_called{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - called', + }, + }, + }, + + stepUpCmdSuccessful: { + name: 'Step-up elections successful', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of successful step-up elections.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_stepUpCmd_successful{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: 
'{{instance}} - successful', + }, + }, + }, + + priorityTakeoverCalled: { + name: 'Priority takeover elections called', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of priority takeover elections called.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_priorityTakeover_called{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - called', + }, + }, + }, + + priorityTakeoverSuccessful: { + name: 'Priority takeover elections successful', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of successful priority takeover elections.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_priorityTakeover_successful{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - successful', + }, + }, + }, + + catchUpTakeoverCalled: { + name: 'Catch-up takeover elections called', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of catch-up takeover elections called.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_catchUpTakeover_called{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - called', + }, + }, + }, + + catchUpTakeoverSuccessful: { + name: 'Catch-up takeover elections successful', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of successful catch-up takeover elections.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_catchUpTakeover_successful{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - successful', + }, + }, + }, + + electionTimeoutCalled: { + name: 'Election timeout elections called', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of election timeout elections called.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_electionTimeout_called{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: 
'{{instance}} - called', + }, + }, + }, + + electionTimeoutSuccessful: { + name: 'Election timeout elections successful', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of successful election timeout elections.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_electionTimeout_successful{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - successful', + }, + }, + }, + + numCatchUps: { + name: 'Number of catch-ups', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of catch-up operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_numCatchUps{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + numCatchUpsSkipped: { + name: 'Number of catch-ups skipped', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of catch-ups skipped.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_numCatchUpsSkipped{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + numCatchUpsSucceeded: { + name: 'Number of catch-ups succeeded', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of catch-ups succeeded.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_numCatchUpsSucceeded{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + numCatchUpsFailedWithError: { + name: 'Number of catch-ups failed with error', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of catch-ups failed with error.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_numCatchUpsFailedWithError{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + numCatchUpsTimedOut: { + name: 'Number of catch-up timeouts', + type: 'counter', + rangeFunction: 'increase', + description: 
'Number of catch-up timeouts.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_numCatchUpsTimedOut{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + averageCatchUpOps: { + name: 'Average catch-up operations', + type: 'gauge', + aggLevel: 'none', + description: 'Average catch-up operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_electionMetrics_averageCatchUpOps{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + }, + } diff --git a/mongodb-atlas-mixin/signals/hardware.libsonnet b/mongodb-atlas-mixin/signals/hardware.libsonnet new file mode 100644 index 000000000..184a2cf4d --- /dev/null +++ b/mongodb-atlas-mixin/signals/hardware.libsonnet @@ -0,0 +1,121 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + + diskReadCount: { + name: 'Disk read operations', + type: 'counter', + description: 'Number of disk read operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'hardware_disk_metrics_read_count{%(queriesSelector)s}', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + diskWriteCount: { + name: 'Disk write operations', + type: 'counter', + description: 'Number of disk write operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'hardware_disk_metrics_write_count{%(queriesSelector)s}', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + diskReadTime: { + name: 'Disk read I/O time', + type: 'counter', + rangeFunction: 'increase', + description: 'Time spent on read I/O operations.', + unit: 'ms', + sources: { + prometheus: { + expr: 'hardware_disk_metrics_read_time_milliseconds{%(queriesSelector)s}', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + diskWriteTime: { + name: 'Disk write I/O time', + type: 'counter', + 
rangeFunction: 'increase', + description: 'Time spent on write I/O operations.', + unit: 'ms', + sources: { + prometheus: { + expr: 'hardware_disk_metrics_write_time_milliseconds{%(queriesSelector)s}', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + cpuIrqTime: { + name: 'CPU interrupt service time', + type: 'counter', + rangeFunction: 'increase', + description: 'CPU time spent servicing interrupts.', + unit: 'ms', + sources: { + prometheus: { + expr: 'hardware_system_cpu_irq_milliseconds{%(queriesSelector)s}', + legendCustomTemplate: '{{cl_name}}', + }, + }, + }, + + diskSpaceUsed: { + name: 'Disk space used', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + description: 'Disk space used.', + unit: 'bytes', + sources: { + prometheus: { + expr: 'hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}} - used', + }, + }, + }, + + diskSpaceFree: { + name: 'Disk space free', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + description: 'Disk space free.', + unit: 'bytes', + sources: { + prometheus: { + expr: 'hardware_disk_metrics_disk_space_free_bytes{%(queriesSelector)s}', + legendCustomTemplate: '{{instance}} - free', + }, + }, + }, + + diskSpaceUtilization: { + name: 'Disk space utilization', + type: 'raw', + description: 'Percentage of disk space used.', + unit: 'percent', + sources: { + prometheus: { + expr: '100 * ((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s})) / clamp_min((sum without (disk_name) (hardware_disk_metrics_disk_space_used_bytes{%(queriesSelector)s})) + (sum without (disk_name) (hardware_disk_metrics_disk_space_free_bytes{%(queriesSelector)s})), 1))', + legendCustomTemplate: '{{cl_name}}', + }, + }, + }, + }, + } diff --git a/mongodb-atlas-mixin/signals/locks.libsonnet b/mongodb-atlas-mixin/signals/locks.libsonnet new file mode 100644 index 000000000..2294bc192 --- /dev/null +++ 
b/mongodb-atlas-mixin/signals/locks.libsonnet @@ -0,0 +1,542 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + + // Global lock queues + globalLockQueueReaders: { + name: 'Global lock queue - readers', + type: 'gauge', + description: 'Number of read operations queued due to locks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_globalLock_currentQueue_readers{%(queriesSelector)s}', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + globalLockQueueWriters: { + name: 'Global lock queue - writers', + type: 'gauge', + description: 'Number of write operations queued due to locks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_globalLock_currentQueue_writers{%(queriesSelector)s}', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + // Global lock active clients + globalLockActiveReaders: { + name: 'Global lock active clients - readers', + type: 'gauge', + description: 'Number of active read operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_globalLock_activeClients_readers{%(queriesSelector)s}', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + globalLockActiveWriters: { + name: 'Global lock active clients - writers', + type: 'gauge', + description: 'Number of active write operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_globalLock_activeClients_writers{%(queriesSelector)s}', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + // Database lock deadlocks + dbDeadlockExclusive: { + name: 'Database exclusive lock deadlocks', + type: 'counter', + rangeFunction: 'increase', + description: 'Database exclusive lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_W{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: 
'{{cl_name}} - exclusive', + }, + }, + }, + + dbDeadlockIntentExclusive: { + name: 'Database intent exclusive lock deadlocks', + type: 'counter', + rangeFunction: 'increase', + description: 'Database intent exclusive lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_w{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - intent exclusive', + }, + }, + }, + + dbDeadlockShared: { + name: 'Database shared lock deadlocks', + type: 'counter', + rangeFunction: 'increase', + description: 'Database shared lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_R{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - shared', + }, + }, + }, + + dbDeadlockIntentShared: { + name: 'Database intent shared lock deadlocks', + type: 'counter', + rangeFunction: 'increase', + description: 'Database intent shared lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_r{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - intent shared', + }, + }, + }, + + dbDeadlockExclusiveByInstance: { + name: 'Database exclusive lock deadlocks by instance', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database exclusive lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_W{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + dbDeadlockIntentExclusiveByInstance: { + name: 'Database intent exclusive lock deadlocks by instance', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database intent exclusive lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_w{%(queriesSelector)s, rs_nm=~"$rs"}', + 
legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + dbDeadlockSharedByInstance: { + name: 'Database shared lock deadlocks by instance', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database shared lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_R{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + dbDeadlockIntentSharedByInstance: { + name: 'Database intent shared lock deadlocks by instance', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database intent shared lock deadlocks per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_deadlockCount_r{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Database lock wait counts + dbWaitCountExclusive: { + name: 'Database exclusive lock wait count', + type: 'counter', + rangeFunction: 'increase', + description: 'Database exclusive lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_W{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - exclusive', + }, + }, + }, + + dbWaitCountIntentExclusive: { + name: 'Database intent exclusive lock wait count', + type: 'counter', + rangeFunction: 'increase', + description: 'Database intent exclusive lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_w{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - intent exclusive', + }, + }, + }, + + dbWaitCountShared: { + name: 'Database shared lock wait count', + type: 'counter', + rangeFunction: 'increase', + description: 'Database shared lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 
'mongodb_locks_Database_acquireWaitCount_R{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - shared', + }, + }, + }, + + dbWaitCountIntentShared: { + name: 'Database intent shared lock wait count', + type: 'counter', + rangeFunction: 'increase', + description: 'Database intent shared lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_r{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - intent shared', + }, + }, + }, + + dbWaitCountExclusiveByInstance: { + name: 'Database exclusive lock wait count by instance', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database exclusive lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_W{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + dbWaitCountIntentExclusiveByInstance: { + name: 'Database intent exclusive lock wait count by instance', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database intent exclusive lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_w{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + dbWaitCountSharedByInstance: { + name: 'Database shared lock wait count by instance', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database shared lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_R{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + dbWaitCountIntentSharedByInstance: { + name: 'Database intent shared lock wait count by instance', + type: 'counter', + rangeFunction: 'increase', + 
aggLevel: 'none', + description: 'Database intent shared lock wait count per instance.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_acquireWaitCount_r{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Database lock acquisition time + dbAcqTimeExclusive: { + name: 'Database exclusive lock acquisition time', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database exclusive lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_W{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + dbAcqTimeIntentExclusive: { + name: 'Database intent exclusive lock acquisition time', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database intent exclusive lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_w{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + dbAcqTimeShared: { + name: 'Database shared lock acquisition time', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database shared lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_R{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + dbAcqTimeIntentShared: { + name: 'Database intent shared lock acquisition time', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Database intent shared lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Database_timeAcquiringMicros_r{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // 
Collection lock deadlocks + collDeadlockExclusive: { + name: 'Collection exclusive lock deadlocks', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection exclusive lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_W{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + collDeadlockIntentExclusive: { + name: 'Collection intent exclusive lock deadlocks', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection intent exclusive lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_w{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + collDeadlockShared: { + name: 'Collection shared lock deadlocks', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection shared lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_R{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + collDeadlockIntentShared: { + name: 'Collection intent shared lock deadlocks', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection intent shared lock deadlocks.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_deadlockCount_r{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Collection lock wait counts + collWaitCountExclusive: { + name: 'Collection exclusive lock wait count', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection exclusive lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_W{%(queriesSelector)s, 
rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + collWaitCountIntentExclusive: { + name: 'Collection intent exclusive lock wait count', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection intent exclusive lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_w{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + collWaitCountShared: { + name: 'Collection shared lock wait count', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection shared lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_R{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + collWaitCountIntentShared: { + name: 'Collection intent shared lock wait count', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection intent shared lock wait count.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_acquireWaitCount_r{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + + // Collection lock acquisition time + collAcqTimeExclusive: { + name: 'Collection exclusive lock acquisition time', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection exclusive lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_W{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - exclusive', + }, + }, + }, + + collAcqTimeIntentExclusive: { + name: 'Collection intent exclusive lock acquisition time', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection intent exclusive lock 
acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_w{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent exclusive', + }, + }, + }, + + collAcqTimeShared: { + name: 'Collection shared lock acquisition time', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection shared lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_R{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - shared', + }, + }, + }, + + collAcqTimeIntentShared: { + name: 'Collection intent shared lock acquisition time', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Collection intent shared lock acquisition time.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_locks_Collection_timeAcquiringMicros_r{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - intent shared', + }, + }, + }, + }, + } diff --git a/mongodb-atlas-mixin/signals/memory.libsonnet b/mongodb-atlas-mixin/signals/memory.libsonnet new file mode 100644 index 000000000..48f471faf --- /dev/null +++ b/mongodb-atlas-mixin/signals/memory.libsonnet @@ -0,0 +1,64 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + + memoryResident: { + name: 'Memory resident (RAM)', + type: 'gauge', + description: 'Resident memory (RAM) usage.', + unit: 'mbytes', + sources: { + prometheus: { + expr: 'mongodb_mem_resident{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - RAM', + }, + }, + }, + + memoryVirtual: { + name: 'Memory virtual', + type: 'gauge', + description: 'Virtual memory usage.', + unit: 'mbytes', + sources: { + prometheus: { + expr: 'mongodb_mem_virtual{%(queriesSelector)s, 
rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - virtual', + }, + }, + }, + + memoryResidentByInstance: { + name: 'Memory resident by instance', + type: 'gauge', + aggLevel: 'none', + description: 'Resident memory (RAM) usage per instance.', + unit: 'mbytes', + sources: { + prometheus: { + expr: 'mongodb_mem_resident{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - RAM', + }, + }, + }, + + memoryVirtualByInstance: { + name: 'Memory virtual by instance', + type: 'gauge', + aggLevel: 'none', + description: 'Virtual memory usage per instance.', + unit: 'mbytes', + sources: { + prometheus: { + expr: 'mongodb_mem_virtual{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - virtual', + }, + }, + }, + }, + } diff --git a/mongodb-atlas-mixin/signals/network.libsonnet b/mongodb-atlas-mixin/signals/network.libsonnet new file mode 100644 index 000000000..2b88d385f --- /dev/null +++ b/mongodb-atlas-mixin/signals/network.libsonnet @@ -0,0 +1,145 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + + networkBytesIn: { + name: 'Network bytes received', + type: 'counter', + description: 'Network bytes received.', + unit: 'Bps', + sources: { + prometheus: { + expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - received', + }, + }, + }, + + networkBytesOut: { + name: 'Network bytes sent', + type: 'counter', + description: 'Network bytes sent.', + unit: 'Bps', + sources: { + prometheus: { + expr: 'mongodb_network_bytesOut{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - sent', + }, + }, + }, + + networkBytesInByInstance: { + name: 'Network bytes received by instance', + type: 'counter', + aggLevel: 'none', + description: 'Network bytes received per instance.', + unit: 'Bps', + sources: { + prometheus: { 
+ expr: 'mongodb_network_bytesIn{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - received', + }, + }, + }, + + networkBytesOutByInstance: { + name: 'Network bytes sent by instance', + type: 'counter', + aggLevel: 'none', + description: 'Network bytes sent per instance.', + unit: 'Bps', + sources: { + prometheus: { + expr: 'mongodb_network_bytesOut{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - sent', + }, + }, + }, + + networkRequests: { + name: 'Network requests', + type: 'counter', + description: 'Number of network requests received.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'mongodb_network_numRequests{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}}', + }, + }, + }, + + networkRequestsByInstance: { + name: 'Network requests by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of network requests received per instance.', + unit: 'reqps', + sources: { + prometheus: { + expr: 'mongodb_network_numRequests{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + networkSlowDNS: { + name: 'Slow DNS operations', + type: 'counter', + description: 'Number of slow DNS operations (>1s).', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_network_numSlowDNSOperations{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - DNS', + }, + }, + }, + + networkSlowSSL: { + name: 'Slow SSL operations', + type: 'counter', + description: 'Number of slow SSL operations (>1s).', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_network_numSlowSSLOperations{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - SSL', + }, + }, + }, + + networkSlowDNSByInstance: { + name: 'Slow DNS operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of slow DNS operations (>1s) per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 
'mongodb_network_numSlowDNSOperations{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - DNS', + }, + }, + }, + + networkSlowSSLByInstance: { + name: 'Slow SSL operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of slow SSL operations (>1s) per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_network_numSlowSSLOperations{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - SSL', + }, + }, + }, + }, + } diff --git a/mongodb-atlas-mixin/signals/operations.libsonnet b/mongodb-atlas-mixin/signals/operations.libsonnet new file mode 100644 index 000000000..509f3b8f5 --- /dev/null +++ b/mongodb-atlas-mixin/signals/operations.libsonnet @@ -0,0 +1,289 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'group', + aggFunction: 'sum', + signals: { + + // Operation counters + opCountersInsert: { + name: 'Insert operations', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of insert operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_opcounters_insert{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - insert', + }, + }, + }, + + opCountersQuery: { + name: 'Query operations', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of query operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_opcounters_query{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - query', + }, + }, + }, + + opCountersUpdate: { + name: 'Update operations', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of update operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_opcounters_update{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - update', + }, + }, + }, + + opCountersDelete: { + name: 'Delete 
operations', + type: 'counter', + rangeFunction: 'increase', + description: 'Number of delete operations.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_opcounters_delete{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - delete', + }, + }, + }, + + opCountersInsertByInstance: { + name: 'Insert operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of insert operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opcounters_insert{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + opCountersQueryByInstance: { + name: 'Query operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of query operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opcounters_query{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + opCountersUpdateByInstance: { + name: 'Update operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of update operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opcounters_update{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + opCountersDeleteByInstance: { + name: 'Delete operations by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of delete operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opcounters_delete{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + // Operation latencies + opLatenciesReadsOps: { + name: 'Read operation count', + type: 'counter', + description: 'Number of read operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + 
opLatenciesWritesOps: { + name: 'Write operation count', + type: 'counter', + description: 'Number of write operations.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + opLatenciesReadsOpsByInstance: { + name: 'Read operation count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of read operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - reads', + }, + }, + }, + + opLatenciesWritesOpsByInstance: { + name: 'Write operation count by instance', + type: 'counter', + aggLevel: 'none', + description: 'Number of write operations per instance.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - writes', + }, + }, + }, + + opLatenciesReadsLatency: { + name: 'Read operation latency', + type: 'counter', + rangeFunction: 'increase', + description: 'Total read operation latency.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + opLatenciesWritesLatency: { + name: 'Write operation latency', + type: 'counter', + rangeFunction: 'increase', + description: 'Total write operation latency.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + opLatenciesReadsLatencyByInstance: { + name: 'Read operation latency by instance', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Total read operation latency per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 
'mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - reads', + }, + }, + }, + + opLatenciesWritesLatencyByInstance: { + name: 'Write operation latency by instance', + type: 'counter', + rangeFunction: 'increase', + aggLevel: 'none', + description: 'Total write operation latency per instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - writes', + }, + }, + }, + + // Average latency calculations (raw type for complex expressions) + avgReadLatency: { + name: 'Average read latency', + type: 'raw', + description: 'Average latency per read operation.', + unit: 'µs', + sources: { + prometheus: { + expr: 'sum (increase(mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs"}[$__interval:])) by (job, cl_name) / clamp_min(sum (increase(mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs"}[$__interval:])) by (job, cl_name), 1)', + legendCustomTemplate: '{{cl_name}} - reads', + }, + }, + }, + + avgWriteLatency: { + name: 'Average write latency', + type: 'raw', + description: 'Average latency per write operation.', + unit: 'µs', + sources: { + prometheus: { + expr: 'sum (increase(mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs"}[$__interval:])) by (job, cl_name) / clamp_min(sum (increase(mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs"}[$__interval:])) by (job, cl_name), 1)', + legendCustomTemplate: '{{cl_name}} - writes', + }, + }, + }, + + avgReadLatencyByInstance: { + name: 'Average read latency by instance', + type: 'raw', + description: 'Average latency per read operation by instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'increase(mongodb_opLatencies_reads_latency{%(queriesSelector)s, rs_nm=~"$rs"}[$__interval:]) / clamp_min(increase(mongodb_opLatencies_reads_ops{%(queriesSelector)s, rs_nm=~"$rs"}[$__interval:]), 1)', + 
legendCustomTemplate: '{{instance}} - reads', + }, + }, + }, + + avgWriteLatencyByInstance: { + name: 'Average write latency by instance', + type: 'raw', + description: 'Average latency per write operation by instance.', + unit: 'µs', + sources: { + prometheus: { + expr: 'increase(mongodb_opLatencies_writes_latency{%(queriesSelector)s, rs_nm=~"$rs"}[$__interval:]) / clamp_min(increase(mongodb_opLatencies_writes_ops{%(queriesSelector)s, rs_nm=~"$rs"}[$__interval:]), 1)', + legendCustomTemplate: '{{instance}} - writes', + }, + }, + }, + }, + } diff --git a/mongodb-atlas-mixin/signals/sharding.libsonnet b/mongodb-atlas-mixin/signals/sharding.libsonnet new file mode 100644 index 000000000..30c2be469 --- /dev/null +++ b/mongodb-atlas-mixin/signals/sharding.libsonnet @@ -0,0 +1,457 @@ +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + aggLevel: 'none', + aggFunction: 'sum', + signals: { + + // General sharding statistics + staleConfigErrors: { + name: 'Stale config errors', + type: 'counter', + rangeFunction: 'increase', + description: 'Stale config errors triggering metadata refresh.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_countStaleConfigErrors{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + moveChunksStarted: { + name: 'Chunk migrations started as recipient', + type: 'counter', + rangeFunction: 'increase', + description: 'Chunk migrations started as recipient.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_countRecipientMoveChunkStarted{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + docsClonedDonor: { + name: 'Documents cloned on donor', + type: 'counter', + rangeFunction: 'increase', + description: 'Documents cloned when acting as donor.', + unit: 'none', + sources: { + prometheus: { + expr: 
'mongodb_shardingStatistics_countDocsClonedOnDonor{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - donor', + }, + }, + }, + + docsClonedRecipient: { + name: 'Documents cloned on recipient', + type: 'counter', + rangeFunction: 'increase', + description: 'Documents cloned when acting as recipient.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_countDocsClonedOnRecipient{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - recipient', + }, + }, + }, + + criticalSectionTime: { + name: 'Critical section time', + type: 'counter', + rangeFunction: 'increase', + description: 'Time in critical section during chunk migration.', + unit: 'ms', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_totalCriticalSectionTimeMillis{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + // Catalog cache refreshes + catalogCacheIncrementalRefreshes: { + name: 'Incremental catalog cache refreshes', + type: 'counter', + rangeFunction: 'increase', + description: 'Incremental catalog cache refreshes started.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_catalogCache_countIncrementalRefreshesStarted{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - incremental', + }, + }, + }, + + catalogCacheFullRefreshes: { + name: 'Full catalog cache refreshes', + type: 'counter', + rangeFunction: 'increase', + description: 'Full catalog cache refreshes started.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_catalogCache_countFullRefreshesStarted{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - full', + }, + }, + }, + + catalogCacheFailedRefreshes: { + name: 'Failed catalog cache refreshes', + type: 'counter', + rangeFunction: 'increase', + description: 'Failed catalog cache refreshes.', + unit: 'none', + sources: { + prometheus: { + 
expr: 'mongodb_shardingStatistics_catalogCache_countFailedRefreshes{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + catalogCacheStaleConfigErrors: { + name: 'Catalog cache stale config errors', + type: 'counter', + rangeFunction: 'increase', + description: 'Stale config errors in catalog cache.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_catalogCache_countStaleConfigErrors{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + catalogCacheDatabaseEntries: { + name: 'Database entries in catalog cache', + type: 'gauge', + // Gauge, not counter: numDatabaseEntries is the number of entries cached right now, so no range function is applied. + description: 'Database entries in catalog cache.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_catalogCache_numDatabaseEntries{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - database', + }, + }, + }, + + catalogCacheCollectionEntries: { + name: 'Collection entries in catalog cache', + type: 'gauge', + // Gauge, not counter: numCollectionEntries is the number of entries cached right now, so no range function is applied. + description: 'Collection entries in catalog cache.', + unit: 'none', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_catalogCache_numCollectionEntries{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - collection', + }, + }, + }, + + catalogCacheRefreshWaitTime: { + name: 'Catalog cache refresh wait time', + type: 'counter', + rangeFunction: 'increase', + description: 'Total time waiting for catalog cache refresh.', + unit: 'µs', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_catalogCache_totalRefreshWaitTimeMicros{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + catalogCacheOpsBlocked: { + name: 'Operations blocked by catalog cache refresh', + type: 'counter', + description: 'Operations blocked by catalog cache refresh.', + unit: 'ops', + sources: { + prometheus: { + expr: 
'mongodb_shardingStatistics_catalogCache_operationsBlockedByRefresh_countAllOperations{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}}', + }, + }, + }, + + // Targeting - allShards + targetingFindAllShards: { + name: 'Find operations targeting all shards', + type: 'counter', + description: 'Find operations targeting all shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_find_allShards{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - find', + }, + }, + }, + + targetingInsertAllShards: { + name: 'Insert operations targeting all shards', + type: 'counter', + description: 'Insert operations targeting all shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_insert_allShards{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - insert', + }, + }, + }, + + targetingUpdateAllShards: { + name: 'Update operations targeting all shards', + type: 'counter', + description: 'Update operations targeting all shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_update_allShards{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - update', + }, + }, + }, + + targetingDeleteAllShards: { + name: 'Delete operations targeting all shards', + type: 'counter', + description: 'Delete operations targeting all shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_delete_allShards{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - delete', + }, + }, + }, + + targetingAggregateAllShards: { + name: 'Aggregate operations targeting all shards', + type: 'counter', + description: 'Aggregate operations targeting all shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_aggregate_allShards{%(queriesSelector)s, 
rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - aggregate', + }, + }, + }, + + // Targeting - manyShards + targetingFindManyShards: { + name: 'Find operations targeting many shards', + type: 'counter', + description: 'Find operations targeting many shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_find_manyShards{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - find', + }, + }, + }, + + targetingInsertManyShards: { + name: 'Insert operations targeting many shards', + type: 'counter', + description: 'Insert operations targeting many shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_insert_manyShards{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - insert', + }, + }, + }, + + targetingUpdateManyShards: { + name: 'Update operations targeting many shards', + type: 'counter', + description: 'Update operations targeting many shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_update_manyShards{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - update', + }, + }, + }, + + targetingDeleteManyShards: { + name: 'Delete operations targeting many shards', + type: 'counter', + description: 'Delete operations targeting many shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_delete_manyShards{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - delete', + }, + }, + }, + + targetingAggregateManyShards: { + name: 'Aggregate operations targeting many shards', + type: 'counter', + description: 'Aggregate operations targeting many shards.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_aggregate_manyShards{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - aggregate', + }, + 
}, + }, + + // Targeting - oneShard + targetingFindOneShard: { + name: 'Find operations targeting one shard', + type: 'counter', + description: 'Find operations targeting one shard.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_find_oneShard{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - find', + }, + }, + }, + + targetingInsertOneShard: { + name: 'Insert operations targeting one shard', + type: 'counter', + description: 'Insert operations targeting one shard.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_insert_oneShard{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - insert', + }, + }, + }, + + targetingUpdateOneShard: { + name: 'Update operations targeting one shard', + type: 'counter', + description: 'Update operations targeting one shard.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_update_oneShard{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - update', + }, + }, + }, + + targetingDeleteOneShard: { + name: 'Delete operations targeting one shard', + type: 'counter', + description: 'Delete operations targeting one shard.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_delete_oneShard{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - delete', + }, + }, + }, + + targetingAggregateOneShard: { + name: 'Aggregate operations targeting one shard', + type: 'counter', + description: 'Aggregate operations targeting one shard.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_aggregate_oneShard{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - aggregate', + }, + }, + }, + + // Targeting - unsharded + targetingFindUnsharded: { + name: 'Find operations on unsharded collections', + 
type: 'counter', + description: 'Find operations on unsharded collections.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_find_unsharded{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - find', + }, + }, + }, + + targetingInsertUnsharded: { + name: 'Insert operations on unsharded collections', + type: 'counter', + description: 'Insert operations on unsharded collections.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_insert_unsharded{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - insert', + }, + }, + }, + + targetingUpdateUnsharded: { + name: 'Update operations on unsharded collections', + type: 'counter', + description: 'Update operations on unsharded collections.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_update_unsharded{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - update', + }, + }, + }, + + targetingDeleteUnsharded: { + name: 'Delete operations on unsharded collections', + type: 'counter', + description: 'Delete operations on unsharded collections.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_delete_unsharded{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - delete', + }, + }, + }, + + targetingAggregateUnsharded: { + name: 'Aggregate operations on unsharded collections', + type: 'counter', + description: 'Aggregate operations on unsharded collections.', + unit: 'ops', + sources: { + prometheus: { + expr: 'mongodb_shardingStatistics_numHostsTargeted_aggregate_unsharded{%(queriesSelector)s, rs_nm=~"$rs"}', + legendCustomTemplate: '{{instance}} - aggregate', + }, + }, + }, + }, + }