diff --git a/.gitignore b/.gitignore index d68c86c04..1a57d51f0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ vendor jsonnetfile.lock.json *.zip +.worktrees diff --git a/opensearch-mixin/alerts/alerts.libsonnet b/opensearch-mixin/alerts.libsonnet similarity index 91% rename from opensearch-mixin/alerts/alerts.libsonnet rename to opensearch-mixin/alerts.libsonnet index 91f0d5bb0..27ae4dc18 100644 --- a/opensearch-mixin/alerts/alerts.libsonnet +++ b/opensearch-mixin/alerts.libsonnet @@ -1,14 +1,14 @@ { - prometheusAlerts+:: { + new(this): { groups+: [ { - name: $._config.uid + '-alerts', + name: this.config.uid + '-alerts', rules: [ { alert: 'OpenSearchYellowCluster', expr: ||| opensearch_cluster_status{%(filteringSelector)s} == 1 - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -18,14 +18,14 @@ description: ( '{{$labels.cluster}} health status is yellow over the last 5 minutes' - ) % $._config, + ) % this.config, }, }, { alert: 'OpenSearchRedCluster', expr: ||| opensearch_cluster_status{%(filteringSelector)s} == 2 - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -35,14 +35,14 @@ description: ( '{{$labels.cluster}} health status is red over the last 5 minutes' - ) % $._config, + ) % this.config, }, }, { alert: 'OpenSearchUnstableShardReallocation', expr: ||| sum without(type) (opensearch_cluster_shards_number{%(filteringSelector)s, type="relocating"}) > %(alertsWarningShardReallocations)s - ||| % $._config, + ||| % this.config, 'for': '1m', labels: { severity: 'warning', @@ -51,14 +51,14 @@ summary: 'A node has gone offline or has been disconnected triggering shard reallocation.', description: ||| {{$labels.cluster}} has had {{ printf "%%.0f" $value }} shard reallocation over the last 1m which is above the threshold of %(alertsWarningShardReallocations)s. 
- ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchUnstableShardUnassigned', expr: ||| sum without(type) (opensearch_cluster_shards_number{%(filteringSelector)s, type="unassigned"}) > %(alertsWarningShardUnassigned)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -67,14 +67,14 @@ summary: 'There are shards that have been detected as unassigned.', description: ||| {{$labels.cluster}} has had {{ printf "%%.0f" $value }} shard unassigned over the last 5m which is above the threshold of %(alertsWarningShardUnassigned)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeDiskUsage', expr: ||| 100 * sum without(nodeid, path, mount, type) ((opensearch_fs_path_total_bytes{%(filteringSelector)s} - opensearch_fs_path_free_bytes{%(filteringSelector)s}) / opensearch_fs_path_total_bytes{%(filteringSelector)s}) > %(alertsWarningDiskUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -83,14 +83,14 @@ summary: 'The node disk usage has exceeded the warning threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }} disk usage over the last 5m which is above the threshold of %(alertsWarningDiskUsage)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeDiskUsage', expr: ||| 100 * sum without(nodeid, path, mount, type) ((opensearch_fs_path_total_bytes{%(filteringSelector)s} - opensearch_fs_path_free_bytes{%(filteringSelector)s}) / opensearch_fs_path_total_bytes{%(filteringSelector)s}) > %(alertsCriticalDiskUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -99,14 +99,14 @@ summary: 'The node disk usage has exceeded the critical threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }}%% disk usage over the last 5m which is above the threshold of %(alertsCriticalDiskUsage)s. 
- ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeCpuUsage', expr: ||| sum without(nodeid) (opensearch_os_cpu_percent{%(filteringSelector)s}) > %(alertsWarningCPUUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -115,14 +115,14 @@ summary: 'The node CPU usage has exceeded the warning threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }}%% CPU usage over the last 5m which is above the threshold of %(alertsWarningCPUUsage)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeCpuUsage', expr: ||| sum without(nodeid) (opensearch_os_cpu_percent{%(filteringSelector)s}) > %(alertsCriticalCPUUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -131,14 +131,14 @@ summary: 'The node CPU usage has exceeded the critical threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }}%% CPU usage over the last 5m which is above the threshold of %(alertsCriticalCPUUsage)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeMemoryUsage', expr: ||| sum without(nodeid) (opensearch_os_mem_used_percent{%(filteringSelector)s}) > %(alertsWarningMemoryUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -147,14 +147,14 @@ summary: 'The node memory usage has exceeded the warning threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }}%% memory usage over the last 5m which is above the threshold of %(alertsWarningMemoryUsage)s. 
- ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeMemoryUsage', expr: ||| sum without(nodeid) (opensearch_os_mem_used_percent{%(filteringSelector)s}) > %(alertsCriticalMemoryUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -163,14 +163,14 @@ summary: 'The node memory usage has exceeded the critical threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }}%% memory usage over the last 5m which is above the threshold of %(alertsCriticalMemoryUsage)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchModerateRequestLatency', expr: ||| sum without(context) ((increase(opensearch_index_search_fetch_time_seconds{%(filteringSelector)s, context="total"}[5m])+increase(opensearch_index_search_query_time_seconds{context="total"}[5m])+increase(opensearch_index_search_scroll_time_seconds{context="total"}[5m])) / clamp_min(increase(opensearch_index_search_fetch_count{context="total"}[5m])+increase(opensearch_index_search_query_count{context="total"}[5m])+increase(opensearch_index_search_scroll_count{context="total"}[5m]), 1)) > %(alertsWarningRequestLatency)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -179,14 +179,14 @@ summary: 'The request latency has exceeded the warning threshold.', description: ||| {{$labels.index}} has had {{ printf "%%.0f" $value }}s of request latency over the last 5m which is above the threshold of %(alertsWarningRequestLatency)s. 
- ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchModerateIndexLatency', expr: ||| sum without(context) (increase(opensearch_index_indexing_index_time_seconds{%(filteringSelector)s, context="total"}[5m]) / clamp_min(increase(opensearch_index_indexing_index_count{context="total"}[5m]), 1)) > %(alertsWarningIndexLatency)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -195,7 +195,7 @@ summary: 'The index latency has exceeded the warning threshold.', description: ||| {{$labels.index}} has had {{ printf "%%.0f" $value }}s of index latency over the last 5m which is above the threshold of %(alertsWarningIndexLatency)s. - ||| % $._config, + ||| % this.config, }, }, ], diff --git a/opensearch-mixin/config.libsonnet b/opensearch-mixin/config.libsonnet index 99bac4d6a..25b3d6868 100644 --- a/opensearch-mixin/config.libsonnet +++ b/opensearch-mixin/config.libsonnet @@ -1,31 +1,52 @@ { - _config+:: { - enableMultiCluster: false, - // extra static selector to apply to all templated variables and alerts - filteringSelector: if self.enableMultiCluster then 'cluster!="",opensearch_cluster!=""' else 'opensearch_cluster!=""', - groupLabels: if self.enableMultiCluster then ['job', 'cluster', 'opensearch_cluster'] else ['job', 'opensearch_cluster'], - instanceLabels: ['node'], - dashboardTags: ['opensearch-mixin'], - dashboardPeriod: 'now-1h', - dashboardTimezone: 'default', - dashboardRefresh: '1m', - dashboardNamePrefix: '', + local this = self, + filteringSelector: if self.enableMultiCluster then 'cluster!="",opensearch_cluster!=""' else 'opensearch_cluster!=""', + groupLabels: if self.enableMultiCluster then ['job', 'cluster', 'opensearch_cluster'] else ['job', 'opensearch_cluster'], + logLabels: ['job', 'cluster', 'node'], + instanceLabels: ['node'], - // prefix dashboards uids - uid: 'opensearch', + dashboardTags: [self.uid], + uid: 'opensearch', + dashboardNamePrefix: 'OpenSearch', + dashboardPeriod: 'now-1h', + 
dashboardTimezone: 'default', + dashboardRefresh: '1m', + metricsSource: 'prometheus', // metrics source for signals - // alerts thresholds - alertsWarningShardReallocations: 0, - alertsWarningShardUnassigned: 0, - alertsWarningDiskUsage: 60, - alertsCriticalDiskUsage: 80, - alertsWarningCPUUsage: 70, - alertsCriticalCPUUsage: 85, - alertsWarningMemoryUsage: 70, - alertsCriticalMemoryUsage: 85, - alertsWarningRequestLatency: 0.5, // seconds - alertsWarningIndexLatency: 0.5, // seconds + // Agg Lists + groupAggList: std.join(',', this.groupLabels), + groupAggListWithInstance: std.join(',', this.groupLabels + this.instanceLabels), + + // Multi-cluster support + enableMultiCluster: false, + opensearchSelector: if self.enableMultiCluster then 'job=~"$job", instance=~"$instance", cluster=~"$cluster"' else 'job=~"$job", instance=~"$instance"', - enableLokiLogs: true, + // Logging configuration + enableLokiLogs: true, + extraLogLabels: ['level', 'severity'], // Required by logs-lib + logsVolumeGroupBy: 'level', + showLogsVolume: true, + logExpression: '{job=~"$job", cluster=~"$cluster", instance=~"$instance", exception_class=~".+"} | json | line_format "{{.severity}} {{.exception_class}} - {{.exception_message}}" | drop time_extracted, severity_extracted, exception_class_extracted, correlation_id_extracted', + + // Alerts configuration + alertsWarningShardReallocations: 0, // count + alertsWarningShardUnassigned: 0, // count + alertsWarningDiskUsage: 60, // % + alertsCriticalDiskUsage: 80, // % + alertsWarningCPUUsage: 70, // % + alertsCriticalCPUUsage: 85, // % + alertsWarningMemoryUsage: 70, // % + alertsCriticalMemoryUsage: 85, // % + alertsWarningRequestLatency: 0.5, // seconds + alertsWarningIndexLatency: 0.5, // seconds + + // Signals configuration + signals+: { + cluster: (import './signals/cluster.libsonnet')(this), + node: (import './signals/node.libsonnet')(this), + topk: (import './signals/topk.libsonnet')(this), + roles: (import 
'./signals/roles.libsonnet')(this), + search: (import './signals/search.libsonnet')(this), + indexing: (import './signals/indexing.libsonnet')(this), }, } diff --git a/opensearch-mixin/dashboards.libsonnet b/opensearch-mixin/dashboards.libsonnet new file mode 100644 index 000000000..4f541e816 --- /dev/null +++ b/opensearch-mixin/dashboards.libsonnet @@ -0,0 +1,125 @@ +local g = import '../g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; +local logslib = import 'logs-lib/logs/main.libsonnet'; +{ + local root = self, + new(this):: + + local links = this.grafana.links; + local tags = this.config.dashboardTags; + local uid = g.util.string.slugify(this.config.uid); + local vars = this.grafana.variables; + local annotations = this.grafana.annotations; + local refresh = this.config.dashboardRefresh; + local period = this.config.dashboardPeriod; + local timezone = this.config.dashboardTimezone; + { + + 'opensearch-cluster-overview.json': + g.dashboard.new(this.config.dashboardNamePrefix + ' Cluster Overview') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels([ + this.grafana.rows.clusterOverviewRow, + this.grafana.rows.rolesRow, + this.grafana.rows.resourceUsageRow, + this.grafana.rows.storageAndTasksRow, + this.grafana.rows.searchPerformanceRow, + this.grafana.rows.ingestPerformanceRow, + this.grafana.rows.indexingPerformanceRow, + ]), + ) + ) + root.applyCommon( + vars.multiInstance, + uid + '-cluster-overview', + tags, + links { opensearchClusterOverview+:: {} }, + annotations, + timezone, + refresh, + period, + ), + 'opensearch-node-overview.json': + g.dashboard.new(this.config.dashboardNamePrefix + ' Node Overview') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels([ + this.grafana.rows.nodeRolesRow, + this.grafana.rows.nodeHealthRow, + this.grafana.rows.nodeJVMRow, + this.grafana.rows.threadPoolsRow, + ]) + ) + ) + root.applyCommon( + 
vars.multiInstance, + uid + '-node-overview', + tags, + links { opensearchNodeOverview+:: {} }, + annotations, + timezone, + refresh, + period, + ), + 'opensearch-search-and-index-overview.json': + g.dashboard.new(this.config.dashboardNamePrefix + ' Search and Index Overview') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels([ + this.grafana.rows.searchAndIndexSearchPerformanceRow, + this.grafana.rows.searchAndIndexIndexingPerformanceRow, + this.grafana.rows.searchAndIndexCapacityRow, + ]) + ) + ) + root.applyCommon( + vars.multiInstance, + uid + '-search-and-index-overview', + tags, + links { opensearchSearchAndIndexOverview+:: {} }, + annotations, + timezone, + refresh, + period, + ), + + } + if this.config.enableLokiLogs then { + 'opensearch-logs.json': + logslib.new( + this.config.dashboardNamePrefix + ' Logs', + datasourceName=this.grafana.variables.datasources.loki.name, + datasourceRegex=this.grafana.variables.datasources.loki.regex, + filterSelector=this.config.filteringSelector, + labels=this.config.groupLabels + this.config.extraLogLabels, + formatParser=null, + showLogsVolume=this.config.showLogsVolume, + ) + { + dashboards+: + { + logs+: + root.applyCommon(vars.multiInstance, uid=uid + '-logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), + }, + panels+: + { + logs+: + g.panel.logs.options.withEnableLogDetails(true) + + g.panel.logs.options.withShowTime(false) + + g.panel.logs.options.withWrapLogMessage(false), + }, + variables+: { + toArray+: [ + this.grafana.variables.datasources.prometheus { hide: 2 }, + ], + }, + }.dashboards.logs, + } else {}, + + applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period): + g.dashboard.withTags(tags) + + g.dashboard.withUid(uid) + + g.dashboard.withLinks(std.objectValues(links)) + + g.dashboard.withTimezone(timezone) + + g.dashboard.withRefresh(refresh) + + 
g.dashboard.time.withFrom(period) + + g.dashboard.withVariables(vars) + + g.dashboard.withAnnotations(std.objectValues(annotations)), +} diff --git a/opensearch-mixin/dashboards/dashboards.libsonnet b/opensearch-mixin/dashboards/dashboards.libsonnet deleted file mode 100644 index 65bd82d29..000000000 --- a/opensearch-mixin/dashboards/dashboards.libsonnet +++ /dev/null @@ -1,3 +0,0 @@ -(import 'opensearch-cluster-overview.libsonnet') + -(import 'opensearch-node-overview.libsonnet') + -(import 'opensearch-search-and-index-overview.libsonnet') diff --git a/opensearch-mixin/dashboards/opensearch-cluster-overview.libsonnet b/opensearch-mixin/dashboards/opensearch-cluster-overview.libsonnet deleted file mode 100644 index c8d18c418..000000000 --- a/opensearch-mixin/dashboards/opensearch-cluster-overview.libsonnet +++ /dev/null @@ -1,1717 +0,0 @@ -local g = import '../g.libsonnet'; -local grafana = import 'grafonnet/grafana.libsonnet'; -local prometheus = grafana.prometheus; -local commonlib = import 'common-lib/common/main.libsonnet'; -local xtd = import 'github.com/jsonnet-libs/xtd/main.libsonnet'; -local utils = commonlib.utils; - -local dashboardUidSuffix = '-cluster-overview'; - -{ - // variables - local variables = (import '../variables.libsonnet').new( - filteringSelector=$._config.filteringSelector, - groupLabels=$._config.groupLabels, - instanceLabels=[], - varMetric='opensearch_cluster_status', - ), - - local legendGroupLabels = xtd.array.slice($._config.groupLabels, -1), - - local panels = (import '../panels.libsonnet').new( - $._config.groupLabels, - $._config.instanceLabels, - variables, - ), - - local promDatasource = { - uid: '${%s}' % variables.datasources.prometheus.name, - }, - // panels - local clusterStatusPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'min by(%(agg)s) (opensearch_cluster_status{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - 
datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels) - ), - ], - type: 'stat', - title: 'Cluster status', - description: 'The overall health and availability of the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [ - { - options: { - '0': { - index: 0, - text: 'Green', - }, - '1': { - index: 1, - text: 'Yellow', - }, - '2': { - index: 2, - text: 'Red', - }, - }, - type: 'value', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'green', - value: 0, - }, - { - color: 'yellow', - value: 1, - }, - { - color: 'red', - value: 2, - }, - ], - }, - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.4.3', - }, - - local nodeCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'min by(%(agg)s) (opensearch_cluster_nodes_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels) - ), - ], - type: 'stat', - title: 'Node count', - description: 'The number of running nodes across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 0, - }, - { - color: 'green', - value: 1, - }, - ], - }, - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.4.3', - }, - - local 
dataNodeCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'min by(%(agg)s) (opensearch_cluster_datanodes_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels) - ), - ], - type: 'stat', - title: 'Data node count', - description: 'The number of data nodes in the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 0, - }, - { - color: 'green', - value: 1, - }, - ], - }, - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.4.3', - }, - - local shardCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum(max by (type) (opensearch_cluster_shards_number{%(queriesSelector)s}))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels) - ), - ], - type: 'stat', - title: 'Shard count', - description: 'The number of shards in the OpenSearch cluster across all indices.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 0, - }, - { - color: 'green', - value: 1, - }, - ], - }, - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - 
textMode: 'auto', - }, - pluginVersion: '9.4.3', - }, - - local activeShardsPercentagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'min by(%(agg)s) (opensearch_cluster_shards_active_percent{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels) - ), - - ], - type: 'stat', - title: 'Active shards %', - description: 'Percent of active shards across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 0, - }, - { - color: 'yellow', - value: 1, - }, - { - color: 'green', - value: 100, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.4.3', - }, - - local topNodesByCPUUsagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, sort_desc(sum by(node, %(agg)s) (opensearch_os_cpu_percent{%(queriesSelector)s})))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - ), - ], - type: 'bargauge', - title: 'Top nodes by CPU usage', - description: 'Top nodes by OS CPU usage across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - max: 100, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - displayMode: 'gradient', - 
minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - }, - pluginVersion: '9.4.3', - }, - - local breakersTrippedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(%(agg)s, node) (increase(opensearch_circuitbreaker_tripped_count{%(queriesSelector)s}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - interval='1m', - ), - ], - type: 'bargauge', - title: 'Breakers tripped', - description: 'The total count of circuit breakers tripped across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'trips', - }, - overrides: [], - }, - options: { - displayMode: 'gradient', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - }, - pluginVersion: '9.4.3', - }, - - local shardStatusPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'min by(type, %(agg)s) (opensearch_cluster_shards_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{type}}', - ), - ], - type: 'bargauge', - title: 'Shard status', - description: 'Shard status counts across the Opensearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'shards', - }, - overrides: [], - }, - 
options: { - displayMode: 'gradient', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - }, - pluginVersion: '9.4.3', - }, - - local topNodesByDiskUsagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, sort_desc((100 * (sum by(node, %(agg)s) (opensearch_fs_path_total_bytes{%(queriesSelector)s})- sum by(node, %(agg)s) (opensearch_fs_path_free_bytes{%(queriesSelector)s})) / sum by(node, %(agg)s) (opensearch_fs_path_total_bytes{%(queriesSelector)s}))))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - ), - ], - type: 'bargauge', - title: 'Top nodes by disk usage', - description: 'Top nodes by disk usage across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - max: 100, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - displayMode: 'gradient', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - }, - pluginVersion: '9.4.3', - }, - - local totalDocumentsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_indices_indexing_index_count{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels), - ), - ], - type: 'timeseries', - title: 'Total documents', - description: 'The total count of documents indexed across the OpenSearch cluster.', - fieldConfig: { - 
defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'documents', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local pendingTasksPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_cluster_pending_tasks_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels), - ), - ], - type: 'timeseries', - title: 'Pending tasks', - description: 'The number of tasks waiting to be executed across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: 
[], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'tasks', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local storeSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_indices_store_size_bytes{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels), - ), - ], - type: 'timeseries', - title: 'Store size', - description: 'The total size of the store across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local maxTaskWaitTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(%(agg)s) (opensearch_cluster_task_max_waiting_time_seconds{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', 
$._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels), - ), - ], - type: 'timeseries', - title: 'Max task wait time', - description: 'The max wait time for tasks to be executed across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local clusterSearchAndIndexSummaryRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Cluster search and index summary', - collapsed: false, - }, - - local topIndicesByRequestRatePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - ||| - topk(10, sort_desc(avg by(index, %(agg)s) ( - opensearch_index_search_fetch_current_number{%(queriesSelector)s, context="total"} + - opensearch_index_search_query_current_number{%(queriesSelector)s, context="total"} + - opensearch_index_search_scroll_current_number{%(queriesSelector)s, context="total"} - ))) - ||| - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{index}}', - ), - ], - type: 'timeseries', - title: 'Top indices by 
request rate', - description: 'Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topIndicesByRequestLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - ||| - topk(10, sort_desc(sum by(index, %(agg)s) ((increase(opensearch_index_search_fetch_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) - +increase(opensearch_index_search_query_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) - +increase(opensearch_index_search_scroll_time_seconds{%(queriesSelector)s, context="total"}[$__interval:])) - / clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelector)s, context="total"}[$__interval:]) - +increase(opensearch_index_search_query_count{%(queriesSelector)s, context="total"}[$__interval:]) - +increase(opensearch_index_search_scroll_count{%(queriesSelector)s, context="total"}[$__interval:]), 1)))) - ||| - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - 
datasource=promDatasource, - legendFormat='{{index}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Top indices by request latency', - description: 'Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topIndicesByCombinedCacheHitRatioPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - ||| - topk(10, sort_desc(avg by(index, %(agg)s) ( - 100 * (opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"} + - opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"}) / - clamp_min((opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"} + - opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"} + - opensearch_index_requestcache_miss_count{%(queriesSelector)s, context="total"} + - opensearch_index_querycache_miss_number{%(queriesSelector)s, context="total"}), 1 - )))) - ||| - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - 
legendFormat='{{index}}', - ), - ], - type: 'timeseries', - title: 'Top indices by combined cache hit ratio', - description: 'Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topNodesByIngestRatePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, sum by(node, %(agg)s) (rate(opensearch_ingest_total_count{%(queriesSelector)s}[$__rate_interval])))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - ), - ], - type: 'timeseries', - title: 'Top nodes by ingest rate', - description: 'Top nodes by rate of ingest across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - 
tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'Bps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topNodesByIngestLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - ||| - topk(10, sum by(%(agg)s, node) ( - increase(opensearch_ingest_total_time_seconds{%(queriesSelector)s}[$__interval:]) / - clamp_min(increase(opensearch_ingest_total_count{%(queriesSelector)s}[$__interval:]), 1))) - ||| - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Top nodes by ingest latency', - description: 'Top nodes by ingestion latency across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - 
color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topNodesByIngestErrorsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, sum by(%(agg)s, node) (increase(opensearch_ingest_total_failed_count{%(queriesSelector)s}[$__interval:])))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Top nodes by ingest errors', - description: 'Top nodes by ingestion failures across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'errors', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topIndicesByIndexRatePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, avg by(%(agg)s, index) (opensearch_index_indexing_index_current_number{%(queriesSelector)s}))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', 
$._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{index}}', - ), - ], - type: 'timeseries', - title: 'Top indices by index rate', - description: 'Top indices by rate of document indexing across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'documents/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topIndicesByIndexLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - ||| - topk(10, avg by(%(agg)s, index) - (increase(opensearch_index_indexing_index_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / - clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelector)s, context="total"}[$__interval:]), 1))) - ||| - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{index}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Top indices by index latency', - description: 'Top indices by indexing latency across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - 
axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topIndicesByIndexFailuresPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, avg by(%(agg)s, index) (increase(opensearch_index_indexing_index_failed_count{%(queriesSelector)s}[$__interval:])))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{index}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Top indices by index failures', - description: 'Top indices by index document failures across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: 
[ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'failures', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - grafanaDashboards+:: { - 'opensearch-cluster-overview.json': - g.dashboard.new($._config.dashboardNamePrefix + 'OpenSearch cluster overview') - + g.dashboard.withTags($._config.dashboardTags) - + g.dashboard.time.withFrom($._config.dashboardPeriod) - + g.dashboard.withTimezone($._config.dashboardTimezone) - + g.dashboard.withRefresh($._config.dashboardRefresh) - + g.dashboard.withUid($._config.uid + dashboardUidSuffix) - + g.dashboard.withLinks( - g.dashboard.link.dashboards.new( - 'Other Opensearch dashboards', - $._config.dashboardTags - ) - + g.dashboard.link.dashboards.options.withIncludeVars(true) - + g.dashboard.link.dashboards.options.withKeepTime(true) - + g.dashboard.link.dashboards.options.withAsDropdown(false) - ) - + g.dashboard.withPanels( - [ - panels.osRoles { gridPos: { h: 6, w: 24, x: 0, y: 0 } }, - clusterStatusPanel { gridPos: { h: 5, w: 3, x: 0, y: 2 } }, - nodeCountPanel { gridPos: { h: 5, w: 3, x: 3, y: 2 } }, - dataNodeCountPanel { gridPos: { h: 5, w: 3, x: 6, y: 2 } }, - shardCountPanel { gridPos: { h: 5, w: 3, x: 9, y: 2 } }, - activeShardsPercentagePanel { gridPos: { h: 5, w: 3, x: 12, y: 2 } }, - panels.osRolesTimeline { gridPos: { h: 5, w: 9, x: 15, y: 2 } }, - topNodesByCPUUsagePanel { gridPos: { h: 9, w: 8, x: 0, y: 4 } }, - breakersTrippedPanel { gridPos: { h: 9, w: 8, x: 8, y: 4 } }, - shardStatusPanel { gridPos: { h: 9, w: 8, x: 16, y: 4 } }, - topNodesByDiskUsagePanel { gridPos: { h: 10, w: 8, x: 0, y: 13 } }, - totalDocumentsPanel { gridPos: { h: 5, w: 8, x: 8, y: 13 } }, - pendingTasksPanel { gridPos: { h: 5, w: 8, x: 16, y: 13 } }, - storeSizePanel { gridPos: { h: 5, w: 8, x: 8, y: 18 } }, - maxTaskWaitTimePanel { gridPos: { h: 5, w: 8, x: 16, y: 18 } }, - 
clusterSearchAndIndexSummaryRow { gridPos: { h: 1, w: 24, x: 0, y: 23 } }, - topIndicesByRequestRatePanel { gridPos: { h: 8, w: 8, x: 0, y: 24 } }, - topIndicesByRequestLatencyPanel { gridPos: { h: 8, w: 8, x: 8, y: 24 } }, - topIndicesByCombinedCacheHitRatioPanel { gridPos: { h: 8, w: 8, x: 16, y: 24 } }, - topNodesByIngestRatePanel { gridPos: { h: 8, w: 8, x: 0, y: 32 } }, - topNodesByIngestLatencyPanel { gridPos: { h: 8, w: 8, x: 8, y: 32 } }, - topNodesByIngestErrorsPanel { gridPos: { h: 8, w: 8, x: 16, y: 32 } }, - topIndicesByIndexRatePanel { gridPos: { h: 8, w: 8, x: 0, y: 40 } }, - topIndicesByIndexLatencyPanel { gridPos: { h: 8, w: 8, x: 8, y: 40 } }, - topIndicesByIndexFailuresPanel { gridPos: { h: 8, w: 8, x: 16, y: 40 } }, - ] - ) - + g.dashboard.withVariables(variables.singleInstance), - }, -} diff --git a/opensearch-mixin/dashboards/opensearch-node-overview.libsonnet b/opensearch-mixin/dashboards/opensearch-node-overview.libsonnet deleted file mode 100644 index 32b18e5ff..000000000 --- a/opensearch-mixin/dashboards/opensearch-node-overview.libsonnet +++ /dev/null @@ -1,1201 +0,0 @@ -local g = (import '../g.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local commonlib = import 'common-lib/common/main.libsonnet'; -local utils = commonlib.utils; -local prometheus = grafana.prometheus; -local xtd = import 'github.com/jsonnet-libs/xtd/main.libsonnet'; -local dashboardUidSuffix = '-node-overview'; - -{ - - // variables - local variables = (import '../variables.libsonnet').new( - filteringSelector=$._config.filteringSelector, - groupLabels=$._config.groupLabels, - instanceLabels=$._config.instanceLabels, - varMetric='opensearch_os_cpu_percent', - enableLokiLogs=$._config.enableLokiLogs, - ), - - local legendInstanceLabels = xtd.array.slice($._config.instanceLabels, -1), - - local panels = (import '../panels.libsonnet').new( - $._config.groupLabels, - $._config.instanceLabels, - variables, - ), - - local promDatasource = { - uid: 
'${%s}' % variables.datasources.prometheus.name, - }, - - local lokiDatasource = { - uid: '${%s}' % variables.datasources.loki.name, - }, - - local nodeHealthRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Node health', - collapsed: false, - }, - - local nodeCPUUsagePanel = - commonlib.panels.cpu.timeSeries.utilization.new( - 'Node CPU usage', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - 'opensearch_os_cpu_percent{%(queriesSelector)s}' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(legendInstanceLabels)), - - ], - description="CPU usage percentage of the node's Operating System.", - ) - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), - - local nodeMemoryUsagePanel = - commonlib.panels.memory.timeSeries.usagePercent.new( - 'Node memory usage', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - 'opensearch_os_mem_used_percent{%(queriesSelector)s}' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Memory usage percentage of the node for the Operating System and OpenSearch', - ) - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), - - local nodeIOPanel = - commonlib.panels.disk.timeSeries.ioBytesPerSec.new( - 'Node I/O', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - 'sum by(%(agg)s) (rate(opensearch_fs_io_total_read_bytes{%(queriesSelector)s}[$__rate_interval]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat('%s - read' % utils.labelsToPanelLegend(legendInstanceLabels)), - 
g.query.prometheus.new( - promDatasource.uid, - 'sum by(%(agg)s) (rate(opensearch_fs_io_total_write_bytes{%(queriesSelector)s}[$__rate_interval]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat('%s - write' % utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Node file system read and write data.', - ) - + g.panel.timeSeries.fieldConfig.defaults.custom.withStacking(value='normal'), - - local nodeOpenConnectionsPanel = - commonlib.panels.generic.timeSeries.base.new( - 'Node open connections', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - 'sum by (%(agg)s) (opensearch_transport_server_open_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Number of open connections for the selected node.', - ) - + g.panel.timeSeries.fieldConfig.defaults.custom.withStacking(value='normal') - + g.panel.timeSeries.standardOptions.withUnit(''), - - local nodeDiskUsagePanel = - commonlib.panels.disk.timeSeries.usagePercent.new( - 'Node disk usage', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - '100 - (100 * opensearch_fs_path_free_bytes{%(queriesSelector)s} / clamp_min(opensearch_fs_path_total_bytes{%(queriesSelector)s}, 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Disk usage percentage of the selected node.', - ), - - local nodeMemorySwapPanel = - commonlib.panels.memory.timeSeries.usagePercent.new( - 'Node memory swap', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - '100 * 
opensearch_os_swap_used_bytes{%(queriesSelector)s} / clamp_min((opensearch_os_swap_used_bytes{%(queriesSelector)s} + opensearch_os_swap_free_bytes{%(queriesSelector)s}), 1)' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Percentage of swap space used by OpenSearch and the Operating System on the selected node.', - ) - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), - - local nodeNetworkTrafficPanel = - commonlib.panels.network.timeSeries.traffic.new( - 'Node network traffic', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - 'sum by (%(agg)s) (rate(opensearch_transport_tx_bytes_count{%(queriesSelector)s}[$__rate_interval])) * 8' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat('%s - sent' % utils.labelsToPanelLegend(legendInstanceLabels)), - g.query.prometheus.new( - promDatasource.uid, - 'sum by (%(agg)s) (rate(opensearch_transport_rx_bytes_count{%(queriesSelector)s}[$__rate_interval])) * 8' - % - { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - } - - ) + g.query.prometheus.withLegendFormat('%s - received' % utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Node network traffic sent and received.', - ) - + commonlib.panels.network.timeSeries.traffic.withNegateOutPackets('/sent/') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), - - local circuitBreakersPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(name, %(agg)s) (increase(opensearch_circuitbreaker_tripped_count{%(queriesSelector)s}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: 
std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - {{ name }}' % utils.labelsToPanelLegend(legendInstanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'Circuit breakers', - description: 'Circuit breakers tripped on the selected node by type', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'trips', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local nodeJVMRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Node JVM', - collapsed: false, - }, - - local jvmHeapUsedVsCommittedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) (opensearch_jvm_mem_heap_used_bytes{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - used' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - prometheus.target( - 'sum by (%(agg)s) (opensearch_jvm_mem_heap_committed_bytes{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + 
$._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - commited' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM heap used vs. committed', - description: 'The amount of heap memory used vs committed on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - }, - - local jvmNonheapUsedVsCommittedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) (opensearch_jvm_mem_nonheap_used_bytes{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - used' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - prometheus.target( - 'sum by (%(agg)s) (opensearch_jvm_mem_nonheap_committed_bytes{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - commited' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - 
type: 'timeseries', - title: 'JVM non-heap used vs. committed', - description: 'The amount of non-heap memory used vs committed on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - }, - - local jvmThreadsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) (opensearch_jvm_threads_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM threads', - description: 'The number of threads running in the JVM on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { 
- type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'threads', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local jvmBufferPoolsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by( %(agg)s, bufferpool) (opensearch_jvm_bufferpool_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - {{bufferpool}}' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM buffer pools', - description: 'The number of buffer pools available on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'buffer pools', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - 
tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - }, - - local jvmUptimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(%(agg)s) (opensearch_jvm_uptime_seconds{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM uptime', - description: 'The uptime of the JVM in seconds on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - pluginVersion: '9.4.3', - }, - - local jvmGarbageCollectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) (increase(opensearch_jvm_gc_collection_count{%(queriesSelector)s}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - interval='1m', - ), - ], - 
type: 'timeseries', - title: 'JVM garbage collections', - description: 'The number of garbage collection operations on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'operations', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local jvmGarbageCollectionTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) (increase(opensearch_jvm_gc_collection_time_seconds{%(queriesSelector)s}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'JVM garbage collection time', - description: 'The amount of time spent on garbage collection on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - 
tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local jvmBufferPoolUsagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - '100 * (sum by (%(agg)s, bufferpool) (opensearch_jvm_bufferpool_used_bytes{%(queriesSelector)s})) / clamp_min((sum by (job, bufferpool, cluster) (opensearch_jvm_bufferpool_total_capacity_bytes{%(queriesSelector)s})),1)' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - {{bufferpool}}' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM buffer pool usage', - description: 'The percent used of JVM buffer pool memory.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 
'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local threadPoolsRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Thread pools', - collapsed: false, - }, - - local threadPoolThreadsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(%(agg)s) ((opensearch_threadpool_threads_number{%(queriesSelector)s}))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'Thread pool threads', - description: 'The number of threads in the thread pool for the selected node', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'threads', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local threadPoolTasksPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) 
(opensearch_threadpool_tasks_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'Thread pool tasks', - description: 'The number of tasks in the thread pool for the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'tasks', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local errorLogsPanelPanel = { - datasource: lokiDatasource, - targets: [ - { - datasource: lokiDatasource, - editorMode: 'code', - expr: '{%(queriesSelector)s} |~ ""' % { queriesSelector: variables.queriesSelector }, - queryType: 'range', - refId: 'A', - }, - ], - type: 'logs', - title: 'Error logs panel', - description: 'The recent error logs being reported by OpenSearch.', - options: { - dedupStrategy: 'none', - enableLogDetails: true, - prettifyLogMessage: false, - showCommonLabels: false, - showLabels: false, - showTime: false, - sortOrder: 'Descending', - wrapLogMessage: false, - }, - }, - - grafanaDashboards+:: { - 
'node-overview.json': - g.dashboard.new($._config.dashboardNamePrefix + 'OpenSearch node overview') - + g.dashboard.withTags($._config.dashboardTags) - + g.dashboard.time.withFrom($._config.dashboardPeriod) - + g.dashboard.withTimezone($._config.dashboardTimezone) - + g.dashboard.withRefresh($._config.dashboardRefresh) - + g.dashboard.withUid($._config.uid + dashboardUidSuffix) - + g.dashboard.withLinks( - g.dashboard.link.dashboards.new( - 'Other Opensearch dashboards', - $._config.dashboardTags - ) - + g.dashboard.link.dashboards.options.withIncludeVars(true) - + g.dashboard.link.dashboards.options.withKeepTime(true) - + g.dashboard.link.dashboards.options.withAsDropdown(false) - ) - + g.dashboard.withPanels( - std.flattenArrays([ - [ - panels.osRolesTimeline { gridPos: { h: 5, w: 24, x: 0, y: 0 } }, - nodeHealthRow { gridPos: { h: 1, w: 24, x: 0, y: 1 } }, - nodeCPUUsagePanel { gridPos: { h: 7, w: 6, x: 0, y: 2 } }, - nodeMemoryUsagePanel { gridPos: { h: 7, w: 6, x: 6, y: 2 } }, - nodeIOPanel { gridPos: { h: 7, w: 6, x: 12, y: 2 } }, - nodeOpenConnectionsPanel { gridPos: { h: 7, w: 6, x: 18, y: 2 } }, - nodeDiskUsagePanel { gridPos: { h: 7, w: 6, x: 0, y: 8 } }, - nodeMemorySwapPanel { gridPos: { h: 7, w: 6, x: 6, y: 8 } }, - nodeNetworkTrafficPanel { gridPos: { h: 7, w: 6, x: 12, y: 8 } }, - circuitBreakersPanel { gridPos: { h: 7, w: 6, x: 18, y: 8 } }, - nodeJVMRow { gridPos: { h: 1, w: 24, x: 0, y: 15 } }, - jvmHeapUsedVsCommittedPanel { gridPos: { h: 6, w: 6, x: 0, y: 16 } }, - jvmNonheapUsedVsCommittedPanel { gridPos: { h: 6, w: 6, x: 6, y: 16 } }, - jvmThreadsPanel { gridPos: { h: 6, w: 6, x: 12, y: 16 } }, - jvmBufferPoolsPanel { gridPos: { h: 6, w: 6, x: 18, y: 16 } }, - jvmUptimePanel { gridPos: { h: 6, w: 6, x: 0, y: 22 } }, - jvmGarbageCollectionsPanel { gridPos: { h: 6, w: 6, x: 6, y: 22 } }, - jvmGarbageCollectionTimePanel { gridPos: { h: 6, w: 6, x: 12, y: 22 } }, - jvmBufferPoolUsagePanel { gridPos: { h: 6, w: 6, x: 18, y: 22 } }, - threadPoolsRow 
{ gridPos: { h: 1, w: 24, x: 0, y: 28 } }, - threadPoolThreadsPanel { gridPos: { h: 8, w: 12, x: 0, y: 29 } }, - threadPoolTasksPanel { gridPos: { h: 8, w: 12, x: 12, y: 29 } }, - ], - if $._config.enableLokiLogs then [ - errorLogsPanelPanel { gridPos: { h: 7, w: 24, x: 0, y: 37 } }, - ] else [], - [], - ]) - ) - + g.dashboard.withVariables(variables.multiInstance), - }, -} diff --git a/opensearch-mixin/dashboards/opensearch-search-and-index-overview.libsonnet b/opensearch-mixin/dashboards/opensearch-search-and-index-overview.libsonnet deleted file mode 100644 index c94f5f8d9..000000000 --- a/opensearch-mixin/dashboards/opensearch-search-and-index-overview.libsonnet +++ /dev/null @@ -1,1898 +0,0 @@ -local g = (import '../g.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local commonlib = import 'common-lib/common/main.libsonnet'; -local utils = commonlib.utils; -local prometheus = grafana.prometheus; - -local dashboardUidSuffix = '-search-and-index-overview'; - -local promDatasourceName = 'prometheus_datasource'; -local instanceLabels = ['index']; - -{ - - // override - local hideZeros = - { - matcher: { - id: 'byValue', - options: { - reducer: 'allIsZero', - op: 'gte', - value: 0, - }, - }, - properties: [ - { - id: 'custom.hideFrom', - value: { - tooltip: true, - viz: false, - legend: true, - }, - }, - ], - }, - // variables - local variables = (import '../variables.libsonnet').new( - filteringSelector=$._config.filteringSelector, - groupLabels=$._config.groupLabels, - instanceLabels=instanceLabels, - varMetric='opensearch_index_search_fetch_count', - ), - - local promDatasource = { - uid: '${%s}' % variables.datasources.prometheus.name, - }, - - local requestPerformanceRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Request performance', - collapsed: false, - }, - - local requestRatePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by 
(%(agg)s) (opensearch_index_search_query_current_number{%(queriesSelector)s, context=~"total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - query' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by (%(agg)s) (opensearch_index_search_fetch_current_number{%(queriesSelector)s, context=~"total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - fetch' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by (%(agg)s) (opensearch_index_search_scroll_current_number{%(queriesSelector)s, context=~"total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - scroll' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Request rate', - description: 'Rate of fetch, scroll, and query requests by selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - 
placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local requestLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by (%(agg)s) (increase(opensearch_index_search_query_time_seconds{%(queriesSelector)s}[$__interval:]) / clamp_min(increase(opensearch_index_search_query_count{%(queriesSelector)s, context="total"}[$__interval:]), 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - query' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - prometheus.target( - 'avg by (%(agg)s) (increase(opensearch_index_search_fetch_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelector)s, context="total"}[$__interval:]), 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - fetch' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - prometheus.target( - 'avg by (%(agg)s) (increase(opensearch_index_search_scroll_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_search_scroll_count{%(queriesSelector)s, context="total"}[$__interval:]), 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - scroll' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'Request latency', - description: 'Latency of fetch, scroll, and query requests by selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: 
false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local cacheHitRatioPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (100 * (opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"}) / clamp_min(opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"} + opensearch_index_requestcache_miss_count{%(queriesSelector)s, context="total"}, 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - request' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by(%(agg)s) (100 * (opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"}) / clamp_min(opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"} + opensearch_index_querycache_miss_number{%(queriesSelector)s, context="total"}, 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - query' % 
utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Cache hit ratio', - description: 'Ratio of query cache and request cache hits and misses.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local evictionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_querycache_evictions_count{%(queriesSelector)s, context="total"}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - query cache' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_requestcache_evictions_count{%(queriesSelector)s, context="total"}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - request cache' % 
utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_fielddata_evictions_count{%(queriesSelector)s, context="total"}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - field data' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'Evictions', - description: 'Total evictions count by cache type for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'evictions', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local indexPerformanceRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Index performance', - collapsed: false, - }, - - local indexRatePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_indexing_index_current_number{%(queriesSelector)s, context="total"})' - % { - queriesSelector: 
variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Index rate', - description: 'Rate of indexed documents for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'documents/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local indexLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_indexing_index_time_seconds{%(queriesSelector)s, context=~"total"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelector)s, context=~"total"}[$__interval:]),1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'Index latency', - description: 'Document indexing latency 
for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local indexFailuresPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_indexing_index_failed_count{%(queriesSelector)s, context="total"}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'Index failures', - description: 'Number of indexing failures for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - 
pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'failures', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local flushLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_flush_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelector)s, context="total"}[$__interval:]),1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Flush latency', - description: 'Index flush latency for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 
'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local mergeTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_merges_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:])) > 0' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - total' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_merges_total_stopped_time_seconds{%(queriesSelector)s, context="total"}[$__interval:])) > 0' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - stopped' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_merges_total_throttled_time_seconds{%(queriesSelector)s, context="total"}[$__interval:])) > 0' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - throttled' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Merge time', - description: 'Index merge time for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - 
lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local refreshLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_refresh_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelector)s, context="total"}[$__interval:]),1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Refresh latency', - description: 'Index refresh latency for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, 
- }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local translogOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_translog_operations_number{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Translog operations', - description: 'Current number of translog operations for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'operations', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + 
g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local docsDeletedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by (%(agg)s) (opensearch_index_indexing_delete_current_number{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Docs deleted', - description: 'Rate of documents deleted for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'documents/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local indexCapacityRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Index capacity', - collapsed: false, - }, - - local documentsIndexedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by (%(agg)s) (opensearch_index_indexing_index_count{%(queriesSelector)s, context="total"})' - % { - queriesSelector: 
variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Documents indexed', - description: 'Number of indexed documents for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'documents', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local segmentCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_segments_number{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Segment count', - description: 'Current number of segments for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 
'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'segments', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local mergeCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_merges_total_docs_count{%(queriesSelector)s, context="total"}[$__interval:])) > 0' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Merge count', - description: 'Number of merge operations for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - 
mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'merges', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local cacheSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_querycache_memory_size_bytes{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - query' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_requestcache_memory_size_bytes{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - request' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Cache size', - description: 'Size of query cache and request cache.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - 
}, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local storeSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_store_size_bytes{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Store size', - description: 'Size of the store in bytes for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local 
segmentSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_segments_memory_bytes{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Segment size', - description: 'Memory used by segments for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local mergeSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_merges_current_size_bytes{%(queriesSelector)s, context="total"}) > 0' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Merge 
size', - description: 'Size of merge operations in bytes for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - local shardCountPanel = - { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (index) (avg by(%(agg)s) (opensearch_index_shards_number{%(queriesSelector)s, type=~"active|active_primary"}))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Shard count', - description: 'The number of index shards for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: 
{ - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'shards', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - grafanaDashboards+:: { - 'search-and-index-overview.json': - g.dashboard.new($._config.dashboardNamePrefix + 'OpenSearch search and index overview') - + g.dashboard.withTags($._config.dashboardTags) - + g.dashboard.time.withFrom($._config.dashboardPeriod) - + g.dashboard.withTimezone($._config.dashboardTimezone) - + g.dashboard.withRefresh($._config.dashboardRefresh) - + g.dashboard.withUid($._config.uid + dashboardUidSuffix) - + g.dashboard.withLinks( - g.dashboard.link.dashboards.new( - 'Other Opensearch dashboards', - $._config.dashboardTags - ) - + g.dashboard.link.dashboards.options.withIncludeVars(true) - + g.dashboard.link.dashboards.options.withKeepTime(true) - + g.dashboard.link.dashboards.options.withAsDropdown(false) - ) - + g.dashboard.withPanels( - [ - requestPerformanceRow { gridPos: { h: 1, w: 24, x: 0, y: 0 } }, - requestRatePanel { gridPos: { h: 8, w: 6, x: 0, y: 1 } }, - requestLatencyPanel { gridPos: { h: 8, w: 6, x: 6, y: 1 } }, - cacheHitRatioPanel { gridPos: { h: 8, w: 6, x: 12, y: 1 } }, - evictionsPanel { gridPos: { h: 8, w: 6, x: 18, y: 1 } }, - indexPerformanceRow { gridPos: { h: 1, w: 24, x: 0, y: 9 } }, - indexRatePanel { gridPos: { h: 8, w: 6, x: 0, y: 10 } }, - indexLatencyPanel { gridPos: { h: 8, w: 6, x: 
6, y: 10 } }, - indexFailuresPanel { gridPos: { h: 8, w: 6, x: 12, y: 10 } }, - flushLatencyPanel { gridPos: { h: 8, w: 6, x: 18, y: 10 } }, - mergeTimePanel { gridPos: { h: 8, w: 6, x: 0, y: 18 } }, - refreshLatencyPanel { gridPos: { h: 8, w: 6, x: 6, y: 18 } }, - translogOperationsPanel { gridPos: { h: 8, w: 6, x: 12, y: 18 } }, - docsDeletedPanel { gridPos: { h: 8, w: 6, x: 18, y: 18 } }, - indexCapacityRow { gridPos: { h: 1, w: 24, x: 0, y: 26 } }, - documentsIndexedPanel { gridPos: { h: 8, w: 6, x: 0, y: 27 } }, - segmentCountPanel { gridPos: { h: 8, w: 6, x: 6, y: 27 } }, - mergeCountPanel { gridPos: { h: 8, w: 6, x: 12, y: 27 } }, - cacheSizePanel { gridPos: { h: 8, w: 6, x: 18, y: 27 } }, - storeSizePanel { gridPos: { h: 8, w: 6, x: 0, y: 35 } }, - segmentSizePanel { gridPos: { h: 8, w: 6, x: 6, y: 35 } }, - mergeSizePanel { gridPos: { h: 8, w: 6, x: 12, y: 35 } }, - shardCountPanel { gridPos: { h: 8, w: 6, x: 18, y: 35 } }, - ] - ) - + g.dashboard.withVariables(variables.multiInstance), - }, -} diff --git a/opensearch-mixin/dashboards_out/node-overview.json b/opensearch-mixin/dashboards_out/node-overview.json deleted file mode 100644 index ac83ca553..000000000 --- a/opensearch-mixin/dashboards_out/node-overview.json +++ /dev/null @@ -1,1706 +0,0 @@ -{ - "links": [ - { - "asDropdown": false, - "includeVars": true, - "keepTime": true, - "tags": [ - "opensearch-mixin" - ], - "title": "Other Opensearch dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "OpenSearch node roles over time.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "2": { - "color": "light-purple", - "index": 0, - "text": "data" - }, - "3": { - "color": "light-green", - "index": 1, - "text": "master" - }, - "4": { - "color": "light-blue", - "index": 2, - "text": "ingest" - }, - "5": { - "color": "light-yellow", - "index": 3, - "text": "cluster_manager" - }, - "6": { - 
"color": "super-light-red", - "index": 4, - "text": "remote_cluster_client" - } - }, - "type": "value" - } - ] - } - }, - "gridPos": { - "h": 5, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 1, - "maxDataPoints": 100, - "options": { - "legend": false, - "showValue": "never" - }, - "pluginVersion": "v10.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"data\"}[1m]) == 1\n) * 2\n", - "legendFormat": "{{node}}" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"master\"}[1m]) == 1\n) * 3\n", - "legendFormat": "{{node}}" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"ingest\"}[1m]) == 1\n) * 4\n", - "legendFormat": "{{node}}" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"cluster_manager\"}[1m]) == 1\n) * 5\n", - "legendFormat": "{{node}}" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"remote_cluster_client\"}[1m]) == 1\n) * 6\n", - "legendFormat": 
"{{node}}" - } - ], - "title": "Roles timeline", - "type": "status-history" - }, - { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 2, - "targets": [ ], - "title": "Node health", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "CPU usage percentage of the node's Operating System.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "custom": { - "fillOpacity": 5, - "gradientMode": "scheme", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "decimals": 1, - "max": 100, - "min": 0, - "unit": "percent" - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 2 - }, - "id": 3, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", - "legendFormat": "{{node}}" - } - ], - "title": "Node CPU usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Memory usage percentage of the node for the Operating System and OpenSearch", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "custom": { - "fillOpacity": 5, - "gradientMode": "scheme", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "decimals": 1, - "max": 100, - "min": 0, - "unit": "percent" - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 2 - }, - "id": 4, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "opensearch_os_mem_used_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", - "legendFormat": "{{node}}" - } - ], - "title": "Node memory usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Node file system read and write data.", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 1, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never", - "stacking": "normal" - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/time|used|busy|util/" - }, - "properties": [ - { - "id": "custom.axisSoftMax", - "value": 100 - }, - { - "id": "custom.drawStyle", - "value": "points" - }, - { - "id": "unit", - "value": "percent" - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 2 - }, - "id": 5, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job,opensearch_cluster,node) (rate(opensearch_fs_io_total_read_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", - "legendFormat": "{{node}} - read" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job,opensearch_cluster,node) (rate(opensearch_fs_io_total_write_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", - "legendFormat": "{{node}} - write" - } - ], - "title": "Node I/O", 
- "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Number of open connections for the selected node.", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never", - "stacking": "normal" - }, - "unit": "" - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 2 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_transport_server_open_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "legendFormat": "{{node}}" - } - ], - "title": "Node open connections", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Disk usage percentage of the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "custom": { - "fillOpacity": 1, - "gradientMode": "scheme", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "decimals": 1, - "max": 100, - "min": 0, - "unit": "percent" - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 8 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "100 - (100 * opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / 
clamp_min(opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}, 1))", - "legendFormat": "{{node}}" - } - ], - "title": "Node disk usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Percentage of swap space used by OpenSearch and the Operating System on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "custom": { - "fillOpacity": 5, - "gradientMode": "scheme", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "decimals": 1, - "max": 100, - "min": 0, - "unit": "percent" - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 8 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "100 * opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / clamp_min((opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} + opensearch_os_swap_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}), 1)", - "legendFormat": "{{node}}" - } - ], - "title": "Node memory swap", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Node network traffic sent and received.", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": false, - "axisLabel": "out(-) | in(+)", - "fillOpacity": 5, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - 
"decimals": 1, - "noValue": "No traffic", - "unit": "bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sent/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 8 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_tx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", - "legendFormat": "{{node}} - sent" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_rx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", - "legendFormat": "{{node}} - received" - } - ], - "title": "Node network traffic", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Circuit breakers tripped on the selected node by type", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": 
"off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "trips" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 8 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(name, job,opensearch_cluster,node) (increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{node}} - {{ name }}" - } - ], - "title": "Circuit breakers", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 15 - }, - "id": 11, - "targets": [ ], - "title": "Node JVM", - "type": "row" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The amount of heap memory used vs committed on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - 
"color": "green", - "value": null - } - ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 16 - }, - "id": 12, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_heap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - used" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_heap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - commited" - } - ], - "title": "JVM heap used vs. 
committed", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The amount of non-heap memory used vs committed on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 16 - }, - "id": 13, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_nonheap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - used" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_nonheap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - commited" - } - ], - "title": "JVM non-heap used vs. 
committed", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The number of threads running in the JVM on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "threads" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 16 - }, - "id": 14, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "JVM threads", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The number of buffer pools available on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - 
"axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "buffer pools" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 16 - }, - "id": 15, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by( job,opensearch_cluster,node, bufferpool) (opensearch_jvm_bufferpool_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - {{bufferpool}}" - } - ], - "title": "JVM buffer pools", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The uptime of the JVM in seconds on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - 
"showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 22 - }, - "id": 16, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job,opensearch_cluster,node) (opensearch_jvm_uptime_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "JVM uptime", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The number of garbage collection operations on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": 
"operations" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 22 - }, - "id": 17, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (increase(opensearch_jvm_gc_collection_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "JVM garbage collections", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The amount of time spent on garbage collection on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 22 - }, - "id": 18, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { 
- "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (increase(opensearch_jvm_gc_collection_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "JVM garbage collection time", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The percent used of JVM buffer pool memory.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 22 - }, - "id": 19, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "100 * (sum by (job,opensearch_cluster,node, bufferpool) (opensearch_jvm_bufferpool_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})) / clamp_min((sum by (job, bufferpool, cluster) 
(opensearch_jvm_bufferpool_total_capacity_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})),1)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - {{bufferpool}}" - } - ], - "title": "JVM buffer pool usage", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 20, - "targets": [ ], - "title": "Thread pools", - "type": "row" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The number of threads in the thread pool for the selected node", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "threads" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 29 - }, - "id": 21, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job,opensearch_cluster,node) 
((opensearch_threadpool_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "Thread pool threads", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The number of tasks in the thread pool for the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "tasks" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 29 - }, - "id": 22, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_threadpool_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "Thread pool tasks", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${loki_datasource}" - }, - 
"description": "The recent error logs being reported by OpenSearch.", - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 37 - }, - "id": 23, - "options": { - "dedupStrategy": "none", - "enableLogDetails": true, - "prettifyLogMessage": false, - "showCommonLabels": false, - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": false - }, - "targets": [ - { - "datasource": { - "uid": "${loki_datasource}" - }, - "editorMode": "code", - "expr": "{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} |~ \"\"", - "queryType": "range", - "refId": "A" - } - ], - "title": "Error logs panel", - "type": "logs" - } - ], - "refresh": "1m", - "schemaVersion": 36, - "tags": [ - "opensearch-mixin" - ], - "templating": { - "list": [ - { - "label": "Prometheus data source", - "name": "prometheus_datasource", - "query": "prometheus", - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Job", - "multi": true, - "name": "job", - "query": "label_values(opensearch_os_cpu_percent{opensearch_cluster!=\"\"}, job)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".+", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Opensearch_cluster", - "multi": true, - "name": "opensearch_cluster", - "query": "label_values(opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".+", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Node", - "multi": true, - "name": "node", - "query": "label_values(opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", - "refresh": 2, - 
"sort": 1, - "type": "query" - }, - { - "hide": 2, - "label": "Loki data source", - "name": "loki_datasource", - "query": "loki", - "regex": "", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timezone": "default", - "title": "OpenSearch node overview", - "uid": "opensearch-node-overview" - } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json index 891e95e38..89f7eebdd 100644 --- a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json @@ -1,149 +1,45 @@ { + "annotations": { + "list": [ ] + }, + "editable": false, + "id": null, "links": [ { - "asDropdown": false, + "keepTime": true, + "title": "Opensearch Logs", + "type": "link", + "url": "/d/opensearch-logs" + }, + { + "asDropdown": true, "includeVars": true, "keepTime": true, "tags": [ - "opensearch-mixin" + "opensearch" ], - "title": "Other Opensearch dashboards", + "title": "All dashboards", "type": "dashboards" } ], "panels": [ { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "OpenSearch node roles.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "0": { - "color": "super-light-orange", - "index": 5, - "text": "False" - }, - "1": { - "color": "light-green", - "index": 3, - "text": "True" - }, - "Data": { - "color": "light-purple", - "index": 0, - "text": "data" - }, - "Ingest": { - "color": "light-blue", - "index": 2, - "text": "ingest" - }, - "Master": { - "color": "light-green", - "index": 1, - "text": "master" - }, - "Remote cluster client": { - "color": "light-orange", - "index": 4, - "text": "remote_cluster_client" - } - }, - "type": "value" - } - ] - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/Data|Master|Ingest|Remote.+|Cluster.+/" - }, - "properties": [ - { - "id": 
"custom.cellOptions", - "value": { - "type": "color-text" - } - } - ] - } - ] - }, + "collapsed": false, "gridPos": { - "h": 6, - "w": 24, + "h": 1, + "w": 0, "x": 0, "y": 0 }, "id": 1, - "pluginVersion": "v10.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (job,opensearch_cluster,node,node,nodeid,role,primary_ip) (last_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[1d]))", - "instant": true, - "legendFormat": "{{node}}" - } - ], - "title": "Roles", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "mode": "columns", - "valueLabel": "role" - } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "cluster_manager": 108, - "data": 105, - "ingest": 106, - "job": 3, - "master": 104, - "node": 3, - "nodeid": 3, - "opensearch_cluster": 3, - "remote_cluster_client": 107 - }, - "renameByName": { - "Time": "", - "cluster": "Cluster", - "cluster_manager": "Cluster manager", - "data": "Data", - "ingest": "Ingest", - "master": "Master", - "node": "Node", - "nodeid": "Nodeid", - "remote_cluster_client": "Remote cluster client" - } - } - } - ], - "type": "table" + "panels": [ ], + "title": "Cluster Overview", + "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The overall health and availability of the OpenSearch cluster.", "fieldConfig": { @@ -171,7 +67,6 @@ } ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -191,40 +86,35 @@ } ] } - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 3, + "h": 6, + "w": 5, "x": 0, - "y": 2 + "y": 1 }, "id": 2, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ 
"lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by(job,opensearch_cluster) (opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "min by (job,opensearch_cluster) (\n opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Cluster status" } ], "title": "Cluster status", @@ -232,7 +122,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of running nodes across the OpenSearch cluster.", "fieldConfig": { @@ -240,9 +131,7 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -258,40 +147,35 @@ } ] } - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 3, - "x": 3, - "y": 2 + "h": 6, + "w": 5, + "x": 5, + "y": 1 }, "id": 3, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by(job,opensearch_cluster) (opensearch_cluster_nodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "min by (job,opensearch_cluster) (\n 
opensearch_cluster_nodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Node count" } ], "title": "Node count", @@ -299,7 +183,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of data nodes in the OpenSearch cluster.", "fieldConfig": { @@ -307,9 +192,7 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -325,40 +208,35 @@ } ] } - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 3, - "x": 6, - "y": 2 + "h": 6, + "w": 5, + "x": 10, + "y": 1 }, "id": 4, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by(job,opensearch_cluster) (opensearch_cluster_datanodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "min by (job,opensearch_cluster) (\n opensearch_cluster_datanodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Data node count" } ], "title": "Data node count", @@ -366,7 +244,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of shards in the OpenSearch cluster across all indices.", 
"fieldConfig": { @@ -374,9 +253,7 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -392,40 +269,35 @@ } ] } - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 3, - "x": 9, - "y": 2 + "h": 6, + "w": 5, + "x": 15, + "y": 1 }, "id": 5, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum(max by (type) (opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))", + "expr": "sum(\n max by (job,opensearch_cluster,type) (\n opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Shard count" } ], "title": "Shard count", @@ -433,7 +305,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Percent of active shards across the OpenSearch cluster.", "fieldConfig": { @@ -441,9 +314,7 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -464,78 +335,91 @@ ] }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 3, - "x": 12, - "y": 2 + "h": 6, + "w": 4, + "x": 20, + "y": 1 }, "id": 6, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": 
"auto" + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by(job,opensearch_cluster) (opensearch_cluster_shards_active_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "min by (job,opensearch_cluster) (\n opensearch_cluster_shards_active_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Active shards %%" } ], "title": "Active shards %", "type": "stat" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 7 + }, + "id": 7, + "panels": [ ], + "title": "Node Roles", + "type": "row" + }, { "datasource": { "type": "datasource", "uid": "-- Mixed --" }, - "description": "OpenSearch node roles over time.", + "description": "OpenSearch node roles.", "fieldConfig": { "defaults": { "mappings": [ { "options": { - "2": { + "0": { + "color": "super-light-orange", + "index": 5, + "text": "False" + }, + "1": { + "color": "light-green", + "index": 3, + "text": "True" + }, + "Data": { "color": "light-purple", "index": 0, "text": "data" }, - "3": { - "color": "light-green", - "index": 1, - "text": "master" - }, - "4": { + "Ingest": { "color": "light-blue", "index": 2, "text": "ingest" }, - "5": { - "color": "light-yellow", - "index": 3, - "text": "cluster_manager" + "Master": { + "color": "light-green", + "index": 1, + "text": "master" }, - "6": { - "color": "super-light-red", + "Remote cluster client": { + "color": "light-orange", "index": 4, "text": "remote_cluster_client" } @@ -543,69 +427,224 @@ "type": "value" } ] - } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Data|Master|Ingest|Remote.+|Cluster.+/" + }, + "properties": [ + { + "id": 
"custom.cellOptions", + "value": { + "type": "color-text" + } + } + ] + } + ] }, "gridPos": { - "h": 5, - "w": 9, - "x": 15, - "y": 2 - }, - "id": 7, - "maxDataPoints": 100, - "options": { - "legend": false, - "showValue": "never" + "h": 8, + "w": 24, + "x": 0, + "y": 8 }, - "pluginVersion": "v10.0.0", + "id": 8, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"data\"}[1m]) == 1\n) * 2\n", - "legendFormat": "{{node}}" - }, + "expr": "max by (job,opensearch_cluster, nodeid, role, primary_ip) (last_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[1d]))", + "format": "time_series", + "instant": true, + "legendFormat": "{{node}}: Node role bool last seen", + "refId": "Node role bool last seen" + } + ], + "title": "Roles", + "transformations": [ { - "datasource": { + "id": "labelsToFields", + "options": { + "mode": "columns", + "valueLabel": "role" + } + }, + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "cluster_manager": 108, + "data": 105, + "ingest": 106, + "job": 3, + "master": 104, + "node": 3, + "nodeid": 3, + "opensearch_cluster": 3, + "remote_cluster_client": 107 + }, + "renameByName": { + "Time": "", + "cluster": "Cluster", + "cluster_manager": "Cluster manager", + "data": "Data", + "ingest": "Ingest", + "master": "Master", + "node": "Node", + "nodeid": "Nodeid", + "remote_cluster_client": "Remote cluster client" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "OpenSearch node roles over time.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + 
"options": { + "2": { + "color": "light-purple", + "index": 0, + "text": "data" + }, + "3": { + "color": "light-green", + "index": 1, + "text": "master" + }, + "4": { + "color": "light-blue", + "index": 2, + "text": "ingest" + }, + "5": { + "color": "light-yellow", + "index": 3, + "text": "cluster_manager" + }, + "6": { + "color": "super-light-red", + "index": 4, + "text": "remote_cluster_client" + } + }, + "type": "value" + } + ] + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 9, + "maxDataPoints": 100, + "options": { + "legend": false, + "showValue": "never" + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"data\"}[1m]) == 1) * 2", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / data", + "refId": "Node role: data" + }, + { + "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"master\"}[1m]) == 1\n) * 3\n", - "legendFormat": "{{node}}" + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"master\"}[1m]) == 1) * 3", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / master", + "refId": "Node role: master" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"ingest\"}[1m]) == 1\n) * 4\n", - "legendFormat": "{{node}}" + 
"expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"ingest\"}[1m]) == 1) * 4", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / ingest", + "refId": "Node role: ingest" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"cluster_manager\"}[1m]) == 1\n) * 5\n", - "legendFormat": "{{node}}" + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"cluster_manager\"}[1m]) == 1) * 5", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / cluster_manager", + "refId": "Node role: cluster_manager" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"remote_cluster_client\"}[1m]) == 1\n) * 6\n", - "legendFormat": "{{node}}" + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"remote_cluster_client\"}[1m]) == 1) * 6", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / remote_client", + "refId": "Node role: remote_cluster_client" } ], "title": "Roles timeline", "type": "status-history" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 24 + }, + "id": 10, + "panels": [ ], + "title": "Resource Usage", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, 
"description": "Top nodes by OS CPU usage across the OpenSearch cluster.", "fieldConfig": { @@ -613,11 +652,9 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "max": 100, "min": 0, "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -630,40 +667,35 @@ ] }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 9, + "h": 8, "w": 8, "x": 0, - "y": 4 + "y": 25 }, - "id": 8, + "id": 11, "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc(sum by(node, job,opensearch_cluster) (opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})))", + "expr": "topk(10, sort_desc(\n topk(10, sort_desc(sum by(job,opensearch_cluster,node) (opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})))\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Top nodes by CPU usage" } ], "title": "Top nodes by CPU usage", @@ -671,7 +703,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The total count of circuit breakers tripped across the OpenSearch cluster.", "fieldConfig": { @@ -679,9 +712,7 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -694,41 +725,36 @@ ] }, "unit": "trips" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 9, + "h": 8, "w": 8, "x": 8, - "y": 4 + "y": 25 }, - "id": 9, + "id": 12, "options": { - "displayMode": "gradient", - 
"minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,opensearch_cluster, node) (increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]))", + "expr": "sum by (job,opensearch_cluster) (\n increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Breakers tripped" } ], "title": "Breakers tripped", @@ -736,17 +762,16 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "Shard status counts across the Opensearch cluster.", + "description": "Shard status counts across the OpenSearch cluster.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -759,48 +784,57 @@ ] }, "unit": "shards" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 9, + "h": 8, "w": 8, "x": 16, - "y": 4 + "y": 25 }, - "id": 10, + "id": 13, "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by(type, 
job,opensearch_cluster) (opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "min by (job,opensearch_cluster,type) (\n opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{type}}" + "legendFormat": "{{type}}", + "refId": "Shard status" } ], "title": "Shard status", "type": "bargauge" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 33 + }, + "id": 14, + "panels": [ ], + "title": "Storage and Tasks", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top nodes by disk usage across the OpenSearch cluster.", "fieldConfig": { @@ -808,11 +842,9 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "max": 100, "min": 0, "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -825,40 +857,35 @@ ] }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 10, + "h": 8, "w": 8, "x": 0, - "y": 13 + "y": 34 }, - "id": 11, + "id": 15, "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc((100 * (sum by(node, job,opensearch_cluster) (opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})- sum by(node, job,opensearch_cluster) (opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})) / sum by(node, job,opensearch_cluster) 
(opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))))", + "expr": "topk(10, sort_desc((100 * (sum by(job,opensearch_cluster,node) (opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})- sum by(job,opensearch_cluster,node) (opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})) / sum by(job,opensearch_cluster,node) (opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Top nodes by disk usage" } ], "title": "Top nodes by disk usage", @@ -866,86 +893,35 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The total count of documents indexed across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "documents" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 8, + "h": 8, + "w": 16, "x": 8, - "y": 13 - }, - "id": 12, - "options": { - "legend": { - "calcs": [ ], - "displayMode": 
"list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 34 }, + "id": 16, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster) (opensearch_indices_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "avg by (job,opensearch_cluster) (\n opensearch_indices_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Total documents" } ], "title": "Total documents", @@ -953,86 +929,35 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of tasks waiting to be executed across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "tasks" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, + "h": 8, "w": 8, - "x": 16, - "y": 13 - }, - "id": 13, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - 
"placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "x": 0, + "y": 42 }, + "id": 17, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster) (opensearch_cluster_pending_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "avg by (job,opensearch_cluster) (\n opensearch_cluster_pending_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Pending tasks" } ], "title": "Pending tasks", @@ -1040,86 +965,35 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The total size of the store across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "bytes" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, + "h": 8, "w": 8, "x": 8, - "y": 18 - }, - "id": 14, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - 
"showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 42 }, + "id": 18, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster) (opensearch_indices_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "avg by (job,opensearch_cluster) (\n opensearch_indices_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Store size" } ], "title": "Store size", @@ -1127,86 +1001,34 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The max wait time for tasks to be executed across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, + "h": 8, "w": 8, "x": 16, - "y": 18 - }, - "id": 15, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - 
"mode": "single", - "sort": "none" - } + "y": 42 }, + "id": 19, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(job,opensearch_cluster) (opensearch_cluster_task_max_waiting_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "max by (job,opensearch_cluster) (\n opensearch_cluster_task_max_waiting_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "instant": false, + "legendFormat": "{{opensearch_cluster}}", + "refId": "Max task wait time" } ], "title": "Max task wait time", @@ -1214,106 +1036,47 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 23 + "y": 50 }, - "id": 16, - "targets": [ ], - "title": "Cluster search and index summary", + "id": 20, + "panels": [ ], + "title": "Search Performance", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": 
"absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 24 - }, - "id": 17, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 51 }, + "id": 21, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "topk(10, sort_desc(avg by(index, job,opensearch_cluster) (\n opensearch_index_search_fetch_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_search_query_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_search_scroll_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}\n)))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{index}}" + "instant": false, + "legendFormat": "{{index}}", + "refId": "Top indices by request rate" } ], "title": "Top indices by request rate", @@ -1321,87 +1084,35 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - 
"type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 24 - }, - "id": 18, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 51 }, + "id": 22, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "topk(10, sort_desc(sum by(index, job,opensearch_cluster) ((increase(opensearch_index_search_fetch_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_query_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_scroll_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]))\n/ clamp_min(increase(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_query_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_scroll_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]), 1))))\n", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Top indices 
by request latency" } ], "title": "Top indices by request latency", @@ -1409,181 +1120,84 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 24 - }, - "id": 19, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 51 }, + "id": 23, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc(avg by(index, job,opensearch_cluster) (\n 100 * (opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}) / \n 
clamp_min((opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_requestcache_miss_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_miss_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}), 1\n ))))\n", + "expr": "topk(10, sort_desc(avg by(index, job,opensearch_cluster) (\n 100 * (opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}) / \n clamp_min((opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_requestcache_miss_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_miss_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}), 1\n ))))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Top indices by combined cache hit ratio" } ], "title": "Top indices by combined cache hit ratio", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 59 + }, + "id": 24, + "panels": [ ], + "title": "Ingest 
Performance", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top nodes by rate of ingest across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "Bps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 32 - }, - "id": 20, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 60 }, + "id": 25, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sum by(node, job,opensearch_cluster) (rate(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__rate_interval])))", + "expr": "topk(10, sum by(job,opensearch_cluster,node) (rate(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__rate_interval])))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Top nodes by ingest 
rate" } ], "title": "Top nodes by ingest rate", @@ -1591,91 +1205,36 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top nodes by ingestion latency across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 32 - }, - "id": 21, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 60 }, + "id": 26, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sum by(job,opensearch_cluster, node) (\n increase(opensearch_ingest_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]) / \n clamp_min(increase(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]), 1)))\n", + "expr": "topk(10, sum by(job,opensearch_cluster,node) 
(increase(opensearch_ingest_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]) / clamp_min(increase(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]), 1)))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Top nodes by ingest latency" } ], "title": "Top nodes by ingest latency", @@ -1683,174 +1242,85 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top nodes by ingestion failures across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "errors" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 32 - }, - "id": 22, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 60 }, + "id": 27, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sum by(job,opensearch_cluster, node) 
(increase(opensearch_ingest_total_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", + "expr": "topk(10, sum by(job,opensearch_cluster,node) (increase(opensearch_ingest_total_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Top nodes by ingest errors" } ], "title": "Top nodes by ingest errors", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 68 + }, + "id": 28, + "panels": [ ], + "title": "Indexing Performance", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by rate of document indexing across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "documents/s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 40 - }, - "id": 23, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - 
"sort": "none" - } + "y": 69 }, + "id": 29, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, avg by(job,opensearch_cluster, index) (opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))", + "expr": "topk(10, avg by(index, job,opensearch_cluster) (opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Top indices by index rate" } ], "title": "Top indices by index rate", @@ -1858,87 +1328,36 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by indexing latency across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 40 - }, - "id": 24, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": 
"none" - } + "y": 69 }, + "id": 30, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, avg by(job,opensearch_cluster, index) \n(increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]) / \nclamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]), 1)))\n", + "expr": "topk(10, avg by(index, job,opensearch_cluster) (increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]), 1)))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Top indices by index latency" } ], "title": "Top indices by index latency", @@ -1946,97 +1365,46 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by index document failures across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "failures" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 40 - }, - "id": 25, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 69 }, + "id": 31, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, avg by(job,opensearch_cluster, index) (increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", + "expr": "topk(10, avg by(index, job,opensearch_cluster) (increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Top indices by index failures" } ], "title": "Top indices by index failures", "type": "timeseries" } ], - "refresh": "1m", - "schemaVersion": 36, + "refresh": "30s", + "schemaVersion": 39, "tags": [ - "opensearch-mixin" + "opensearch" ], "templating": { "list": [ @@ -2044,7 +1412,7 @@ "label": "Prometheus data source", "name": "prometheus_datasource", "query": "prometheus", - "regex": "", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", "type": "datasource" }, { @@ -2076,14 +1444,37 @@ "refresh": 2, "sort": 1, "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Node", + "multi": true, + "name": "node", + "query": 
"label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" } ] }, "time": { - "from": "now-1h", + "from": "now-30m", "to": "now" }, "timezone": "default", - "title": "OpenSearch cluster overview", + "title": "OpenSearch Cluster Overview", "uid": "opensearch-cluster-overview" } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/opensearch-logs.json b/opensearch-mixin/dashboards_out/opensearch-logs.json new file mode 100644 index 000000000..6ea00e8ea --- /dev/null +++ b/opensearch-mixin/dashboards_out/opensearch-logs.json @@ -0,0 +1,297 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": false, + "id": null, + "links": [ + { + "keepTime": true, + "title": "Opensearch Cluster Overview", + "type": "link", + "url": "/d/opensearch-cluster-overview" + }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "opensearch" + ], + "title": "All dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "Logs volume grouped by \"level\" label.", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "bars", + "fillOpacity": 50, + "stacking": { + "mode": "normal" + } + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)(rr.*|RR.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, 
+ { + "matcher": { + "id": "byRegexp", + "options": "(W|w)(arn.*|ARN.*|rn|RN)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(T|t)(race|RACE)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "logs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "text", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "maxDataPoints": 100, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "sum by (level) (count_over_time({opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",level=~\"$level\",severity=~\"$severity\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", + "legendFormat": "{{ level }}" + } + ], + "title": "Logs volume", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "Value", + "renamePattern": "logs" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 2, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": true, + "showTime": false, + "wrapLogMessage": false + }, 
+ "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",level=~\"$level\",severity=~\"$severity\"} \n|~ \"$regex_search\"\n\n\n" + } + ], + "title": "Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "opensearch" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Opensearch_cluster", + "multi": true, + "name": "opensearch_cluster", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Node", + "multi": true, + "name": "node", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": 
"default", + "title": "OpenSearch Logs", + "uid": "opensearch-logs" + } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/opensearch-node-overview.json b/opensearch-mixin/dashboards_out/opensearch-node-overview.json new file mode 100644 index 000000000..845b8673d --- /dev/null +++ b/opensearch-mixin/dashboards_out/opensearch-node-overview.json @@ -0,0 +1,1288 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": false, + "id": null, + "links": [ + { + "keepTime": true, + "title": "Opensearch Logs", + "type": "link", + "url": "/d/opensearch-logs" + }, + { + "keepTime": true, + "title": "Opensearch Cluster Overview", + "type": "link", + "url": "/d/opensearch-cluster-overview" + }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "opensearch" + ], + "title": "All dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Node Roles", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "OpenSearch node roles over time.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "2": { + "color": "light-purple", + "index": 0, + "text": "data" + }, + "3": { + "color": "light-green", + "index": 1, + "text": "master" + }, + "4": { + "color": "light-blue", + "index": 2, + "text": "ingest" + }, + "5": { + "color": "light-yellow", + "index": 3, + "text": "cluster_manager" + }, + "6": { + "color": "super-light-red", + "index": 4, + "text": "remote_cluster_client" + } + }, + "type": "value" + } + ] + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 2, + "maxDataPoints": 100, + "options": { + "legend": false, + "showValue": "never" + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) 
(max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"data\"}[1m]) == 1) * 2", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / data", + "refId": "Node role: data" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"master\"}[1m]) == 1) * 3", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / master", + "refId": "Node role: master" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"ingest\"}[1m]) == 1) * 4", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / ingest", + "refId": "Node role: ingest" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"cluster_manager\"}[1m]) == 1) * 5", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / cluster_manager", + "refId": "Node role: cluster_manager" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"remote_cluster_client\"}[1m]) == 1) * 6", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / remote_client", + "refId": "Node role: 
remote_cluster_client" + } + ], + "title": "Roles timeline", + "type": "status-history" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "OpenSearch node roles.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": { + "color": "super-light-orange", + "index": 5, + "text": "False" + }, + "1": { + "color": "light-green", + "index": 3, + "text": "True" + }, + "Data": { + "color": "light-purple", + "index": 0, + "text": "data" + }, + "Ingest": { + "color": "light-blue", + "index": 2, + "text": "ingest" + }, + "Master": { + "color": "light-green", + "index": 1, + "text": "master" + }, + "Remote cluster client": { + "color": "light-orange", + "index": 4, + "text": "remote_cluster_client" + } + }, + "type": "value" + } + ] + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Data|Master|Ingest|Remote.+|Cluster.+/" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 3, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (job,opensearch_cluster, nodeid, role, primary_ip) (last_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[1d]))", + "format": "time_series", + "instant": true, + "legendFormat": "{{node}}: Node role bool last seen", + "refId": "Node role bool last seen" + } + ], + "title": "Roles", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "mode": "columns", + "valueLabel": "role" + } + }, + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "cluster_manager": 108, + "data": 105, + "ingest": 106, + "job": 3, + "master": 104, + "node": 3, 
+ "nodeid": 3, + "opensearch_cluster": 3, + "remote_cluster_client": 107 + }, + "renameByName": { + "Time": "", + "cluster": "Cluster", + "cluster_manager": "Cluster manager", + "data": "Data", + "ingest": "Ingest", + "master": "Master", + "node": "Node", + "nodeid": "Nodeid", + "remote_cluster_client": "Remote cluster client" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 17 + }, + "id": 4, + "panels": [ ], + "title": "Node health", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "CPU usage percentage of the node's Operating System.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 18 + }, + "id": 5, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}", + "refId": "CPU %%" + } + ], + "title": "Node CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Memory usage percentage of the node for the Operating System and OpenSearch", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + 
"unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 18 + }, + "id": 6, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "opensearch_os_mem_used_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}", + "refId": "Memory used %%" + } + ], + "title": "Node memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Node file system read and write data.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 1, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "stacking": { + "mode": "normal" + } + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/time|used|busy|util/" + }, + "properties": [ + { + "id": "custom.axisSoftMax", + "value": 100 + }, + { + "id": "custom.drawStyle", + "value": "points" + }, + { + "id": "unit", + "value": "percent" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 18 + }, + "id": 7, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_fs_io_total_read_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}} - read", + "refId": "FS read bytes/s" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by 
(job,opensearch_cluster,node) (rate(opensearch_fs_io_total_write_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}} - write", + "refId": "FS write bytes/s" + } + ], + "title": "Node I/O", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Number of open connections for the selected node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 18 + }, + "id": 8, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (opensearch_transport_server_open_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}", + "refId": "Transport server open" + } + ], + "title": "Node open connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Disk usage percentage of the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 1, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 26 + }, + "id": 9, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": 
"v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "100 - (100 * opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / clamp_min(opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}, 1))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}", + "refId": "FS used %%" + } + ], + "title": "Node disk usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Percentage of swap space used by OpenSearch and the Operating System on the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 26 + }, + "id": 10, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "100 * opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / clamp_min((opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} + opensearch_os_swap_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}), 1)", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}", + "refId": "Swap used %%" + } + ], + "title": "Node memory swap", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + 
"uid": "-- Mixed --" + }, + "description": "Network traffic on the node's Operating System.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 26 + }, + "id": 11, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_rx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}} - received", + "refId": "Transport RX bitrate" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_tx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}} - sent", + "refId": "Transport TX bitrate" + } + ], + "title": "Node network traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Circuit breakers tripped on the selected node by type", + "fieldConfig": { + "defaults": { + "unit": "trips" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 26 + }, + "id": 12, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (name, job,opensearch_cluster,node) (increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "format": 
"time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{node}} - {{ name }}", + "refId": "Circuit breaker trips by name" + } + ], + "title": "Circuit breakers", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 34 + }, + "id": 13, + "panels": [ ], + "title": "Node JVM", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM heap memory usage vs committed.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 35 + }, + "id": 14, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_heap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM heap used", + "refId": "JVM heap used" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_heap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM heap committed", + "refId": "JVM heap committed" + } + ], + "title": "JVM heap used vs committed", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM non-heap memory usage vs committed.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + 
"showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 35 + }, + "id": 15, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_nonheap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM non-heap used", + "refId": "JVM non-heap used" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_nonheap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM non-heap committed", + "refId": "JVM non-heap committed" + } + ], + "title": "JVM non-heap used vs committed", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM thread count.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "threads" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 35 + }, + "id": 16, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM threads", + "refId": "JVM threads" + } + ], + "title": "JVM threads", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + 
"uid": "-- Mixed --" + }, + "description": "JVM buffer pool usage.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 35 + }, + "id": 17, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster, bufferpool) (opensearch_jvm_bufferpool_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{ bufferpool }}", + "refId": "JVM buffer pools" + } + ], + "title": "JVM buffer pools", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM uptime in seconds.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "s" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 43 + }, + "id": 18, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_uptime_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM uptime", + "refId": "JVM uptime" + } + ], + "title": "JVM uptime", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM garbage collection count.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + 
"showPoints": "never" + }, + "unit": "collections" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 43 + }, + "id": 19, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (increase(opensearch_jvm_gc_collection_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM GC collections", + "refId": "JVM GC collections" + } + ], + "title": "JVM garbage collections", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM garbage collection time in milliseconds.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2 + }, + "unit": "ms" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 43 + }, + "id": 20, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (increase(opensearch_jvm_gc_collection_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM GC time", + "refId": "JVM GC time" + } + ], + "title": "JVM garbage collection time", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM buffer pool usage by pool.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 43 + }, + "id": 
21, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "100 * (sum by (job,opensearch_cluster, bufferpool) (opensearch_jvm_bufferpool_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})) / clamp_min((sum by (job, bufferpool, cluster) (opensearch_jvm_bufferpool_total_capacity_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})),1)", + "format": "time_series", + "instant": false, + "legendFormat": "{{ bufferpool }}", + "refId": "JVM bufferpool used %%" + } + ], + "title": "JVM buffer pool usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 51 + }, + "id": 22, + "panels": [ ], + "title": "Thread pools", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Thread pool thread count.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "threads" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 23, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) ((opensearch_threadpool_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: Threadpool threads", + "refId": "Threadpool threads" + } + ], + "title": "Thread pool threads", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Thread pool task count.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 
5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "tasks" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 24, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_threadpool_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: Threadpool tasks", + "refId": "Threadpool tasks" + } + ], + "title": "Thread pool tasks", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "opensearch" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Opensearch_cluster", + "multi": true, + "name": "opensearch_cluster", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Node", + "multi": true, + "name": "node", + "query": 
"label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": "default", + "title": "OpenSearch Node Overview", + "uid": "opensearch-node-overview" + } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/search-and-index-overview.json b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json similarity index 52% rename from opensearch-mixin/dashboards_out/search-and-index-overview.json rename to opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json index e52086683..4ead34a3c 100644 --- a/opensearch-mixin/dashboards_out/search-and-index-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json @@ -1,87 +1,55 @@ { + "annotations": { + "list": [ ] + }, + "editable": false, + "id": null, "links": [ { - "asDropdown": false, + "keepTime": true, + "title": "Opensearch Logs", + "type": "link", + "url": "/d/opensearch-logs" + }, + { + "keepTime": true, + "title": "Opensearch Cluster Overview", + "type": "link", + "url": "/d/opensearch-cluster-overview" + }, + { + "asDropdown": true, "includeVars": true, "keepTime": true, "tags": [ - "opensearch-mixin" + "opensearch" ], - "title": "Other Opensearch dashboards", + "title": "All dashboards", "type": "dashboards" } ], "panels": [ { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, "y": 0 }, "id": 1, - "targets": [ ], - "title": "Request performance", + "panels": [ ], + "title": "Search Performance", "type": "row" }, { "datasource": { - "uid": 
"${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Rate of fetch, scroll, and query requests by selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "reqps" }, "overrides": [ @@ -115,44 +83,47 @@ }, "id": 2, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_search_query_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_search_query_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - query" + "legendFormat": "{{index}} - query", + "refId": "Search queries in-flight" }, { "datasource": { + "type": "prometheus", 
"uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_search_fetch_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_search_fetch_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - fetch" + "legendFormat": "{{index}} - fetch", + "refId": "Search fetch in-flight" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_search_scroll_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_search_scroll_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - scroll" + "legendFormat": "{{index}} - scroll", + "refId": "Search scroll in-flight" } ], "title": "Request rate", @@ -160,58 +131,12 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Latency of fetch, scroll, and query requests by selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": 
"linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "s" }, "overrides": [ @@ -245,47 +170,50 @@ }, "id": 3, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_query_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_query_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}} - query" + "legendFormat": "{{index}} - query", + "refId": "Search query latency (avg)" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_fetch_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", "format": "time_series", + "instant": false, "interval": "1m", 
"intervalFactor": 2, - "legendFormat": "{{index}} - fetch" + "legendFormat": "{{index}} - fetch", + "refId": "Search fetch latency (avg)" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_scroll_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_scroll_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}} - scroll" + "legendFormat": "{{index}} - scroll", + "refId": "Search scroll latency (avg)" } ], "title": "Request latency", @@ -293,58 +221,12 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Ratio of query cache and request cache hits and misses.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "percent" }, "overrides": [ @@ -378,35 +260,35 @@ }, "id": 4, "options": { - "legend": { - "calcs": [ 
], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (100 * (opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) / clamp_min(opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"} + opensearch_index_requestcache_miss_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}, 1))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - request" + "legendFormat": "{{index}} - request", + "refId": "Request cache hit rate %%" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (100 * (opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) / clamp_min(opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"} + opensearch_index_querycache_miss_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}, 1))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - query" + "legendFormat": "{{index}} - query", + "refId": "Query cache hit rate %%" } ], "title": "Cache hit ratio", @@ -414,47 +296,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, 
"description": "Total evictions count by cache type for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -499,47 +347,50 @@ }, "id": 5, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_querycache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_querycache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}} - query cache" + "legendFormat": "{{index}} - query cache", + "refId": "Query cache evictions" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) 
(increase(opensearch_index_requestcache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_requestcache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}} - request cache" + "legendFormat": "{{index}} - request cache", + "refId": "Request cache evictions" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_fielddata_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_fielddata_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}} - field data" + "legendFormat": "{{index}} - field data", + "refId": "Fielddata evictions" } ], "title": "Evictions", @@ -547,74 +398,25 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, "y": 9 }, "id": 6, - "targets": [ ], - "title": "Index performance", + "panels": [ ], + "title": "Indexing Performance", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Rate of indexed documents for the selected index.", "fieldConfig": { "defaults": { - 
"color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "documents/s" }, "overrides": [ @@ -647,27 +449,19 @@ "y": 10 }, "id": 7, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\",context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Indexing current" } ], "title": "Index rate", @@ -675,47 +469,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Document indexing latency for the selected index.", "fieldConfig": { "defaults": 
{ - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -728,29 +488,7 @@ ] }, "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -759,28 +497,19 @@ "y": 10 }, "id": 8, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}[$__interval:]),1))", + "expr": "avg by(job,opensearch_cluster) 
(increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=~\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=~\"total\"}[$__interval:]),1))", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{node}}: Indexing latency (avg)", + "refId": "Indexing latency (avg)" } ], "title": "Index latency", @@ -788,47 +517,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Number of indexing failures for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -872,28 +567,20 @@ "y": 10 }, "id": 9, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) 
(increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\",context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Indexing failed (avg)" } ], "title": "Index failures", @@ -901,47 +588,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Index flush latency for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -985,27 +638,19 @@ "y": 10 }, "id": 10, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) 
(increase(opensearch_index_flush_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]),1))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Flush latency (avg)" } ], "title": "Flush latency", @@ -1013,47 +658,16 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Index merge time for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "points", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "drawStyle": "points" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1098,44 +712,47 @@ }, "id": 11, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) 
(increase(opensearch_index_merges_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - total" + "legendFormat": "{{index}} - total", + "refId": "Merge time increase" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_stopped_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - stopped" + "legendFormat": "{{index}} - stopped", + "refId": "Merge stopped time increase" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_throttled_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - throttled" + "legendFormat": "{{index}} - throttled", + "refId": "Merge throttled time increase" } ], "title": "Merge time", @@ -1143,47 +760,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Index refresh latency for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, 
- "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1227,27 +810,19 @@ "y": 18 }, "id": 12, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_refresh_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]),1))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Refresh latency (avg)" } ], "title": "Refresh latency", @@ -1255,47 +830,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Current number of translog operations for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - 
"type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1339,27 +880,19 @@ "y": 18 }, "id": 13, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_translog_operations_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_translog_operations_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Translog operations" } ], "title": "Translog operations", @@ -1367,47 +900,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Rate of documents deleted for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1451,27 +950,19 @@ "y": 18 }, "id": 14, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_indexing_delete_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_indexing_delete_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Indexing delete current" } ], "title": "Docs deleted", @@ -1479,63 +970,26 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, "y": 26 }, "id": 15, - "targets": [ ], - "title": "Index capacity", + "panels": [ ], + "title": "Index Capacity", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Number of indexed documents for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", 
- "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1579,27 +1033,19 @@ "y": 27 }, "id": 16, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Indexing count (avg)" } ], "title": "Documents indexed", @@ -1607,47 +1053,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Current number of segments for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1691,27 +1103,19 @@ "y": 27 }, "id": 17, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_segments_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_segments_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Segments number" } ], "title": "Segment count", @@ -1719,47 +1123,16 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Number of merge operations for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "points", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "drawStyle": 
"points" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1803,27 +1176,19 @@ "y": 27 }, "id": 18, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_docs_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", + "expr": "avg by(job,opensearch_cluster) (increase(opensearch_index_merges_total_docs_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=\"total\"}[$__interval:])) > 0", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{node}}: Merge docs increase", + "refId": "Merge docs increase" } ], "title": "Merge count", @@ -1831,47 +1196,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Size of query cache and request cache.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": 
"absolute", "steps": [ { "color": "green", @@ -1916,35 +1247,35 @@ }, "id": 19, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_querycache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster) (\n opensearch_index_querycache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - query" + "legendFormat": "{{opensearch_cluster}}: Query cache memory bytes", + "refId": "Query cache memory bytes" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_requestcache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster) (\n opensearch_index_requestcache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - request" + "legendFormat": "{{opensearch_cluster}}: Request cache memory bytes", + "refId": "Request cache memory bytes" } ], "title": "Cache size", @@ -1952,47 +1283,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Size of the store in bytes for the selected 
index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -2036,27 +1333,19 @@ "y": 35 }, "id": 20, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Store size bytes" } ], "title": "Store size", @@ -2064,47 +1353,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Memory used by segments for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - 
"axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -2148,27 +1403,19 @@ "y": 35 }, "id": 21, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_segments_memory_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_segments_memory_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Segments memory bytes" } ], "title": "Segment size", @@ -2176,47 +1423,16 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Size of merge operations in bytes for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": 
"points", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "drawStyle": "points" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -2260,27 +1476,19 @@ "y": 35 }, "id": 22, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_merges_current_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) > 0", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Merge current size bytes" } ], "title": "Merge size", @@ -2288,47 +1496,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of index shards for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -2372,37 +1546,29 @@ "y": 35 }, "id": 23, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (index) (avg by(job,opensearch_cluster,index) (opensearch_index_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", type=~\"active|active_primary\"}))", + "expr": "sum by (index) (avg by(job,opensearch_cluster) (opensearch_index_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", type=~\"active|active_primary\"}))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{ index }}", + "refId": "Active shards per index" } ], "title": "Shard count", "type": "timeseries" } ], - "refresh": "1m", - "schemaVersion": 36, + "refresh": "30s", + "schemaVersion": 39, "tags": [ - "opensearch-mixin" + "opensearch" ], "templating": { "list": [ @@ -2410,7 +1576,7 @@ "label": "Prometheus data source", "name": "prometheus_datasource", "query": "prometheus", - "regex": "", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", "type": "datasource" }, { @@ -2423,7 +1589,7 @@ "label": "Job", "multi": true, "name": "job", - "query": "label_values(opensearch_index_search_fetch_count{opensearch_cluster!=\"\"}, job)", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", "refresh": 2, "sort": 1, "type": "query" @@ -2438,7 +1604,7 @@ "label": "Opensearch_cluster", "multi": true, "name": "opensearch_cluster", - "query": 
"label_values(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -2450,21 +1616,29 @@ "uid": "${prometheus_datasource}" }, "includeAll": true, - "label": "Index", + "label": "Node", "multi": true, - "name": "index", - "query": "label_values(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, index)", + "name": "node", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", "refresh": 2, "sort": 1, "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" } ] }, "time": { - "from": "now-1h", + "from": "now-30m", "to": "now" }, "timezone": "default", - "title": "OpenSearch search and index overview", + "title": "OpenSearch Search and Index Overview", "uid": "opensearch-search-and-index-overview" } \ No newline at end of file diff --git a/opensearch-mixin/g.libsonnet b/opensearch-mixin/g.libsonnet index 6da9f4eef..e6a2060ee 100644 --- a/opensearch-mixin/g.libsonnet +++ b/opensearch-mixin/g.libsonnet @@ -1 +1 @@ -import 'github.com/grafana/grafonnet/gen/grafonnet-v10.0.0/main.libsonnet' +import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet' diff --git a/opensearch-mixin/jsonnetfile.json b/opensearch-mixin/jsonnetfile.json index 53d0be67e..7205eeac9 100644 --- a/opensearch-mixin/jsonnetfile.json +++ b/opensearch-mixin/jsonnetfile.json @@ -1,33 +1,51 @@ { "version": 1, "dependencies": [ - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" - } + { + "source": { + "git": { + 
"remote": "https://github.com/grafana/grafonnet-lib.git", + "subdir": "grafonnet" + } + }, + "version": "master" }, - "version": "master" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/jsonnet-libs.git", - "subdir": "common-lib" - } + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-v11.4.0" + } + }, + "version": "main" }, - "version": "master" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet.git", - "subdir": "gen/grafonnet-v10.0.0" - } + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" + } + }, + "version": "master" }, - "version": "main" - } + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "grafana-cloud-integration-utils" + } + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "logs-lib" + } + }, + "version": "master" + } ], "legacyImports": true } diff --git a/opensearch-mixin/links.libsonnet b/opensearch-mixin/links.libsonnet new file mode 100644 index 000000000..867b91809 --- /dev/null +++ b/opensearch-mixin/links.libsonnet @@ -0,0 +1,33 @@ +local g = import './g.libsonnet'; + +{ + local link = g.dashboard.link, + new(this): + { + opensearchClusterOverview: + link.link.new('Opensearch Cluster Overview', '/d/' + this.grafana.dashboards['opensearch-cluster-overview.json'].uid) + + link.link.options.withKeepTime(true), + + // opensearchNodeOverview: + // link.link.new('Opensearch Node Overview', '/d/' + this.grafana.dashboards['opensearch-node-overview.json'].uid) + // + link.link.options.withKeepTime(true), + + // opensearchSearchAndIndexOverview: + // link.link.new('Opensearch Search and Index Overview', '/d/' + this.grafana.dashboards['opensearch-search-and-index-overview.json'].uid) + // + link.link.options.withKeepTime(true), + + otherDashboards: 
+ link.dashboards.new('All dashboards', this.config.dashboardTags) + + link.dashboards.options.withIncludeVars(true) + + link.dashboards.options.withKeepTime(true) + + link.dashboards.options.withAsDropdown(true), + } + + + if this.config.enableLokiLogs then + { + logs: + link.link.new('Opensearch Logs', '/d/' + this.grafana.dashboards['opensearch-logs.json'].uid) + + link.link.options.withKeepTime(true), + } + else {}, +} diff --git a/opensearch-mixin/main.libsonnet b/opensearch-mixin/main.libsonnet new file mode 100644 index 000000000..598ae832b --- /dev/null +++ b/opensearch-mixin/main.libsonnet @@ -0,0 +1,48 @@ +local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; +local dashboards = import './dashboards.libsonnet'; +local g = import './g.libsonnet'; +local links = import './links.libsonnet'; +local panels = import './panels.libsonnet'; +local rows = import './rows.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + withConfigMixin(config): { + config+: config, + }, + + new(): { + local this = self, + config: config, + signals: + { + [sig]: commonlib.signals.unmarshallJsonMulti( + this.config.signals[sig], + type=this.config.metricsSource + ) + for sig in std.objectFields(this.config.signals) + }, + + grafana: { + variables: commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='opensearch_cluster_status', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ), + annotations: {}, + links: links.new(this), + panels: panels.new(this), + dashboards: dashboards.new(this), + rows: rows.new(this), + }, + + prometheus: { + alerts: alerts.new(this), + recordingRules: {}, + }, + }, +} diff --git a/opensearch-mixin/mixin.libsonnet b/opensearch-mixin/mixin.libsonnet index 4d987cf31..d28e80b23 100644 --- a/opensearch-mixin/mixin.libsonnet +++ b/opensearch-mixin/mixin.libsonnet 
@@ -1,3 +1,31 @@ -(import 'dashboards/dashboards.libsonnet') + -(import 'alerts/alerts.libsonnet') + -(import 'config.libsonnet') +local mixinlib = import './main.libsonnet'; +local config = (import './config.libsonnet'); +local util = import 'grafana-cloud-integration-utils/util.libsonnet'; + + +local mixin = mixinlib.new() // Library object built from main.libsonnet. + + mixinlib.withConfigMixin( // Overrides merged into the library config field. + { + filteringSelector: config.filteringSelector, // Key must match the name main.libsonnet reads (this.config.filteringSelector). + uid: config.uid, + enableLokiLogs: true, + } + ); + +local label_patch = { // Passed to util.patch_variables for every dashboard; presumably keyed by dashboard variable name (confirm against util.libsonnet). + cluster+: { + allValue: '.*', + }, +}; + +{ + grafanaDashboards+:: { // One decorated and patched dashboard per entry of mixin.grafana.dashboards. + [fname]: + local dashboard = util.decorate_dashboard(mixin.grafana.dashboards[fname], tags=config.dashboardTags); + dashboard + util.patch_variables(dashboard, label_patch) + + for fname in std.objectFields(mixin.grafana.dashboards) + }, + prometheusAlerts+:: mixin.prometheus.alerts, + prometheusRules+:: mixin.prometheus.recordingRules, +} diff --git a/opensearch-mixin/panels.libsonnet b/opensearch-mixin/panels.libsonnet index 42bc830bc..1b2e9e165 100644 --- a/opensearch-mixin/panels.libsonnet +++ b/opensearch-mixin/panels.libsonnet @@ -1,228 +1,1039 @@ -// variables.libsonnet local g = import './g.libsonnet'; local var = g.dashboard.variable; local commonlib = import 'common-lib/common/main.libsonnet'; local utils = commonlib.utils; { - new( - groupLabels, - instanceLabels, - variables, - ): { - - local promDatasource = { - uid: '${%s}' % variables.datasources.prometheus.name, - }, - osRolesTimeline: - g.panel.statusHistory.new('Roles timeline') - + g.panel.statusHistory.panelOptions.withDescription('OpenSearch node roles over time.') - + g.panel.statusHistory.options.withShowValue('never') - + g.panel.statusHistory.options.withLegend(false) - + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) - + g.panel.statusHistory.queryOptions.withTargets( - [ - g.query.prometheus.new( - promDatasource.uid, - ||| - max by (node,role) ( - max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="data"}[1m]) == 1
- ) * 2 - ||| - % { - queriesSelector: variables.queriesSelector, - }, - ) - + g.query.prometheus.withLegendFormat('{{node}}'), - g.query.prometheus.new( - promDatasource.uid, - ||| - max by (node,role) ( - max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="master"}[1m]) == 1 - ) * 3 - ||| - % { queriesSelector: variables.queriesSelector }, - ) - + g.query.prometheus.withLegendFormat('{{node}}'), - g.query.prometheus.new( - promDatasource.uid, - ||| - max by (node,role) ( - max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="ingest"}[1m]) == 1 - ) * 4 - ||| - % { queriesSelector: variables.queriesSelector }, - ) - + g.query.prometheus.withLegendFormat('{{node}}'), - g.query.prometheus.new( - promDatasource.uid, - ||| - max by (node,role) ( - max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="cluster_manager"}[1m]) == 1 - ) * 5 - ||| - % { queriesSelector: variables.queriesSelector }, - ) - + g.query.prometheus.withLegendFormat('{{node}}'), - g.query.prometheus.new( - promDatasource.uid, - ||| - max by (node,role) ( - max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="remote_cluster_client"}[1m]) == 1 - ) * 6 - ||| - % { queriesSelector: variables.queriesSelector }, - ) - + g.query.prometheus.withLegendFormat('{{node}}'), - ] - ) - + g.panel.statusHistory.standardOptions.withMappings([ - { - type: 'value', - options: { - '2': { - color: 'light-purple', - index: 0, - text: 'data', - }, - '3': { - color: 'light-green', - index: 1, - text: 'master', - }, - '4': { - color: 'light-blue', - index: 2, - text: 'ingest', - }, - '5': { - text: 'cluster_manager', - color: 'light-yellow', - index: 3, + new(this):: + { + local signals = this.signals, + + osRoles: + g.panel.table.new('Roles') + + g.panel.table.panelOptions.withDescription('OpenSearch node roles.') + + g.panel.table.queryOptions.withTargets([ + signals.roles.node_role_last_seen.asTarget() + + g.query.prometheus.withInstant(true), + ]) + + 
g.panel.table.queryOptions.withTransformations([ + {id: 'labelsToFields', options: {mode: 'columns', valueLabel: 'role'}}, + {id: 'merge', options: {}}, + { + id: 'organize', + options: { + excludeByName: {Time: true}, + indexByName: { + Time: 0, node: 3, nodeid: 3, master: 104, data: 105, + ingest: 106, remote_cluster_client: 107, cluster_manager: 108, + } + {[k]: 3 for k in this.config.groupLabels + this.config.instanceLabels}, + renameByName: { + Time: '', cluster: 'Cluster', cluster_manager: 'Cluster manager', + data: 'Data', ingest: 'Ingest', master: 'Master', + node: 'Node', nodeid: 'Nodeid', remote_cluster_client: 'Remote cluster client', + }, }, - '6': { - text: 'remote_cluster_client', - color: 'super-light-red', - index: 4, + }, + ]) + + g.panel.table.standardOptions.withMappings([ + g.panel.table.standardOptions.mapping.ValueMap.withType() + + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ + '0': {color: 'super-light-orange', index: 5, text: 'False'}, + '1': {color: 'light-green', index: 3, text: 'True'}, + Data: {color: 'light-purple', index: 0, text: 'data'}, + Ingest: {color: 'light-blue', index: 2, text: 'ingest'}, + Master: {color: 'light-green', index: 1, text: 'master'}, + 'Remote cluster client': {color: 'light-orange', index: 4, text: 'remote_cluster_client'}, + }), + ]) + + g.panel.table.standardOptions.withOverrides([ + g.panel.table.fieldOverride.byRegexp.new('/Data|Master|Ingest|Remote.+|Cluster.+/') + + g.panel.table.fieldOverride.byRegexp.withProperty('custom.cellOptions', {type: 'color-text'}), + ]), + + osRolesTimeline: + g.panel.statusHistory.new('Roles timeline') + + g.panel.statusHistory.panelOptions.withDescription('OpenSearch node roles over time.') + + g.panel.statusHistory.options.withShowValue('never') + + g.panel.statusHistory.options.withLegend(false) + + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) + + g.panel.statusHistory.queryOptions.withTargets([ + signals.roles.node_role_data.asTarget(), + 
signals.roles.node_role_master.asTarget(), + signals.roles.node_role_ingest.asTarget(), + signals.roles.node_role_cluster_manager.asTarget(), + signals.roles.node_role_remote_cluster_client.asTarget(), + ]) + + g.panel.statusHistory.standardOptions.withMappings([ + { + type: 'value', + options: { + '2': {color: 'light-purple', index: 0, text: 'data'}, + '3': {color: 'light-green', index: 1, text: 'master'}, + '4': {color: 'light-blue', index: 2, text: 'ingest'}, + '5': {color: 'light-yellow', index: 3, text: 'cluster_manager'}, + '6': {color: 'super-light-red', index: 4, text: 'remote_cluster_client'}, }, }, - }, - ]), - - osRoles: - g.panel.table.new('Roles') - + g.panel.table.panelOptions.withDescription('OpenSearch node roles.') - + g.panel.table.queryOptions.withTargets([ - g.query.prometheus.new( - promDatasource.uid, - 'max by (%(agg)s) (last_over_time(opensearch_node_role_bool{%(queriesSelector)s}[1d]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', groupLabels + instanceLabels + ['node', 'nodeid', 'role', 'primary_ip']), + ]), + + // Cluster Overview Panels + clusterStatusPanel: + g.panel.stat.new('Cluster status') + + g.panel.stat.panelOptions.withDescription('The overall health and availability of the OpenSearch cluster.') + + g.panel.stat.queryOptions.withTargets([ + signals.cluster.cluster_status.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.withMappings([ + g.panel.stat.standardOptions.mapping.ValueMap.withType() + + g.panel.stat.standardOptions.mapping.ValueMap.withOptions({ + '0': {index: 0, text: 'Green'}, + '1': {index: 1, text: 'Yellow'}, + '2': {index: 2, text: 'Red'}, + }), + ]) + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(null), + 
g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(0), + g.panel.stat.standardOptions.threshold.step.withColor('yellow') + + g.panel.stat.standardOptions.threshold.step.withValue(1), + g.panel.stat.standardOptions.threshold.step.withColor('red') + + g.panel.stat.standardOptions.threshold.step.withValue(2), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + nodeCountPanel: + g.panel.stat.new('Node count') + + g.panel.stat.panelOptions.withDescription('The number of running nodes across the OpenSearch cluster.') + + g.panel.stat.queryOptions.withTargets([ + signals.cluster.cluster_nodes_number.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(null), + g.panel.stat.standardOptions.threshold.step.withColor('red') + + g.panel.stat.standardOptions.threshold.step.withValue(0), + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(1), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + dataNodeCountPanel: + g.panel.stat.new('Data node count') + + g.panel.stat.panelOptions.withDescription('The number of data nodes in the OpenSearch cluster.') + + g.panel.stat.queryOptions.withTargets([ + signals.cluster.cluster_datanodes_number.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(null), + g.panel.stat.standardOptions.threshold.step.withColor('red') + + 
g.panel.stat.standardOptions.threshold.step.withValue(0), + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(1), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + shardCountPanel: + g.panel.stat.new('Shard count') + + g.panel.stat.panelOptions.withDescription('The number of shards in the OpenSearch cluster across all indices.') + + g.panel.stat.queryOptions.withTargets([ + signals.cluster.cluster_shards_number_total.withExprWrappersMixin(['sum(', ')']).asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(null), + g.panel.stat.standardOptions.threshold.step.withColor('red') + + g.panel.stat.standardOptions.threshold.step.withValue(0), + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(1), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + activeShardsPercentagePanel: + g.panel.stat.new('Active shards %') + + g.panel.stat.panelOptions.withDescription('Percent of active shards across the OpenSearch cluster.') + + g.panel.stat.queryOptions.withTargets([ + signals.cluster.cluster_shards_active_percent.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(null), + g.panel.stat.standardOptions.threshold.step.withColor('red') + + g.panel.stat.standardOptions.threshold.step.withValue(0), + g.panel.stat.standardOptions.threshold.step.withColor('yellow') + + 
g.panel.stat.standardOptions.threshold.step.withValue(1), + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(100), + ]) + + g.panel.stat.standardOptions.withUnit('percent') + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + topNodesByCPUUsagePanel: + g.panel.barGauge.new('Top nodes by CPU usage') + + g.panel.barGauge.panelOptions.withDescription('Top nodes by OS CPU usage across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withTargets([ + signals.topk.os_cpu_percent_topk.withExprWrappersMixin(['topk(10, sort_desc(', ')']).asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.barGauge.standardOptions.color.withMode('thresholds') + + g.panel.barGauge.standardOptions.thresholds.withSteps([ + g.panel.barGauge.standardOptions.threshold.step.withColor('green') + + g.panel.barGauge.standardOptions.threshold.step.withValue(null), + g.panel.barGauge.standardOptions.threshold.step.withColor('red') + + g.panel.barGauge.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.barGauge.standardOptions.withMin(0) + + g.panel.barGauge.standardOptions.withMax(100) + + g.panel.barGauge.standardOptions.withUnit('percent') + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + breakersTrippedPanel: + g.panel.barGauge.new('Breakers tripped') + + g.panel.barGauge.panelOptions.withDescription('The total count of circuit breakers tripped across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withTargets([ + signals.topk.circuitbreaker_tripped_count_sum.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.barGauge.standardOptions.color.withMode('thresholds') + + g.panel.barGauge.standardOptions.thresholds.withSteps([ + g.panel.barGauge.standardOptions.threshold.step.withColor('green') + + g.panel.barGauge.standardOptions.threshold.step.withValue(null), + 
g.panel.barGauge.standardOptions.threshold.step.withColor('red') + + g.panel.barGauge.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.barGauge.standardOptions.withUnit('trips') + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + shardStatusPanel: + g.panel.barGauge.new('Shard status') + + g.panel.barGauge.panelOptions.withDescription('Shard status counts across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withTargets([ + signals.cluster.cluster_shards_number_by_type.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.barGauge.standardOptions.color.withMode('thresholds') + + g.panel.barGauge.standardOptions.thresholds.withSteps([ + g.panel.barGauge.standardOptions.threshold.step.withColor('green') + + g.panel.barGauge.standardOptions.threshold.step.withValue(null), + g.panel.barGauge.standardOptions.threshold.step.withColor('red') + + g.panel.barGauge.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.barGauge.standardOptions.withUnit('shards') + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + topNodesByDiskUsagePanel: + g.panel.barGauge.new('Top nodes by disk usage') + + g.panel.barGauge.panelOptions.withDescription('Top nodes by disk usage across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withTargets([ + signals.topk.fs_path_used_percent_topk.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.barGauge.standardOptions.color.withMode('thresholds') + + g.panel.barGauge.standardOptions.thresholds.withSteps([ + g.panel.barGauge.standardOptions.threshold.step.withColor('green') + + g.panel.barGauge.standardOptions.threshold.step.withValue(null), + g.panel.barGauge.standardOptions.threshold.step.withColor('red') + + g.panel.barGauge.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.barGauge.standardOptions.withMin(0) + + g.panel.barGauge.standardOptions.withMax(100) + + 
g.panel.barGauge.standardOptions.withUnit('percent') + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + totalDocumentsPanel: + g.panel.timeSeries.new('Total documents') + + g.panel.timeSeries.panelOptions.withDescription('The total count of documents indexed across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.cluster.indices_indexing_index_count_avg.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('documents'), + + pendingTasksPanel: + g.panel.timeSeries.new('Pending tasks') + + g.panel.timeSeries.panelOptions.withDescription('The number of tasks waiting to be executed across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.cluster.cluster_pending_tasks_number.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('tasks'), + + storeSizePanel: + g.panel.timeSeries.new('Store size') + + g.panel.timeSeries.panelOptions.withDescription('The total size of the store across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.cluster.indices_store_size_bytes_avg.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes'), + + maxTaskWaitTimePanel: + g.panel.timeSeries.new('Max task wait time') + + g.panel.timeSeries.panelOptions.withDescription('The max wait time for tasks to be executed across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([signals.cluster.cluster_task_max_wait_seconds.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + topIndicesByRequestRatePanel: + g.panel.timeSeries.new('Top indices by request rate') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.') + + 
g.panel.timeSeries.queryOptions.withTargets([signals.topk.search_current_inflight_topk.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + topIndicesByRequestLatencyPanel: + g.panel.timeSeries.new('Top indices by request latency') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.search_avg_latency_topk.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + topIndicesByCombinedCacheHitRatioPanel: + g.panel.timeSeries.new('Top indices by combined cache hit ratio') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.request_query_cache_hit_rate_topk.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('percent'), + + topNodesByIngestRatePanel: + g.panel.timeSeries.new('Top nodes by ingest rate') + + g.panel.timeSeries.panelOptions.withDescription('Top nodes by rate of ingest across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.ingest_throughput_topk.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('Bps'), + + topNodesByIngestLatencyPanel: + g.panel.timeSeries.new('Top nodes by ingest latency') + + g.panel.timeSeries.panelOptions.withDescription('Top nodes by ingestion latency across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.ingest_latency_topk.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + topNodesByIngestErrorsPanel: + 
g.panel.timeSeries.new('Top nodes by ingest errors') + + g.panel.timeSeries.panelOptions.withDescription('Top nodes by ingestion failures across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.ingest_failures_topk.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('errors'), + + topIndicesByIndexRatePanel: + g.panel.timeSeries.new('Top indices by index rate') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by rate of document indexing across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.indexing_current_topk.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('documents/s'), + + topIndicesByIndexLatencyPanel: + g.panel.timeSeries.new('Top indices by index latency') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by indexing latency across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.indexing_latency_topk.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + topIndicesByIndexFailuresPanel: + g.panel.timeSeries.new('Top indices by index failures') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by index document failures across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.indexing_failed_topk.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('failures'), + + // Node Overview Panels - Refactored to use modern patterns and signals + + // Node CPU usage + nodeCpuUsage: + g.panel.timeSeries.new('Node CPU usage') + + g.panel.timeSeries.panelOptions.withDescription('CPU usage 
percentage of the node\'s Operating System.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.os_cpu_percent.asTarget()]) + + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + + g.panel.timeSeries.standardOptions.withDecimals(1) + + g.panel.timeSeries.standardOptions.withMax(100) + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node memory usage + nodeMemoryUsage: + g.panel.timeSeries.new('Node memory usage') + + g.panel.timeSeries.panelOptions.withDescription('Memory usage percentage of the node for the Operating System and OpenSearch') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.os_mem_used_percent.asTarget()]) + + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + + g.panel.timeSeries.standardOptions.withDecimals(1) + + g.panel.timeSeries.standardOptions.withMax(100) + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + 
g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node I/O + nodeIO: + g.panel.timeSeries.new('Node I/O') + + g.panel.timeSeries.panelOptions.withDescription('Node file system read and write data.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.node.fs_read_bps.asTarget(), + signals.node.fs_write_bps.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('Bps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(1) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('opacity') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byRegexp.new('/time|used|busy|util/') + + g.panel.timeSeries.fieldOverride.byRegexp.withProperty('custom.axisSoftMax', 100) + + g.panel.timeSeries.fieldOverride.byRegexp.withProperty('custom.drawStyle', 'points') + + g.panel.timeSeries.fieldOverride.byRegexp.withProperty('unit', 'percent'), + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node open connections + nodeOpenConnections: + g.panel.timeSeries.new('Node open connections') + + g.panel.timeSeries.panelOptions.withDescription('Number of open connections for the selected node.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.transport_open_connections.asTarget()]) + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(30) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('opacity') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node disk usage + nodeDiskUsage: + g.panel.timeSeries.new('Node disk usage') + + g.panel.timeSeries.panelOptions.withDescription('Disk usage percentage of the selected node.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.fs_used_percent.asTarget()]) + + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + + g.panel.timeSeries.standardOptions.withDecimals(1) + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withMax(100) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(1) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node memory swap + nodeMemorySwap: + g.panel.timeSeries.new('Node memory swap') + + g.panel.timeSeries.panelOptions.withDescription('Percentage of swap space used by OpenSearch and the Operating System on the selected node.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.os_swap_used_percent.asTarget()]) + + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + + g.panel.timeSeries.standardOptions.withDecimals(1) + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withMax(100) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node network traffic + nodeNetworkTraffic: + g.panel.timeSeries.new('Node network traffic') + + g.panel.timeSeries.panelOptions.withDescription('Network traffic on the node\'s Operating System.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.node.transport_rx_bps.asTarget(), + signals.node.transport_tx_bps.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('Bps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // Circuit breakers + circuitBreakers: + g.panel.timeSeries.new('Circuit breakers') + + g.panel.timeSeries.panelOptions.withDescription('Circuit breakers tripped on the selected node by type') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.node.circuitbreaker_tripped_sum_by_name.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('trips'), + + // JVM heap used vs committed + jvmHeapUsedVsCommitted: + g.panel.timeSeries.new('JVM heap used vs committed') + + g.panel.timeSeries.panelOptions.withDescription('JVM heap memory usage vs committed.') + + g.panel.timeSeries.queryOptions.withTargets([ + 
signals.node.jvm_heap_used_bytes.asTarget(), + signals.node.jvm_heap_committed_bytes.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM non-heap used vs committed + jvmNonheapUsedVsCommitted: + g.panel.timeSeries.new('JVM non-heap used vs committed') + + g.panel.timeSeries.panelOptions.withDescription('JVM non-heap memory usage vs committed.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.node.jvm_nonheap_used_bytes.asTarget(), + signals.node.jvm_nonheap_committed_bytes.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM threads + jvmThreads: + g.panel.timeSeries.new('JVM threads') + + g.panel.timeSeries.panelOptions.withDescription('JVM thread count.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_threads.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('threads') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM buffer pools + jvmBufferPools: + g.panel.timeSeries.new('JVM buffer pools') + + g.panel.timeSeries.panelOptions.withDescription('JVM buffer pool usage.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_bufferpool_number.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM uptime + jvmUptime: + g.panel.timeSeries.new('JVM uptime') + + g.panel.timeSeries.panelOptions.withDescription('JVM uptime in seconds.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_uptime.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM garbage collections + jvmGarbageCollections: + g.panel.timeSeries.new('JVM garbage collections') + + g.panel.timeSeries.panelOptions.withDescription('JVM garbage collection count.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_gc_collections.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('collections') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM garbage collection time (points hidden, consistent with the other JVM panels) + jvmGarbageCollectionTime: + g.panel.timeSeries.new('JVM garbage collection time') + + g.panel.timeSeries.panelOptions.withDescription('JVM garbage collection time in milliseconds.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_gc_time.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM buffer pool usage; unit is 'percent' because the target signal is jvm_bufferpool_used_percent (TODO confirm the signal is scaled 0-100, not 0-1) + jvmBufferPoolUsage: + g.panel.timeSeries.new('JVM buffer pool usage') + + g.panel.timeSeries.panelOptions.withDescription('JVM buffer pool usage by pool.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_bufferpool_used_percent.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // Thread pool threads + threadPoolThreads: + g.panel.timeSeries.new('Thread pool threads') + + g.panel.timeSeries.panelOptions.withDescription('Thread pool thread count.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.threadpool_threads.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('threads') + 
g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // Thread pool tasks + threadPoolTasks: + g.panel.timeSeries.new('Thread pool tasks') + + g.panel.timeSeries.panelOptions.withDescription('Thread pool task count.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.threadpool_tasks.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('tasks') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // Search and Index Overview Panels - Refactored to use modern patterns and signals + // Search Performance Panels + searchRequestRatePanel: + g.panel.timeSeries.new('Request rate') + + g.panel.timeSeries.panelOptions.withDescription('Rate of fetch, scroll, and query requests by selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.search.search_query_current_avg.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.search.search_fetch_current_avg.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.search.search_scroll_current_avg.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.standardOptions.withOverrides([ + { + matcher: {id: 'byValue', options: {reducer: 'allIsZero', op: 'gte', value: 0}}, + properties: [{id: 'custom.hideFrom', value: {tooltip: true, viz: false, legend: 
true}}], }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(instanceLabels)) - + g.query.prometheus.withInstant(true), - ]) - + g.panel.table.standardOptions.withMappings([ - { - options: { - '0': { - color: 'super-light-orange', - index: 5, - text: 'False', - }, - '1': { - color: 'light-green', - index: 3, - text: 'True', - }, - Data: { - color: 'light-purple', - index: 0, - text: 'data', - }, - Ingest: { - color: 'light-blue', - index: 2, - text: 'ingest', - }, - Master: { - color: 'light-green', - index: 1, - text: 'master', - }, - 'Remote cluster client': { - color: 'light-orange', - index: 4, - text: 'remote_cluster_client', - }, + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + + searchRequestLatencyPanel: + g.panel.timeSeries.new('Request latency') + + g.panel.timeSeries.panelOptions.withDescription('Latency of fetch, scroll, and query requests by selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.search.search_query_latency_avg.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + signals.search.search_fetch_latency_avg.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + signals.search.search_scroll_latency_avg.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.standardOptions.withOverrides([ + { + matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, + properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], }, - type: 'value', - }, - ]) - + g.panel.table.standardOptions.withOverrides([ - { - matcher: { - id: 'byRegexp', - options: '/Data|Master|Ingest|Remote.+|Cluster.+/', + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + + searchCacheHitRatioPanel: + g.panel.timeSeries.new('Cache hit ratio') + + 
g.panel.timeSeries.panelOptions.withDescription('Ratio of query cache and request cache hits and misses.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.search.request_cache_hit_rate.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.search.query_cache_hit_rate.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.standardOptions.withOverrides([ + { + matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, + properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], }, - properties: [ - { - id: 'custom.cellOptions', - value: { - type: 'color-text', - }, - }, - ], - }, - ]) - + g.panel.table.queryOptions.withTransformations([ - { - id: 'labelsToFields', - options: { - mode: 'columns', - valueLabel: 'role', + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + + searchCacheEvictionsPanel: + g.panel.timeSeries.new('Evictions') + + g.panel.timeSeries.panelOptions.withDescription('Total evictions count by cache type for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.search.query_cache_evictions.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + signals.search.request_cache_evictions.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + signals.search.fielddata_evictions.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('evictions') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + 
g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + { + matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, + properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], }, - }, - { - id: 'merge', - options: {}, - }, - { - id: 'organize', - options: { - excludeByName: { - Time: true, - }, - indexByName: { - Time: 0, // hide time - node: 3, - nodeid: 3, - master: 104, - data: 105, - ingest: 106, - remote_cluster_client: 107, - cluster_manager: 108, - } + { - [k]: 3 - for k in groupLabels + instanceLabels - } - , - renameByName: { - Time: '', - cluster: 'Cluster', - //roles: - cluster_manager: 'Cluster manager', - data: 'Data', - ingest: 'Ingest', - master: 'Master', - node: 'Node', - nodeid: 'Nodeid', - remote_cluster_client: 'Remote cluster client', - }, + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + + // Indexing Performance Panels + indexingRatePanel: + g.panel.timeSeries.new('Index rate') + + g.panel.timeSeries.panelOptions.withDescription('Rate of indexed documents for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.indexing_current.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('documents/s') + + g.panel.timeSeries.standardOptions.withOverrides([ + { + matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, + properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], }, - }, - ]), - }, + ]), + + indexingLatencyPanel: + g.panel.timeSeries.new('Index latency') + + g.panel.timeSeries.panelOptions.withDescription('Document indexing latency for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.indexing_latency.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') 
+ + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]), + + indexingFailuresPanel: + g.panel.timeSeries.new('Index failures') + + g.panel.timeSeries.panelOptions.withDescription('Number of indexing failures for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.indexing_failed.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('failures') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + // Index Operations Panels + flushLatencyPanel: + g.panel.timeSeries.new('Flush latency') + + g.panel.timeSeries.panelOptions.withDescription('Index flush latency for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.flush_latency.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + 
g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + mergeTimePanel: + g.panel.timeSeries.new('Merge time') + + g.panel.timeSeries.panelOptions.withDescription('Index merge time for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.merge_time.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.indexing.merge_stopped_time.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.indexing.merge_throttled_time.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + + refreshLatencyPanel: + g.panel.timeSeries.new('Refresh latency') + + g.panel.timeSeries.panelOptions.withDescription('Index refresh latency for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.refresh_latency.asTarget() + + 
g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + // Index Statistics Panels + translogOperationsPanel: + g.panel.timeSeries.new('Translog operations') + + g.panel.timeSeries.panelOptions.withDescription('Current number of translog operations for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.translog_ops.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('operations') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + docsDeletedPanel: + g.panel.timeSeries.new('Docs deleted') + + g.panel.timeSeries.panelOptions.withDescription('Rate of documents deleted for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + 
signals.indexing.indexing_delete_current.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('documents/s') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + documentsIndexedPanel: + g.panel.timeSeries.new('Documents indexed') + + g.panel.timeSeries.panelOptions.withDescription('Number of indexed documents for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.indexing_count.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('documents') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + // Index Structure Panels + segmentCountPanel: + g.panel.timeSeries.new('Segment count') + + g.panel.timeSeries.panelOptions.withDescription('Current number of segments for the selected index.') + + 
g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.segments_number.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('segments') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + mergeCountPanel: + g.panel.timeSeries.new('Merge count') + + g.panel.timeSeries.panelOptions.withDescription('Number of merge operations for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.merge_docs.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('merges') + + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + // Cache and Memory Panels + cacheSizePanel: + g.panel.timeSeries.new('Cache size') + + 
g.panel.timeSeries.panelOptions.withDescription('Size of query cache and request cache.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.search.query_cache_memory.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.search.request_cache_memory.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + + searchAndIndexStoreSizePanel: + g.panel.timeSeries.new('Store size') + + g.panel.timeSeries.panelOptions.withDescription('Size of the store in bytes for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.store_size_bytes.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + 
g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + segmentSizePanel: + g.panel.timeSeries.new('Segment size') + + g.panel.timeSeries.panelOptions.withDescription('Memory used by segments for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.segments_memory_bytes.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + mergeSizePanel: + g.panel.timeSeries.new('Merge size') + + g.panel.timeSeries.panelOptions.withDescription('Size of merge operations in bytes for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.merge_current_size.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + 
g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + + searchAndIndexShardCountPanel: + g.panel.timeSeries.new('Shard count') + + g.panel.timeSeries.panelOptions.withDescription('The number of index shards for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.shards_per_index.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('shards') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ]), + }, } diff --git a/opensearch-mixin/rows.libsonnet b/opensearch-mixin/rows.libsonnet new file mode 100644 index 000000000..a80b2526d --- /dev/null +++ b/opensearch-mixin/rows.libsonnet @@ -0,0 +1,156 @@ +local g = import './g.libsonnet'; + +{ + new(this): { + clusterOverviewRow: + g.panel.row.new('Cluster Overview') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.clusterStatusPanel { gridPos+: { w: 5, h: 6 } }, + this.grafana.panels.nodeCountPanel { gridPos+: { w: 5, h: 6 } }, + this.grafana.panels.dataNodeCountPanel { gridPos+: { w: 5, h: 6 } }, + this.grafana.panels.shardCountPanel { gridPos+: { w: 5, h: 6 } }, + this.grafana.panels.activeShardsPercentagePanel { gridPos+: { w: 4, h: 6 } }, + ]), + + rolesRow: + 
g.panel.row.new('Node Roles') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.osRoles { gridPos+: { w: 24 } }, + this.grafana.panels.osRolesTimeline { gridPos+: { w: 24 } }, + ]), + + resourceUsageRow: + g.panel.row.new('Resource Usage') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topNodesByCPUUsagePanel { gridPos+: { w: 8 } }, + this.grafana.panels.breakersTrippedPanel { gridPos+: { w: 8 } }, + this.grafana.panels.shardStatusPanel { gridPos+: { w: 8 } }, + ]), + + storageAndTasksRow: + g.panel.row.new('Storage and Tasks') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topNodesByDiskUsagePanel { gridPos+: { w: 8 } }, + this.grafana.panels.totalDocumentsPanel { gridPos+: { w: 16 } }, + this.grafana.panels.pendingTasksPanel { gridPos+: { w: 8 } }, + this.grafana.panels.storeSizePanel { gridPos+: { w: 8 } }, + this.grafana.panels.maxTaskWaitTimePanel { gridPos+: { w: 8 } }, + ]), + + searchPerformanceRow: + g.panel.row.new('Search Performance') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topIndicesByRequestRatePanel { gridPos+: { w: 8 } }, + this.grafana.panels.topIndicesByRequestLatencyPanel { gridPos+: { w: 8 } }, + this.grafana.panels.topIndicesByCombinedCacheHitRatioPanel { gridPos+: { w: 8 } }, + ]), + + ingestPerformanceRow: + g.panel.row.new('Ingest Performance') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topNodesByIngestRatePanel { gridPos+: { w: 8 } }, + this.grafana.panels.topNodesByIngestLatencyPanel { gridPos+: { w: 8 } }, + this.grafana.panels.topNodesByIngestErrorsPanel { gridPos+: { w: 8 } }, + ]), + + indexingPerformanceRow: + g.panel.row.new('Indexing Performance') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topIndicesByIndexRatePanel { gridPos+: { w: 8 } }, + 
this.grafana.panels.topIndicesByIndexLatencyPanel { gridPos+: { w: 8 } }, + this.grafana.panels.topIndicesByIndexFailuresPanel { gridPos+: { w: 8 } }, + ]), + + // Node Overview Dashboard Rows + nodeRolesRow: + g.panel.row.new('Node Roles') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.osRolesTimeline { gridPos+: { w: 24 } }, + this.grafana.panels.osRoles { gridPos+: { w: 24 } }, + ]), + + nodeHealthRow: + g.panel.row.new('Node health') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.nodeCpuUsage { gridPos+: { w: 6 } }, + this.grafana.panels.nodeMemoryUsage { gridPos+: { w: 6 } }, + this.grafana.panels.nodeIO { gridPos+: { w: 6 } }, + this.grafana.panels.nodeOpenConnections { gridPos+: { w: 6 } }, + this.grafana.panels.nodeDiskUsage { gridPos+: { w: 6 } }, + this.grafana.panels.nodeMemorySwap { gridPos+: { w: 6 } }, + this.grafana.panels.nodeNetworkTraffic { gridPos+: { w: 6 } }, + this.grafana.panels.circuitBreakers { gridPos+: { w: 6 } }, + ]), + + nodeJVMRow: + g.panel.row.new('Node JVM') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.jvmHeapUsedVsCommitted { gridPos+: { w: 6 } }, + this.grafana.panels.jvmNonheapUsedVsCommitted { gridPos+: { w: 6 } }, + this.grafana.panels.jvmThreads { gridPos+: { w: 6 } }, + this.grafana.panels.jvmBufferPools { gridPos+: { w: 6 } }, + this.grafana.panels.jvmUptime { gridPos+: { w: 6 } }, + this.grafana.panels.jvmGarbageCollections { gridPos+: { w: 6 } }, + this.grafana.panels.jvmGarbageCollectionTime { gridPos+: { w: 6 } }, + this.grafana.panels.jvmBufferPoolUsage { gridPos+: { w: 6 } }, + ]), + + threadPoolsRow: + g.panel.row.new('Thread pools') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.threadPoolThreads { gridPos+: { w: 12 } }, + this.grafana.panels.threadPoolTasks { gridPos+: { w: 12 } }, + ]), + + + // Search and Index Overview Dashboard Rows + 
searchAndIndexSearchPerformanceRow: + g.panel.row.new('Search Performance') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.searchRequestRatePanel { gridPos+: { w: 6 } }, + this.grafana.panels.searchRequestLatencyPanel { gridPos+: { w: 6 } }, + this.grafana.panels.searchCacheHitRatioPanel { gridPos+: { w: 6 } }, + this.grafana.panels.searchCacheEvictionsPanel { gridPos+: { w: 6 } }, + ]), + + searchAndIndexIndexingPerformanceRow: + g.panel.row.new('Indexing Performance') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.indexingRatePanel { gridPos+: { w: 6 } }, + this.grafana.panels.indexingLatencyPanel { gridPos+: { w: 6 } }, + this.grafana.panels.indexingFailuresPanel { gridPos+: { w: 6 } }, + this.grafana.panels.flushLatencyPanel { gridPos+: { w: 6 } }, + this.grafana.panels.mergeTimePanel { gridPos+: { w: 6 } }, + this.grafana.panels.refreshLatencyPanel { gridPos+: { w: 6 } }, + this.grafana.panels.translogOperationsPanel { gridPos+: { w: 6 } }, + this.grafana.panels.docsDeletedPanel { gridPos+: { w: 6 } }, + ]), + + searchAndIndexCapacityRow: + g.panel.row.new('Index Capacity') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.documentsIndexedPanel { gridPos+: { w: 6 } }, + this.grafana.panels.segmentCountPanel { gridPos+: { w: 6 } }, + this.grafana.panels.mergeCountPanel { gridPos+: { w: 6 } }, + this.grafana.panels.cacheSizePanel { gridPos+: { w: 6 } }, + this.grafana.panels.searchAndIndexStoreSizePanel { gridPos+: { w: 6 } }, + this.grafana.panels.segmentSizePanel { gridPos+: { w: 6 } }, + this.grafana.panels.mergeSizePanel { gridPos+: { w: 6 } }, + this.grafana.panels.searchAndIndexShardCountPanel { gridPos+: { w: 6 } }, + ]), + }, +} diff --git a/opensearch-mixin/signals/cluster.libsonnet b/opensearch-mixin/signals/cluster.libsonnet new file mode 100644 index 000000000..d67a91f70 --- /dev/null +++ b/opensearch-mixin/signals/cluster.libsonnet 
@@ -0,0 +1,152 @@ +// Cluster-level signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_cluster_status', + }, + signals: { + cluster_status: { + name: 'Cluster status', + description: 'Overall cluster health status as a numeric code.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_status{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_nodes_number: { + name: 'Node count', + description: 'The number of running nodes across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_nodes_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_datanodes_number: { + name: 'Data node count', + description: 'The number of data nodes in the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_datanodes_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_shards_number_total: { + name: 'Shard count', + description: 'The number of shards in the OpenSearch cluster across all indices.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'opensearch_cluster_shards_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + aggKeepLabels: ['type'], + }, + }, + }, + cluster_shards_number_by_type: { + name: 'Shard status', + description: 'Shard status counts across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + 
aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_shards_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{type}}', + aggKeepLabels: ['type'], + }, + }, + }, + cluster_shards_active_percent: { + name: 'Active shards %%', + description: 'Percent of active shards across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + unit: 'percent', + sources: { + prometheus: { + expr: 'opensearch_cluster_shards_active_percent{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_pending_tasks_number: { + name: 'Pending tasks', + description: 'The number of tasks waiting to be executed across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + sources: { + prometheus: { + expr: 'opensearch_cluster_pending_tasks_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_task_max_wait_seconds: { + name: 'Max task wait time', + description: 'The max wait time for tasks to be executed across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + unit: 's', + sources: { + prometheus: { + expr: 'opensearch_cluster_task_max_waiting_time_seconds{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + indices_indexing_index_count_avg: { + name: 'Total documents', + description: 'The total count of documents indexed across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_indices_indexing_index_count{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + indices_store_size_bytes_avg: { + name: 'Store size', + description: 'The total size of the store across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 
'bytes', + sources: { + prometheus: { + expr: 'opensearch_indices_store_size_bytes{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/signals/indexing.libsonnet b/opensearch-mixin/signals/indexing.libsonnet new file mode 100644 index 000000000..b0189182b --- /dev/null +++ b/opensearch-mixin/signals/indexing.libsonnet @@ -0,0 +1,265 @@ +// Indexing operation signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_index_indexing_index_current_number', + }, + signals: { + indexing_current: { + name: 'Indexing current', + description: 'In-flight indexing operations.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_current_number{%(queriesSelectorGroupOnly)s,index=~"$index",context="total"}', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + indexing_latency: { + name: 'Indexing latency (avg)', + description: 'Average indexing latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_indexing_index_time_seconds{%(queriesSelector)s, context=~"total"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelector)s, context=~"total"}[$__interval:]),1))', + }, + }, + }, + indexing_count: { + name: 'Indexing count (avg)', + description: 'Indexing ops count.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'documents', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: 
'{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + indexing_failed: { + name: 'Indexing failed (avg)', + description: 'Indexing failures per interval.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'failures', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_failed_count{%(queriesSelectorGroupOnly)s,index=~"$index",context="total"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + indexing_delete_current: { + name: 'Indexing delete current', + description: 'In-flight delete operations.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'documents/s', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_delete_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + flush_latency: { + name: 'Flush latency (avg)', + description: 'Average flush latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (increase(opensearch_index_flush_total_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]),1))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + flush_count: { + name: 'Flush count (avg)', + description: 'Flush count proxy (per mapping).', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_flush_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelector)s, context="total"}[$__interval:]),1))', + }, + }, + }, + merge_time: { + name: 'Merge time increase', + description: 'Merge time increase (boolean >0).', + type: 'raw', + unit: 
's', + sources: { + prometheus: { + expr: 'avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:])) > 0', + legendCustomTemplate: '{{index}} - total', + }, + }, + }, + merge_stopped_time: { + name: 'Merge stopped time increase', + description: 'Merge stopped time increase (boolean >0).', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_stopped_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:])) > 0', + legendCustomTemplate: '{{index}} - stopped', + }, + }, + }, + merge_throttled_time: { + name: 'Merge throttled time increase', + description: 'Merge throttled time increase (boolean >0).', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_throttled_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:])) > 0', + legendCustomTemplate: '{{index}} - throttled', + }, + }, + }, + merge_docs: { + name: 'Merge docs increase', + description: 'Merge docs increase (boolean >0).', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_merges_total_docs_count{%(queriesSelector)s, context="total"}[$__interval:])) > 0', + }, + }, + }, + merge_current_size: { + name: 'Merge current size bytes', + description: 'Merge current size (boolean >0).', + type: 'raw', + unit: 'bytes', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (opensearch_index_merges_current_size_bytes{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}) > 0', + legendCustomTemplate: '{{index}}', + }, + }, + }, + refresh_latency: { + name: 'Refresh latency (avg)', + description: 'Average refresh latency.', + type: 'raw', + unit: 's', + 
sources: { + prometheus: { + expr: 'avg by(job,opensearch_cluster,index) (increase(opensearch_index_refresh_total_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]),1))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + refresh_count: { + name: 'Refresh count (avg)', + description: 'Refresh count proxy (per mapping).', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_refresh_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelector)s, context="total"}[$__interval:]),1))', + }, + }, + }, + translog_ops: { + name: 'Translog operations', + description: 'Translog operation count.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'operations', + sources: { + prometheus: { + expr: 'opensearch_index_translog_operations_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + segments_number: { + name: 'Segments number', + description: 'Number of segments.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'segments', + sources: { + prometheus: { + expr: 'opensearch_index_segments_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + segments_memory_bytes: { + name: 'Segments memory bytes', + description: 'Segment memory usage.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_segments_memory_bytes{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: '{{index}}', + 
aggKeepLabels: ['index'], + }, + }, + }, + store_size_bytes: { + name: 'Store size bytes', + description: 'Store size in bytes.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_store_size_bytes{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + shards_per_index: { + name: 'Active shards per index', + description: 'Active shards per index.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'sum by (index) (avg by(' + this.groupAggList + ') (opensearch_index_shards_number{%(queriesSelector)s, type=~"active|active_primary"}))', + legendCustomTemplate: '{{ index }}', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/signals/node.libsonnet b/opensearch-mixin/signals/node.libsonnet new file mode 100644 index 000000000..978675fe9 --- /dev/null +++ b/opensearch-mixin/signals/node.libsonnet @@ -0,0 +1,270 @@ +// Node-level signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_os_cpu_percent', + }, + signals: { + os_cpu_percent: { + name: 'CPU %%', + description: 'Node CPU percent.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'opensearch_os_cpu_percent{%(queriesSelector)s}', + legendCustomTemplate: '{{node}}', + }, + }, + }, + os_mem_used_percent: { + name: 'Memory used %%', + description: 'Node memory used percent.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'opensearch_os_mem_used_percent{%(queriesSelector)s}', + legendCustomTemplate: '{{node}}', + }, + }, + }, + os_swap_used_percent: { + name: 'Swap used %%', + description: 'Swap used percent.', + type: 'raw', + 
unit: 'percent', + sources: { + prometheus: { + expr: '100 * opensearch_os_swap_used_bytes{%(queriesSelector)s} / clamp_min((opensearch_os_swap_used_bytes{%(queriesSelector)s} + opensearch_os_swap_free_bytes{%(queriesSelector)s}), 1)', + legendCustomTemplate: '{{node}}', + }, + }, + }, + fs_read_bps: { + name: 'FS read bytes/s', + description: 'Filesystem read rate.', + type: 'raw', + unit: 'Bps', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_fs_io_total_read_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{node}} - read', + }, + }, + }, + fs_write_bps: { + name: 'FS write bytes/s', + description: 'Filesystem write rate.', + type: 'raw', + unit: 'Bps', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_fs_io_total_write_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{node}} - write', + }, + }, + }, + fs_used_percent: { + name: 'FS used %%', + description: 'Filesystem used percent.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: '100 - (100 * opensearch_fs_path_free_bytes{%(queriesSelector)s} / clamp_min(opensearch_fs_path_total_bytes{%(queriesSelector)s}, 1))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + transport_open_connections: { + name: 'Transport server open', + description: 'Open transport server connections.', + type: 'raw', + unit: 'connections', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggListWithInstance + ') (opensearch_transport_server_open_number{%(queriesSelector)s})', + legendCustomTemplate: '{{node}}', + }, + }, + }, + transport_tx_bps: { + name: 'Transport TX bitrate', + description: 'Transport transmit bitrate.', + type: 'raw', + unit: 'bit/s', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_transport_tx_bytes_count{%(queriesSelector)s}[$__rate_interval])) * 8', + 
legendCustomTemplate: '{{node}} - sent', + }, + }, + }, + transport_rx_bps: { + name: 'Transport RX bitrate', + description: 'Transport receive bitrate.', + type: 'raw', + unit: 'bit/s', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_transport_rx_bytes_count{%(queriesSelector)s}[$__rate_interval])) * 8', + legendCustomTemplate: '{{node}} - received', + }, + }, + }, + circuitbreaker_tripped_sum_by_name: { + name: 'Circuit breaker trips by name', + description: 'Circuit breaker trips by breaker name.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'sum by (name, ' + this.groupAggListWithInstance + ') (increase(opensearch_circuitbreaker_tripped_count{%(queriesSelector)s}[$__interval:]))', + legendCustomTemplate: '{{node}} - {{ name }}', + }, + }, + }, + jvm_heap_used_bytes: { + name: 'JVM heap used', + description: 'JVM heap used.', + type: 'raw', + unit: 'bytes', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_heap_used_bytes{%(queriesSelector)s})', + }, + }, + }, + jvm_heap_committed_bytes: { + name: 'JVM heap committed', + description: 'JVM heap committed.', + type: 'raw', + unit: 'bytes', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_heap_committed_bytes{%(queriesSelector)s})', + }, + }, + }, + jvm_nonheap_used_bytes: { + name: 'JVM non-heap used', + description: 'JVM non-heap used.', + type: 'raw', + unit: 'bytes', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_nonheap_used_bytes{%(queriesSelector)s})', + }, + }, + }, + jvm_nonheap_committed_bytes: { + name: 'JVM non-heap committed', + description: 'JVM non-heap committed.', + type: 'raw', + unit: 'bytes', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_nonheap_committed_bytes{%(queriesSelector)s})', + }, + }, + }, + jvm_threads: { + name: 'JVM threads', + 
description: 'JVM thread count.', + type: 'raw', + unit: 'threads', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_threads_number{%(queriesSelector)s})', + }, + }, + }, + jvm_bufferpool_number: { + name: 'JVM buffer pools', + description: 'Number of JVM buffer pools.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ', bufferpool) (opensearch_jvm_bufferpool_number{%(queriesSelector)s})', + legendCustomTemplate: '{{ bufferpool }}', + }, + }, + }, + jvm_uptime: { + name: 'JVM uptime', + description: 'JVM uptime seconds.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_uptime_seconds{%(queriesSelector)s})', + }, + }, + }, + jvm_gc_collections: { + name: 'JVM GC collections', + description: 'GC collections per interval.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (increase(opensearch_jvm_gc_collection_count{%(queriesSelector)s}[$__interval:]))', + }, + }, + }, + jvm_gc_time: { + name: 'JVM GC time', + description: 'GC time per interval.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (increase(opensearch_jvm_gc_collection_time_seconds{%(queriesSelector)s}[$__interval:]))', + }, + }, + }, + jvm_bufferpool_used_percent: { + name: 'JVM bufferpool used %%', + description: 'Percent of bufferpool used.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: '100 * (sum by (' + this.groupAggList + ', bufferpool) (opensearch_jvm_bufferpool_used_bytes{%(queriesSelector)s})) / clamp_min((sum by (job, bufferpool, cluster) (opensearch_jvm_bufferpool_total_capacity_bytes{%(queriesSelector)s})),1)', + legendCustomTemplate: '{{ bufferpool }}', + }, + }, + }, + threadpool_threads: { + name: 'Threadpool threads', + description: 'Total threadpool threads.', + type: 'raw', + unit: 'threads', 
+ sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') ((opensearch_threadpool_threads_number{%(queriesSelector)s}))', + }, + }, + }, + threadpool_tasks: { + name: 'Threadpool tasks', + description: 'Threadpool tasks.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_threadpool_tasks_number{%(queriesSelector)s})', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/signals/roles.libsonnet b/opensearch-mixin/signals/roles.libsonnet new file mode 100644 index 000000000..ad7cd8d8d --- /dev/null +++ b/opensearch-mixin/signals/roles.libsonnet @@ -0,0 +1,81 @@ +// Node role signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_node_role_bool', + }, + signals: { + node_role_data: { + name: 'Node role: data', + description: 'Data role present flag.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="data"}[1m]) == 1) * 2', + legendCustomTemplate: '{{ node }} / data', + }, + }, + }, + node_role_master: { + name: 'Node role: master', + description: 'Master role present flag.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="master"}[1m]) == 1) * 3', + legendCustomTemplate: '{{ node }} / master', + }, + }, + }, + node_role_ingest: { + name: 'Node role: ingest', + description: 'Ingest role present flag.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="ingest"}[1m]) == 1) * 4', + legendCustomTemplate: '{{ node }} / ingest', + }, + }, + }, + 
node_role_cluster_manager: { + name: 'Node role: cluster_manager', + description: 'Cluster manager role present flag.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="cluster_manager"}[1m]) == 1) * 5', + legendCustomTemplate: '{{ node }} / cluster_manager', + }, + }, + }, + node_role_remote_cluster_client: { + name: 'Node role: remote_cluster_client', + description: 'Remote cluster client role present flag.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="remote_cluster_client"}[1m]) == 1) * 6', + legendCustomTemplate: '{{ node }} / remote_client', + }, + }, + }, + node_role_last_seen: { + name: 'Node role bool last seen', + description: 'Last seen role bool within 1d.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (' + this.groupAggList + ', nodeid, role, primary_ip) (last_over_time(opensearch_node_role_bool{%(queriesSelector)s}[1d]))', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/signals/search.libsonnet b/opensearch-mixin/signals/search.libsonnet new file mode 100644 index 000000000..3a2d0f21b --- /dev/null +++ b/opensearch-mixin/signals/search.libsonnet @@ -0,0 +1,195 @@ +// Search operation signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_index_search_query_current_number', + }, + signals: { + search_query_current_avg: { + name: 'Search queries in-flight', + description: 'In-flight search queries.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 
'opensearch_index_search_query_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context=~"total"}', + legendCustomTemplate: '{{index}} - query', + aggKeepLabels: ['index'], + }, + }, + }, + search_fetch_current_avg: { + name: 'Search fetch in-flight', + description: 'In-flight fetch operations.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_index_search_fetch_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context=~"total"}', + legendCustomTemplate: '{{index}} - fetch', + aggKeepLabels: ['index'], + }, + }, + }, + search_scroll_current_avg: { + name: 'Search scroll in-flight', + description: 'In-flight scroll operations.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_index_search_scroll_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context=~"total"}', + legendCustomTemplate: '{{index}} - scroll', + aggKeepLabels: ['index'], + }, + }, + }, + search_query_latency_avg: { + name: 'Search query latency (avg)', + description: 'Average query latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_query_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index"}[$__interval:]) / clamp_min(increase(opensearch_index_search_query_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]), 1))', + legendCustomTemplate: '{{index}} - query', + }, + }, + }, + search_fetch_latency_avg: { + name: 'Search fetch latency (avg)', + description: 'Average fetch latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_fetch_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / 
clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]), 1))', + legendCustomTemplate: '{{index}} - fetch', + }, + }, + }, + search_scroll_latency_avg: { + name: 'Search scroll latency (avg)', + description: 'Average scroll latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_scroll_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_search_scroll_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]), 1))', + legendCustomTemplate: '{{index}} - scroll', + }, + }, + }, + request_cache_hit_rate: { + name: 'Request cache hit rate %%', + description: 'Request cache hit rate.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'avg by(job,opensearch_cluster,index) (100 * (opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}) / clamp_min(opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"} + opensearch_index_requestcache_miss_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}, 1))', + legendCustomTemplate: '{{index}} - request', + }, + }, + }, + query_cache_hit_rate: { + name: 'Query cache hit rate %%', + description: 'Query cache hit rate.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'avg by(job,opensearch_cluster,index) (100 * (opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}) / clamp_min(opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"} + opensearch_index_querycache_miss_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}, 1))', + legendCustomTemplate: '{{index}} - query', + }, + }, + }, + query_cache_evictions: 
{ + name: 'Query cache evictions', + description: 'Query cache evictions per interval.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_querycache_evictions_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + rangeFunction: 'increase', + aggKeepLabels: ['index'], + legendCustomTemplate: '{{index}} - query cache', + }, + }, + }, + request_cache_evictions: { + name: 'Request cache evictions', + description: 'Request cache evictions per interval.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_requestcache_evictions_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + rangeFunction: 'increase', + aggKeepLabels: ['index'], + legendCustomTemplate: '{{index}} - request cache', + }, + }, + }, + fielddata_evictions: { + name: 'Fielddata evictions', + description: 'Fielddata evictions per interval.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_fielddata_evictions_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + rangeFunction: 'increase', + aggKeepLabels: ['index'], + legendCustomTemplate: '{{index}} - field data', + }, + }, + }, + query_cache_memory: { + name: 'Query cache memory bytes', + description: 'Query cache memory.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_querycache_memory_size_bytes{%(queriesSelector)s, context="total"}', + }, + }, + }, + request_cache_memory: { + name: 'Request cache memory bytes', + description: 'Request cache memory.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_requestcache_memory_size_bytes{%(queriesSelector)s, context="total"}', + }, + }, + 
}, + }, + } diff --git a/opensearch-mixin/signals/topk.libsonnet b/opensearch-mixin/signals/topk.libsonnet new file mode 100644 index 000000000..b1309cf68 --- /dev/null +++ b/opensearch-mixin/signals/topk.libsonnet @@ -0,0 +1,163 @@ +// TopK and ranking signals for OpenSearch: given `this`, returns a signal-set config (selectors, labels, top-level aggLevel/aggFunction, discovery metric) whose `signals` map holds topk(10, ...) rankings by node (grouped by this.groupAggListWithInstance) or by index (grouped by index and this.groupAggList), and a summed circuit-breaker trip counter. NOTE(review): search_current_inflight_topk and indexing_current_topk carry rate names and units (reqps, ops) but rank raw ..._current_number series with no rate(); confirm this is intended. +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_os_cpu_percent', + }, + signals: { + os_cpu_percent_topk: { + name: 'Top nodes by CPU usage', + description: 'Top nodes by OS CPU usage across the OpenSearch cluster.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(sum by(' + this.groupAggListWithInstance + ') (opensearch_os_cpu_percent{%(queriesSelectorGroupOnly)s})))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + fs_path_used_percent_topk: { + name: 'Top nodes by disk usage', + description: 'Top nodes by disk usage across the OpenSearch cluster.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'topk(10, sort_desc((100 * (sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s})- sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_free_bytes{%(queriesSelectorGroupOnly)s})) / sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s}))))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + circuitbreaker_tripped_count_sum: { + name: 'Breakers tripped', + description: 'The total count of circuit breakers tripped across the OpenSearch cluster.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'sum', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_circuitbreaker_tripped_count{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{node}}', + 
rangeFunction: 'increase', + }, + }, + }, + search_current_inflight_topk: { + name: 'Top indices by request rate', + description: 'Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.', + type: 'raw', + unit: 'reqps', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(avg by(index, ' + this.groupAggList + ') (\n opensearch_index_search_fetch_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_search_query_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_search_scroll_current_number{%(queriesSelectorGroupOnly)s, context="total"}\n)))\n', + legendCustomTemplate: '{{index}}', + }, + }, + }, + search_avg_latency_topk: { + name: 'Top indices by request latency', + description: 'Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(sum by(index, ' + this.groupAggList + ') ((increase(opensearch_index_search_fetch_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_query_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_scroll_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]))\n/ clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_query_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_scroll_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]), 1))))\n', + legendCustomTemplate: '{{index}}', + }, + }, + }, + request_query_cache_hit_rate_topk: { + name: 'Top indices by combined cache hit ratio', + description: 'Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.', + type: 'raw', + unit: 
'percent', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(avg by(index, ' + this.groupAggList + ') (\n 100 * (opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"}) / \n clamp_min((opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_requestcache_miss_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_querycache_miss_number{%(queriesSelectorGroupOnly)s, context="total"}), 1\n ))))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + ingest_throughput_topk: { + name: 'Top nodes by ingest rate', + description: 'Top nodes by rate of ingest across the OpenSearch cluster.', + type: 'raw', + unit: 'ops', + sources: { + prometheus: { + expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') (rate(opensearch_ingest_total_count{%(queriesSelectorGroupOnly)s}[$__rate_interval])))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + ingest_latency_topk: { + name: 'Top nodes by ingest latency', + description: 'Top nodes by ingestion latency across the OpenSearch cluster.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') (increase(opensearch_ingest_total_time_seconds{%(queriesSelectorGroupOnly)s}[$__interval:]) / clamp_min(increase(opensearch_ingest_total_count{%(queriesSelectorGroupOnly)s}[$__interval:]), 1)))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + ingest_failures_topk: { + name: 'Top nodes by ingest errors', + description: 'Top nodes by ingestion failures across the OpenSearch cluster.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') 
(increase(opensearch_ingest_total_failed_count{%(queriesSelectorGroupOnly)s}[$__interval:])))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + indexing_current_topk: { + name: 'Top indices by index rate', + description: 'Top indices by rate of document indexing across the OpenSearch cluster.', + type: 'raw', + unit: 'ops', + sources: { + prometheus: { + expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (opensearch_index_indexing_index_current_number{%(queriesSelectorGroupOnly)s}))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + indexing_latency_topk: { + name: 'Top indices by index latency', + description: 'Top indices by indexing latency across the OpenSearch cluster.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (increase(opensearch_index_indexing_index_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]), 1)))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + indexing_failed_topk: { + name: 'Top indices by index failures', + description: 'Top indices by index document failures across the OpenSearch cluster.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (increase(opensearch_index_indexing_index_failed_count{%(queriesSelectorGroupOnly)s}[$__interval:])))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/variables.libsonnet b/opensearch-mixin/variables.libsonnet deleted file mode 100644 index 8122dafe1..000000000 --- a/opensearch-mixin/variables.libsonnet +++ /dev/null @@ -1,76 +0,0 @@ -// variables.libsonnet -local g = import './g.libsonnet'; -local var = g.dashboard.variable; -local commonlib = import 'common-lib/common/main.libsonnet'; -local utils = commonlib.utils; - -{ - new( - filteringSelector, - 
groupLabels, - instanceLabels, - varMetric, - enableLokiLogs=false, - ): { - local root = self, - local variablesFromLabels(groupLabels, instanceLabels, filteringSelector, multiInstance=true) = - local chainVarProto(index, chainVar) = - var.query.new(chainVar.label) - + var.query.withDatasourceFromVariable(root.datasources.prometheus) - + var.query.queryTypes.withLabelValues( - chainVar.label, - '%s{%s}' % [varMetric, chainVar.chainSelector], - ) - + var.query.generalOptions.withLabel(utils.toSentenceCase(chainVar.label)) - + var.query.selectionOptions.withIncludeAll( - value=if (!multiInstance && std.member(instanceLabels, chainVar.label)) then false else true, - customAllValue='.+' - ) - + var.query.selectionOptions.withMulti( - if (!multiInstance && std.member(instanceLabels, chainVar.label)) then false else true, - ) - + var.query.refresh.onTime() - + var.query.withSort( - i=1, - type='alphabetical', - asc=true, - caseInsensitive=false - ); - std.mapWithIndex(chainVarProto, utils.chainLabels(groupLabels + instanceLabels, [filteringSelector])), - datasources: { - prometheus: - var.datasource.new('prometheus_datasource', 'prometheus') - + var.datasource.generalOptions.withLabel('Prometheus data source') - + var.datasource.withRegex(''), - }, - // Use on dashboards where multiple entities can be selected, like fleet dashboards - multiInstance: - [root.datasources.prometheus] - + variablesFromLabels(groupLabels, instanceLabels, filteringSelector), - // Use on dashboards where only single entity can be selected - singleInstance: - [root.datasources.prometheus] - + variablesFromLabels(groupLabels, instanceLabels, filteringSelector, multiInstance=false), - - queriesSelector: - '%s,%s' % [ - filteringSelector, - utils.labelsToPromQLSelector(groupLabels + instanceLabels), - ], - } - + if enableLokiLogs then self.withLokiLogs() else {}, - - withLokiLogs(): { - datasources+: { - loki: - var.datasource.new('loki_datasource', 'loki') - + 
var.datasource.generalOptions.withLabel('Loki data source') - + var.datasource.withRegex('') - + var.datasource.generalOptions.showOnDashboard.withNothing(), - }, - - multiInstance+: [self.datasources.loki], - singleInstance+: [self.datasources.loki], - }, - -}