@@ -764,199 +764,7 @@ spec:
764764 quantile: "0.5"
765765 record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
766766 - name : node-exporter
767- rules :
768- - alert : NodeFilesystemAlmostOutOfSpace
769- annotations :
770- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
771- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemAlmostOutOfSpace.md
772- summary : Filesystem has less than 5% space left.
773- expr : |
774- (
775- node_filesystem_avail_bytes{job="node-exporter",fstype!="shiftfs"} / node_filesystem_size_bytes{job="node-exporter",fstype!="shiftfs"} * 100 < 5
776- and
777- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
778- )
779- for : 15m
780- labels :
781- severity : critical
782- - alert : NodeFilesystemAlmostOutOfSpace
783- annotations :
784- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
785- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemAlmostOutOfSpace.md
786- summary : Filesystem has less than 3% space left.
787- expr : |
788- (
789- node_filesystem_avail_bytes{job="node-exporter",fstype!="shiftfs"} / node_filesystem_size_bytes{job="node-exporter",fstype!="shiftfs"} * 100 < 3
790- and
791- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
792- )
793- for : 15m
794- labels :
795- severity : critical
796- - alert : NodeFilesystemFilesFillingUp
797- annotations :
798- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
799- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemFilesFillingUp.md
800- summary : Filesystem is predicted to run out of inodes within the next 24 hours.
801- expr : |
802- (
803- node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"} / node_filesystem_files{job="node-exporter",fstype!="shiftfs"} * 100 < 40
804- and
805- predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"}[6h], 24*60*60) < 0
806- and
807- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
808- )
809- for : 1h
810- labels :
811- severity : warning
812- - alert : NodeFilesystemFilesFillingUp
813- annotations :
814- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
815- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemFilesFillingUp.md
816- summary : Filesystem is predicted to run out of inodes within the next 4 hours.
817- expr : |
818- (
819- node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"} / node_filesystem_files{job="node-exporter",fstype!="shiftfs"} * 100 < 20
820- and
821- predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"}[6h], 4*60*60) < 0
822- and
823- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
824- )
825- for : 1h
826- labels :
827- severity : critical
828- - alert : NodeFilesystemAlmostOutOfFiles
829- annotations :
830- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
831- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemAlmostOutOfFiles.md
832- summary : Filesystem has less than 5% inodes left.
833- expr : |
834- (
835- node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"} / node_filesystem_files{job="node-exporter",fstype!="shiftfs"} * 100 < 5
836- and
837- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
838- )
839- for : 1h
840- labels :
841- severity : warning
842- - alert : NodeFilesystemAlmostOutOfFiles
843- annotations :
844- description : Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
845- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFilesystemAlmostOutOfFiles.md
846- summary : Filesystem has less than 3% inodes left.
847- expr : |
848- (
849- node_filesystem_files_free{job="node-exporter",fstype!="shiftfs"} / node_filesystem_files{job="node-exporter",fstype!="shiftfs"} * 100 < 3
850- and
851- node_filesystem_readonly{job="node-exporter",fstype!="shiftfs"} == 0
852- )
853- for : 1h
854- labels :
855- severity : critical
856- - alert : NodeNetworkReceiveErrs
857- annotations :
858- description : ' {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
859- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeNetworkReceiveErrs.md
860- summary : Network interface is reporting many receive errors.
861- expr : |
862- rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
863- for : 1h
864- labels :
865- severity : warning
866- - alert : NodeNetworkTransmitErrs
867- annotations :
868- description : ' {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
869- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeNetworkTransmitErrs.md
870- summary : Network interface is reporting many transmit errors.
871- expr : |
872- rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
873- for : 1h
874- labels :
875- severity : warning
876- - alert : NodeTextFileCollectorScrapeError
877- annotations :
878- description : Node Exporter text file collector failed to scrape.
879- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeTextFileCollectorScrapeError.md
880- summary : Node Exporter text file collector failed to scrape.
881- expr : |
882- node_textfile_scrape_error{job="node-exporter"} == 1
883- labels :
884- severity : warning
885- - alert : NodeClockSkewDetected
886- annotations :
887- description : Clock on {{ $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host.
888- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeClockSkewDetected.md
889- summary : Clock skew detected.
890- expr : |
891- (
892- node_timex_offset_seconds{job="node-exporter"} > 0.05
893- and
894- deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0
895- )
896- or
897- (
898- node_timex_offset_seconds{job="node-exporter"} < -0.05
899- and
900- deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
901- )
902- for : 10m
903- labels :
904- severity : warning
905- - alert : NodeClockNotSynchronising
906- annotations :
907- description : Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
908- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeClockNotSynchronising.md
909- summary : Clock not synchronising.
910- expr : |
911- min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
912- and
913- node_timex_maxerror_seconds{job="node-exporter"} >= 16
914- for : 10m
915- labels :
916- severity : warning
917- - alert : NodeRAIDDegraded
918- annotations :
919- description : RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically.
920- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeRAIDDegraded.md
921- summary : RAID Array is degraded
922- expr : |
923- node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"}) > 0
924- for : 15m
925- labels :
926- severity : critical
927- - alert : NodeRAIDDiskFailure
928- annotations :
929- description : At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
930- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeRAIDDiskFailure.md
931- summary : Failed device in RAID array
932- expr : |
933- node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+)"} > 0
934- labels :
935- severity : warning
936- - alert : NodeFileDescriptorLimit
937- annotations :
938- description : File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
939- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFileDescriptorLimit.md
940- summary : Kernel is predicted to exhaust file descriptors limit soon.
941- expr : |
942- (
943- node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
944- )
945- for : 15m
946- labels :
947- severity : warning
948- - alert : NodeFileDescriptorLimit
949- annotations :
950- description : File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
951- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/NodeFileDescriptorLimit.md
952- summary : Kernel is predicted to exhaust file descriptors limit soon.
953- expr : |
954- (
955- node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
956- )
957- for : 15m
958- labels :
959- severity : critical
767+ rules : []
960768 - name : node-exporter.rules
961769 rules :
962770 - expr : |
0 commit comments