From 9bfc60d13a71f5e1d2bb10f4b72c0c85315b8365 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 10 Feb 2025 14:11:34 +0200 Subject: [PATCH] add feature to configurate promhttp error handling Signed-off-by: Nikita Popov --- README.md | 16 +++++++++------- stackdriver_exporter.go | 19 ++++++++++++++++++- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 0d04e05..a0a2814 100644 --- a/README.md +++ b/README.md @@ -78,22 +78,23 @@ If you are still using the legacy [Access scopes][access-scopes], the `https://w | Flag | Required | Default | Description | | ----------------------------------- | -------- |---------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `google.project-ids` | No | GCloud SDK auto-discovery | Repeatable flag of Google Project IDs | -| `google.projects.filter` | No | | GCloud projects filter expression. See more [here](https://cloud.google.com/sdk/gcloud/reference/projects/list). | +| `google.project-ids` | No | GCloud SDK auto-discovery | Repeatable flag of Google Project IDs | +| `google.projects.filter` | No | | GCloud projects filter expression. See more [here](https://cloud.google.com/sdk/gcloud/reference/projects/list). | | `monitoring.metrics-ingest-delay` | No | | Offsets metric collection by a delay appropriate for each metric type, e.g. because bigquery metrics are slow to appear | | `monitoring.drop-delegated-projects` | No | No | Drop metrics from attached projects and fetch `project_id` only. | -| `monitoring.metrics-prefixes` | Yes | | Repeatable flag of Google Stackdriver Monitoring Metric Type prefixes (see [example][metrics-prefix-example] and [available metrics][metrics-list]) | +| `monitoring.metrics-prefixes` | Yes | | Repeatable flag of Google Stackdriver Monitoring Metric Type prefixes (see [example][metrics-prefix-example] and [available metrics][metrics-list]) | | `monitoring.metrics-interval` | No | `5m` | Metric's timestamp interval to request from the Google Stackdriver Monitoring Metrics API. Only the most recent data point is used | | `monitoring.metrics-offset` | No | `0s` | Offset (into the past) for the metric's timestamp interval to request from the Google Stackdriver Monitoring Metrics API, to handle latency in published metrics | -| `monitoring.filters` | No | | Additonal filters to be sent on the Monitoring API call. Add multiple filters by providing this parameter multiple times. See [monitoring.filters](#using-filters) for more info. | +| `monitoring.filters` | No | | Additonal filters to be sent on the Monitoring API call. Add multiple filters by providing this parameter multiple times. See [monitoring.filters](#using-filters) for more info. | | `monitoring.aggregate-deltas` | No | | If enabled will treat all DELTA metrics as an in-memory counter instead of a gauge. Be sure to read [what to know about aggregating DELTA metrics](#what-to-know-about-aggregating-delta-metrics) | | `monitoring.aggregate-deltas-ttl` | No | `30m` | How long should a delta metric continue to be exported and stored after GCP stops producing it. Read [slow moving metrics](#slow-moving-metrics) to understand the problem this attempts to solve | | `monitoring.descriptor-cache-ttl` | No | `0s` | How long should the metric descriptors for a prefixed be cached for | +| `promhttp.error-handling` | No | `httpErrorOnError` | Defines how errors are handled by promhttp.Handler while serving metrics. Possible values: `httpErrorOnError`, `continueOnError`, `panicOnError` are mapped to [available options][promhttp-error-handling-opts] | | `stackdriver.max-retries` | No | `0` | Max number of retries that should be attempted on 503 errors from stackdriver. | -| `stackdriver.http-timeout` | No | `10s` | How long should stackdriver_exporter wait for a result from the Stackdriver API. | +| `stackdriver.http-timeout` | No | `10s` | How long should stackdriver_exporter wait for a result from the Stackdriver API. | | `stackdriver.max-backoff=` | No | | Max time between each request in an exp backoff scenario. | -| `stackdriver.backoff-jitter` | No | `1s` | The amount of jitter to introduce in a exp backoff scenario. | -| `stackdriver.retry-statuses` | No | `503` | The HTTP statuses that should trigger a retry. | +| `stackdriver.backoff-jitter` | No | `1s` | The amount of jitter to introduce in a exp backoff scenario. | +| `stackdriver.retry-statuses` | No | `503` | The HTTP statuses that should trigger a retry. | | `web.config.file` | No | | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | | `web.listen-address` | No | `:9255` | Address to listen on for web interface and telemetry Repeatable for multiple addresses. | | `web.systemd-socket` | No | | Use systemd socket activation listeners instead of port listeners (Linux only). | @@ -247,4 +248,5 @@ Apache License 2.0, see [LICENSE][license]. [monitored-resources]: https://cloud.google.com/monitoring/api/resources [prometheus]: https://prometheus.io/ [prometheus-boshrelease]: https://github.com/cloudfoundry-community/prometheus-boshrelease +[promhttp-error-handling-opts]: https://github.com/prometheus/client_golang/blob/main/prometheus/promhttp/http.go#L323 [stackdriver]: https://cloud.google.com/monitoring/ diff --git a/stackdriver_exporter.go b/stackdriver_exporter.go index cc9ff65..860ac6a 100644 --- a/stackdriver_exporter.go +++ b/stackdriver_exporter.go @@ -137,6 +137,10 @@ var ( monitoringDescriptorCacheOnlyGoogle = kingpin.Flag( "monitoring.descriptor-cache-only-google", "Only cache descriptors for *.googleapis.com metrics", ).Default("true").Bool() + + promHttpErrorHandling = kingpin.Flag( + "promhttp.error-handling", "Defines how errors are handled by promhttp.Handler while serving metrics", + ).Default("httpErrorOnError").Enum("httpErrorOnError", "continueOnError", "panicOnError") ) func init() { @@ -277,7 +281,10 @@ func (h *handler) innerHandler(filters map[string]bool) http.Handler { registry, } } - opts := promhttp.HandlerOpts{ErrorLog: slog.NewLogLogger(h.logger.Handler(), slog.LevelError)} + opts := promhttp.HandlerOpts{ + ErrorLog: slog.NewLogLogger(h.logger.Handler(), slog.LevelError), + ErrorHandling: getPromHttpErrorHandlingOpt(*promHttpErrorHandling), + } // Delegate http serving to Prometheus client library, which will call collector.Collect. return promhttp.HandlerFor(gatherers, opts) } @@ -464,3 +471,13 @@ func parseMetricExtraFilters() []collectors.MetricFilter { } return extraFilters } + +func getPromHttpErrorHandlingOpt(flagOpt string) promhttp.HandlerErrorHandling { + if flagOpt == "continueOnError" { + return promhttp.ContinueOnError + } + if flagOpt == "panicOnError" { + return promhttp.PanicOnError + } + return promhttp.HTTPErrorOnError +}