@@ -145,13 +145,13 @@ func init() {
145145}
146146
147147type kubernetesConfig struct { // label values for the Kubernetes Stackdriver MonitoredResources, filled from the --stackdriver.kubernetes.* flags in main
148- location string
149- clusterName string
148+ Location string // bound to --stackdriver.kubernetes.location; copied into the static label retrieval.KubernetesLocationLabel
149+ ClusterName string // bound to --stackdriver.kubernetes.cluster-name; copied into the static label retrieval.KubernetesClusterNameLabel
150150}
151151
152152type genericConfig struct { // label values for metrics written with the generic resource, filled from the --stackdriver.generic.* flags in main
153- location string
154- namespace string
153+ Location string // bound to --stackdriver.generic.location; copied into the static label retrieval.GenericLocationLabel
154+ Namespace string // bound to --stackdriver.generic.namespace; copied into the static label retrieval.GenericNamespaceLabel
155155}
156156
157157type fileConfig struct {
@@ -173,95 +173,103 @@ type fileConfig struct {
173173 } `json:"aggregated_counters"`
174174}
175175
176+ // mainConfig gathers the settings main works with: values bound to command-line flags, data loaded from the config file, and a few values derived at startup. A pointer to it is handed to the /statusz handler. Note: When adding a new config field, consider adding it to
177+ // statusz-tmpl.html (NOTE(review): presumably the template rendered by the /statusz handler, which would explain the exported field names; confirm).
178+ type mainConfig struct {
179+ ConfigFilename string // bound to --config-file; when non-empty it is passed to parseConfigFile
180+ ProjectIDResource string // not a flag: main sets it to "projects/<project id>" and passes it to the Stackdriver client factory
181+ KubernetesLabels kubernetesConfig // bound to the --stackdriver.kubernetes.* flags
182+ GenericLabels genericConfig // bound to the --stackdriver.generic.* flags
183+ StackdriverAddress * url.URL // bound to --stackdriver.api-address (default https://monitoring.googleapis.com:443/)
184+ MetricsPrefix string // bound to --stackdriver.metrics-prefix; passed to the Prometheus reader
185+ UseGKEResource bool // bound to --stackdriver.use-gke-resource (default false); passed to the Prometheus reader
186+ StoreInFilesDirectory string // bound to --stackdriver.store-in-files-directory; when non-empty main creates the directory and uses fileClientFactory instead of stackdriverClientFactory
187+ WALDirectory string // bound to --prometheus.wal-directory (default data/wal); tailed for the WAL and used for the progress file
188+ PrometheusURL * url.URL // bound to --prometheus.api-address (default http://127.0.0.1:9090/); base URL for the targets and metadata endpoints
189+ ListenAddress string // bound to --web.listen-address (default 0.0.0.0:9091); Addr of the HTTP server
190+ EnableStatusz bool // bound to --web.enable-statusz (default true); when true main registers the /statusz handler
191+ Filters []string // bound to the repeatable --filter flag (deprecated in favor of --include); parsed by parseFiltersets
192+ Filtersets []string // bound to the repeatable --include flag; parsed by parseFiltersets
193+ Aggregations retrieval.CounterAggregatorConfig // returned by parseConfigFile; when non-empty main makes sure the "stackdriver" monitoring backend is enabled
194+ MetricRenames map [string ]string // returned by parseConfigFile; passed to the Prometheus reader
195+ StaticMetadata []scrape.MetricMetadata // returned by parseConfigFile; passed to the metadata cache
196+ UseRestrictedIPs bool // bound to --stackdriver.use-restricted-ips (default false, flagged EXPERIMENTAL in the help text)
197+ manualResolver * manual.Resolver // unexported; passed to stackdriverClientFactory. NOTE(review): presumably assigned in the UseRestrictedIPs branch; the assignment is not visible here, confirm
198+ MonitoringBackends []string // bound to --monitoring.backend (allowed values "prometheus", "stackdriver"; default "prometheus")
199+
200+ LogLevel promlog.AllowedLevel // registered via promlogflag.AddFlags; used to build the logger
201+ }
202+
176203func main () {
177204 if os .Getenv ("DEBUG" ) != "" {
178205 runtime .SetBlockProfileRate (20 )
179206 runtime .SetMutexProfileFraction (20 )
180207 }
181208
182- cfg := struct {
183- configFilename string
184- projectIdResource string
185- kubernetesLabels kubernetesConfig
186- genericLabels genericConfig
187- stackdriverAddress * url.URL
188- metricsPrefix string
189- useGkeResource bool
190- storeInFilesDirectory string
191- walDirectory string
192- prometheusURL * url.URL
193- listenAddress string
194- filters []string
195- filtersets []string
196- aggregations retrieval.CounterAggregatorConfig
197- metricRenames map [string ]string
198- staticMetadata []scrape.MetricMetadata
199- useRestrictedIps bool
200- manualResolver * manual.Resolver
201- monitoringBackends []string
202-
203- logLevel promlog.AllowedLevel
204- }{}
209+ var cfg mainConfig
205210
206211 a := kingpin .New (filepath .Base (os .Args [0 ]), "The Prometheus monitoring server" )
207212
208213 a .Version (version .Print ("prometheus" ))
209214
210215 a .HelpFlag .Short ('h' )
211216
212- a .Flag ("config-file" , "A configuration file." ).StringVar (& cfg .configFilename )
217+ a .Flag ("config-file" , "A configuration file." ).StringVar (& cfg .ConfigFilename )
213218
214219 projectId := a .Flag ("stackdriver.project-id" , "The Google project ID where Stackdriver will store the metrics." ).
215220 Required ().
216221 String ()
217222
218223 a .Flag ("stackdriver.api-address" , "Address of the Stackdriver Monitoring API." ).
219- Default ("https://monitoring.googleapis.com:443/" ).URLVar (& cfg .stackdriverAddress )
224+ Default ("https://monitoring.googleapis.com:443/" ).URLVar (& cfg .StackdriverAddress )
220225
221226 a .Flag ("stackdriver.use-restricted-ips" , "If true, send all requests through restricted VIPs (EXPERIMENTAL)." ).
222- Default ("false" ).BoolVar (& cfg .useRestrictedIps )
227+ Default ("false" ).BoolVar (& cfg .UseRestrictedIPs )
223228
224229 a .Flag ("stackdriver.kubernetes.location" , "Value of the 'location' label in the Kubernetes Stackdriver MonitoredResources." ).
225- StringVar (& cfg .kubernetesLabels . location )
230+ StringVar (& cfg .KubernetesLabels . Location )
226231
227232 a .Flag ("stackdriver.kubernetes.cluster-name" , "Value of the 'cluster_name' label in the Kubernetes Stackdriver MonitoredResources." ).
228- StringVar (& cfg .kubernetesLabels . clusterName )
233+ StringVar (& cfg .KubernetesLabels . ClusterName )
229234
230235 a .Flag ("stackdriver.generic.location" , "Location for metrics written with the generic resource, e.g. a cluster or data center name." ).
231- StringVar (& cfg .genericLabels . location )
236+ StringVar (& cfg .GenericLabels . Location )
232237
233238 a .Flag ("stackdriver.generic.namespace" , "Namespace for metrics written with the generic resource, e.g. a cluster or data center name." ).
234- StringVar (& cfg .genericLabels . namespace )
239+ StringVar (& cfg .GenericLabels . Namespace )
235240
236241 a .Flag ("stackdriver.metrics-prefix" , "Customized prefix for Stackdriver metrics. If not set, external.googleapis.com/prometheus will be used" ).
237- StringVar (& cfg .metricsPrefix )
242+ StringVar (& cfg .MetricsPrefix )
238243
239244 a .Flag ("stackdriver.use-gke-resource" ,
240245 "Whether to use the legacy gke_container MonitoredResource type instead of k8s_container" ).
241- Default ("false" ).BoolVar (& cfg .useGkeResource )
246+ Default ("false" ).BoolVar (& cfg .UseGKEResource )
242247
243248 a .Flag ("stackdriver.store-in-files-directory" , "If specified, store the CreateTimeSeriesRequest protobuf messages to files under this directory, instead of sending protobuf messages to Stackdriver Monitoring API." ).
244- StringVar (& cfg .storeInFilesDirectory )
249+ StringVar (& cfg .StoreInFilesDirectory )
245250
246251 a .Flag ("prometheus.wal-directory" , "Directory from where to read the Prometheus TSDB WAL." ).
247- Default ("data/wal" ).StringVar (& cfg .walDirectory )
252+ Default ("data/wal" ).StringVar (& cfg .WALDirectory )
248253
249254 a .Flag ("prometheus.api-address" , "Address to listen on for UI, API, and telemetry." ).
250- Default ("http://127.0.0.1:9090/" ).URLVar (& cfg .prometheusURL )
255+ Default ("http://127.0.0.1:9090/" ).URLVar (& cfg .PrometheusURL )
251256
252257 a .Flag ("monitoring.backend" , "Monitoring backend(s) for internal metrics" ).Default ("prometheus" ).
253- EnumsVar (& cfg .monitoringBackends , "prometheus" , "stackdriver" )
258+ EnumsVar (& cfg .MonitoringBackends , "prometheus" , "stackdriver" )
254259
255260 a .Flag ("web.listen-address" , "Address to listen on for UI, API, and telemetry." ).
256- Default ("0.0.0.0:9091" ).StringVar (& cfg .listenAddress )
261+ Default ("0.0.0.0:9091" ).StringVar (& cfg .ListenAddress )
262+
263+ a .Flag ("web.enable-statusz" , "If true, then enables a /statusz endpoint on the web server with diagnostic information." ).
264+ Default ("true" ).BoolVar (& cfg .EnableStatusz )
257265
258266 a .Flag ("include" , "PromQL metric and label matcher which must pass for a series to be forwarded to Stackdriver. If repeated, the series must pass any of the filter sets to be forwarded." ).
259- StringsVar (& cfg .filtersets )
267+ StringsVar (& cfg .Filtersets )
260268
261269 a .Flag ("filter" , "PromQL-style matcher for a single label which must pass for a series to be forwarded to Stackdriver. If repeated, the series must pass all filters to be forwarded. Deprecated, please use --include instead." ).
262- StringsVar (& cfg .filters )
270+ StringsVar (& cfg .Filters )
263271
264- promlogflag .AddFlags (a , & cfg .logLevel )
272+ promlogflag .AddFlags (a , & cfg .LogLevel )
265273
266274 _ , err := a .Parse (os .Args [1 :])
267275 if err != nil {
@@ -270,25 +278,25 @@ func main() {
270278 os .Exit (2 )
271279 }
272280
273- logger := promlog .New (cfg .logLevel )
274- if cfg .configFilename != "" {
275- cfg .metricRenames , cfg .staticMetadata , cfg .aggregations , err = parseConfigFile (cfg .configFilename )
281+ logger := promlog .New (cfg .LogLevel )
282+ if cfg .ConfigFilename != "" {
283+ cfg .MetricRenames , cfg .StaticMetadata , cfg .Aggregations , err = parseConfigFile (cfg .ConfigFilename )
276284 if err != nil {
277- msg := fmt .Sprintf ("Parse config file %s" , cfg .configFilename )
285+ msg := fmt .Sprintf ("Parse config file %s" , cfg .ConfigFilename )
278286 level .Error (logger ).Log ("msg" , msg , "err" , err )
279287 os .Exit (2 )
280288 }
281289
282290 // Enable Stackdriver monitoring backend if counter aggregator configuration is present.
283- if len (cfg .aggregations ) > 0 {
291+ if len (cfg .Aggregations ) > 0 {
284292 sdEnabled := false
285- for _ , backend := range cfg .monitoringBackends {
293+ for _ , backend := range cfg .MonitoringBackends {
286294 if backend == "stackdriver" {
287295 sdEnabled = true
288296 }
289297 }
290298 if ! sdEnabled {
291- cfg .monitoringBackends = append (cfg .monitoringBackends , "stackdriver" )
299+ cfg .MonitoringBackends = append (cfg .MonitoringBackends , "stackdriver" )
292300 }
293301 }
294302 }
@@ -304,7 +312,7 @@ func main() {
304312 * projectId = getGCEProjectID ()
305313 }
306314
307- for _ , backend := range cfg .monitoringBackends {
315+ for _ , backend := range cfg .MonitoringBackends {
308316 switch backend {
309317 case "prometheus" :
310318 promExporter , err := oc_prometheus .NewExporter (oc_prometheus.Options {
@@ -332,10 +340,10 @@ func main() {
332340
333341 var staticLabels = map [string ]string {
334342 retrieval .ProjectIDLabel : * projectId ,
335- retrieval .KubernetesLocationLabel : cfg .kubernetesLabels . location ,
336- retrieval .KubernetesClusterNameLabel : cfg .kubernetesLabels . clusterName ,
337- retrieval .GenericLocationLabel : cfg .genericLabels . location ,
338- retrieval .GenericNamespaceLabel : cfg .genericLabels . namespace ,
343+ retrieval .KubernetesLocationLabel : cfg .KubernetesLabels . Location ,
344+ retrieval .KubernetesClusterNameLabel : cfg .KubernetesLabels . ClusterName ,
345+ retrieval .GenericLocationLabel : cfg .GenericLabels . Location ,
346+ retrieval .GenericNamespaceLabel : cfg .GenericLabels . Namespace ,
339347 }
340348 fillMetadata (& staticLabels )
341349 for k , v := range staticLabels {
@@ -344,14 +352,14 @@ func main() {
344352 }
345353 }
346354
347- filtersets , err := parseFiltersets (logger , cfg .filtersets , cfg .filters )
355+ filtersets , err := parseFiltersets (logger , cfg .Filtersets , cfg .Filters )
348356 if err != nil {
349357 level .Error (logger ).Log ("msg" , "Error parsing --include (or --filter)" , "err" , err )
350358 os .Exit (2 )
351359 }
352360
353- cfg .projectIdResource = fmt .Sprintf ("projects/%v" , * projectId )
354- if cfg .useRestrictedIps {
361+ cfg .ProjectIDResource = fmt .Sprintf ("projects/%v" , * projectId )
362+ if cfg .UseRestrictedIPs {
355363 // manual.GenerateAndRegisterManualResolver generates a Resolver and a random scheme.
356364 // It also registers the resolver. rb.InitialAddrs adds the addresses we are using
357365 // to resolve GCP API calls to the resolver.
@@ -364,23 +372,23 @@ func main() {
364372 {Addr : "199.36.153.7:443" },
365373 })
366374 }
367- targetsURL , err := cfg .prometheusURL .Parse (targets .DefaultAPIEndpoint )
375+ targetsURL , err := cfg .PrometheusURL .Parse (targets .DefaultAPIEndpoint )
368376 if err != nil {
369377 panic (err )
370378 }
371379 targetCache := targets .NewCache (logger , httpClient , targetsURL )
372380
373- metadataURL , err := cfg .prometheusURL .Parse (metadata .DefaultEndpointPath )
381+ metadataURL , err := cfg .PrometheusURL .Parse (metadata .DefaultEndpointPath )
374382 if err != nil {
375383 panic (err )
376384 }
377- metadataCache := metadata .NewCache (httpClient , metadataURL , cfg .staticMetadata )
385+ metadataCache := metadata .NewCache (httpClient , metadataURL , cfg .StaticMetadata )
378386
379387 // We instantiate a context here since the tailer is used by two other components.
380388 // The context will be used in the lifecycle of prometheusReader further down.
381389 ctx , cancel := context .WithCancel (context .Background ())
382390
383- tailer , err := tail .Tail (ctx , cfg .walDirectory )
391+ tailer , err := tail .Tail (ctx , cfg .WALDirectory )
384392 if err != nil {
385393 level .Error (logger ).Log ("msg" , "Tailing WAL failed" , "err" , err )
386394 os .Exit (1 )
@@ -399,23 +407,23 @@ func main() {
399407
400408 var scf stackdriver.StorageClientFactory
401409
402- if len (cfg .storeInFilesDirectory ) > 0 {
403- err := os .MkdirAll (cfg .storeInFilesDirectory , 0700 )
410+ if len (cfg .StoreInFilesDirectory ) > 0 {
411+ err := os .MkdirAll (cfg .StoreInFilesDirectory , 0700 )
404412 if err != nil {
405413 level .Error (logger ).Log (
406414 "msg" , "Failure creating directory." ,
407415 "err" , err )
408416 os .Exit (1 )
409417 }
410418 scf = & fileClientFactory {
411- dir : cfg .storeInFilesDirectory ,
419+ dir : cfg .StoreInFilesDirectory ,
412420 logger : log .With (logger , "component" , "storage" ),
413421 }
414422 } else {
415423 scf = & stackdriverClientFactory {
416424 logger : log .With (logger , "component" , "storage" ),
417- projectIdResource : cfg .projectIdResource ,
418- url : cfg .stackdriverAddress ,
425+ projectIdResource : cfg .ProjectIDResource ,
426+ url : cfg .StackdriverAddress ,
419427 timeout : 10 * time .Second ,
420428 manualResolver : cfg .manualResolver ,
421429 }
@@ -434,7 +442,7 @@ func main() {
434442
435443 counterAggregator , err := retrieval .NewCounterAggregator (
436444 log .With (logger , "component" , "counter_aggregator" ),
437- & cfg .aggregations )
445+ & cfg .Aggregations )
438446 if err != nil {
439447 level .Error (logger ).Log ("msg" , "Creating counter aggregator failed" , "err" , err )
440448 os .Exit (1 )
@@ -443,15 +451,15 @@ func main() {
443451
444452 prometheusReader := retrieval .NewPrometheusReader (
445453 log .With (logger , "component" , "Prometheus reader" ),
446- cfg .walDirectory ,
454+ cfg .WALDirectory ,
447455 tailer ,
448456 filtersets ,
449- cfg .metricRenames ,
457+ cfg .MetricRenames ,
450458 retrieval .TargetsWithDiscoveredLabels (targetCache , labels .FromMap (staticLabels )),
451459 metadataCache ,
452460 queueManager ,
453- cfg .metricsPrefix ,
454- cfg .useGkeResource ,
461+ cfg .MetricsPrefix ,
462+ cfg .UseGKEResource ,
455463 counterAggregator ,
456464 )
457465
@@ -471,6 +479,14 @@ func main() {
471479
472480 http .Handle ("/metrics" , promhttp .Handler ())
473481
482+ if cfg .EnableStatusz {
483+ http .Handle ("/statusz" , & statuszHandler {
484+ logger : logger ,
485+ projectId : * projectId ,
486+ cfg : & cfg ,
487+ })
488+ }
489+
474490 var g group.Group
475491 {
476492 ctx , cancel := context .WithCancel (context .Background ())
@@ -507,16 +523,16 @@ func main() {
507523 // depends on to exit properly.
508524 g .Add (
509525 func () error {
510- startOffset , err := retrieval .ReadProgressFile (cfg .walDirectory )
526+ startOffset , err := retrieval .ReadProgressFile (cfg .WALDirectory )
511527 if err != nil {
512528 level .Warn (logger ).Log ("msg" , "reading progress file failed" , "err" , err )
513529 startOffset = 0
514530 }
515531 // Write the file again once to ensure we have write permission on startup.
516- if err := retrieval .SaveProgressFile (cfg .walDirectory , startOffset ); err != nil {
532+ if err := retrieval .SaveProgressFile (cfg .WALDirectory , startOffset ); err != nil {
517533 return err
518534 }
519- waitForPrometheus (ctx , logger , cfg .prometheusURL )
535+ waitForPrometheus (ctx , logger , cfg .PrometheusURL )
520536 // Sleep a fixed amount of time to allow the first scrapes to complete.
521537 select {
522538 case <- time .After (time .Minute ):
@@ -557,7 +573,7 @@ func main() {
557573 {
558574 cancel := make (chan struct {})
559575 server := & http.Server {
560- Addr : cfg .listenAddress ,
576+ Addr : cfg .ListenAddress ,
561577 }
562578 g .Add (
563579 func () error {
0 commit comments