@@ -202,40 +202,46 @@ def __init__(self,
202202 #
203203 # GPU cache
204204 #
205- # Deprecated in 0.9 - Renamed as vllm:kv_cache_usage_perc
206- # TODO: in 0.10, only enable if show_hidden_metrics=True
207- gauge_gpu_cache_usage = self ._gauge_cls (
208- name = "vllm:gpu_cache_usage_perc" ,
209- documentation = (
210- "GPU KV-cache usage. 1 means 100 percent usage."
211- "DEPRECATED: Use vllm:kv_cache_usage_perc instead." ),
212- multiprocess_mode = "mostrecent" ,
213- labelnames = labelnames )
214- self .gauge_gpu_cache_usage = make_per_engine (gauge_gpu_cache_usage ,
215- engine_indexes ,
216- model_name )
217-
218- # Deprecated in 0.9 - Renamed as vllm:prefix_cache_queries
219- # TODO: in 0.10, only enable if show_hidden_metrics=True
220- counter_gpu_prefix_cache_queries = self ._counter_cls (
221- name = "vllm:gpu_prefix_cache_queries" ,
222- documentation = (
223- "GPU prefix cache queries, in terms of number of queried"
224- "tokens. DEPRECATED: Use vllm:prefix_cache_queries instead." ),
225- labelnames = labelnames )
226- self .counter_gpu_prefix_cache_queries = make_per_engine (
227- counter_gpu_prefix_cache_queries , engine_indexes , model_name )
228-
229- # Deprecated in 0.9 - Renamed as vllm:prefix_cache_hits
230- # TODO: in 0.10, only enable if show_hidden_metrics=True
231- counter_gpu_prefix_cache_hits = self ._counter_cls (
232- name = "vllm:gpu_prefix_cache_hits" ,
233- documentation = (
234- "GPU prefix cache hits, in terms of number of cached "
235- "tokens. DEPRECATED: Use vllm:prefix_cache_hits instead." ),
236- labelnames = labelnames )
237- self .counter_gpu_prefix_cache_hits = make_per_engine (
238- counter_gpu_prefix_cache_hits , engine_indexes , model_name )
205+ # Deprecated in 0.9.2 - Renamed as vllm:kv_cache_usage_perc
206+ # With 0.11.x you can enable with --show-hidden-metrics-for-version=0.10
207+ # TODO: remove in 0.12.0
208+ if self .show_hidden_metrics :
209+ gauge_gpu_cache_usage = self ._gauge_cls (
210+ name = "vllm:gpu_cache_usage_perc" ,
211+ documentation = (
212+ "GPU KV-cache usage. 1 means 100 percent usage."
213+ "DEPRECATED: Use vllm:kv_cache_usage_perc instead." ),
214+ multiprocess_mode = "mostrecent" ,
215+ labelnames = labelnames )
216+ self .gauge_gpu_cache_usage = make_per_engine (
217+ gauge_gpu_cache_usage , engine_indexes , model_name )
218+
219+ # Deprecated in 0.9.2 - Renamed as vllm:prefix_cache_queries
220+ # With 0.11.x you can enable with --show-hidden-metrics-for-version=0.10
221+ # TODO: remove in 0.12.0
222+ if self .show_hidden_metrics :
223+ counter_gpu_prefix_cache_queries = self ._counter_cls (
224+ name = "vllm:gpu_prefix_cache_queries" ,
225+ documentation = (
226+ "GPU prefix cache queries, in terms of number of queried"
227+ "tokens. DEPRECATED: Use vllm:prefix_cache_queries instead."
228+ ),
229+ labelnames = labelnames )
230+ self .counter_gpu_prefix_cache_queries = make_per_engine (
231+ counter_gpu_prefix_cache_queries , engine_indexes , model_name )
232+
233+ # Deprecated in 0.9.2 - Renamed as vllm:prefix_cache_hits
234+ # With 0.11.x you can enable with --show-hidden-metrics-for-version=0.10
235+ # TODO: remove in 0.12.0
236+ if self .show_hidden_metrics :
237+ counter_gpu_prefix_cache_hits = self ._counter_cls (
238+ name = "vllm:gpu_prefix_cache_hits" ,
239+ documentation = (
240+ "GPU prefix cache hits, in terms of number of cached "
241+ "tokens. DEPRECATED: Use vllm:prefix_cache_hits instead." ),
242+ labelnames = labelnames )
243+ self .counter_gpu_prefix_cache_hits = make_per_engine (
244+ counter_gpu_prefix_cache_hits , engine_indexes , model_name )
239245
240246 gauge_kv_cache_usage = self ._gauge_cls (
241247 name = "vllm:kv_cache_usage_perc" ,
@@ -509,15 +515,17 @@ def record(self,
509515 self .gauge_scheduler_waiting [engine_idx ].set (
510516 scheduler_stats .num_waiting_reqs )
511517
512- self .gauge_gpu_cache_usage [engine_idx ].set (
513- scheduler_stats .kv_cache_usage )
518+ if self .show_hidden_metrics :
519+ self .gauge_gpu_cache_usage [engine_idx ].set (
520+ scheduler_stats .kv_cache_usage )
514521 self .gauge_kv_cache_usage [engine_idx ].set (
515522 scheduler_stats .kv_cache_usage )
516523
517- self .counter_gpu_prefix_cache_queries [engine_idx ].inc (
518- scheduler_stats .prefix_cache_stats .queries )
519- self .counter_gpu_prefix_cache_hits [engine_idx ].inc (
520- scheduler_stats .prefix_cache_stats .hits )
524+ if self .show_hidden_metrics :
525+ self .counter_gpu_prefix_cache_queries [engine_idx ].inc (
526+ scheduler_stats .prefix_cache_stats .queries )
527+ self .counter_gpu_prefix_cache_hits [engine_idx ].inc (
528+ scheduler_stats .prefix_cache_stats .hits )
521529
522530 self .counter_prefix_cache_queries [engine_idx ].inc (
523531 scheduler_stats .prefix_cache_stats .queries )
0 commit comments