@@ -33,7 +33,7 @@ func NewMetricsCollector() (*MetricsCollector, error) {
3333 }, nil
3434}
3535
36- func (s * MetricsCollector ) GetSystemMetrics () (* schemas.SystemMetrics , error ) {
36+ func (s * MetricsCollector ) GetSystemMetrics (ctx context. Context ) (* schemas.SystemMetrics , error ) {
3737 timestamp := time .Now ()
3838 cpuUsage , err := s .GetCPUUsageMicroseconds ()
3939 if err != nil {
@@ -48,7 +48,7 @@ func (s *MetricsCollector) GetSystemMetrics() (*schemas.SystemMetrics, error) {
4848 return nil , err
4949 }
5050 memoryWorkingSet := memoryUsage - memoryCache
51- gpuMetrics , err := s .GetGPUMetrics ()
51+ gpuMetrics , err := s .GetGPUMetrics (ctx )
5252 if err != nil {
5353 log .Debug (context .TODO (), "Failed to get gpu metrics" , "err" , err )
5454 }
@@ -148,16 +148,16 @@ func (s *MetricsCollector) GetMemoryCacheBytes() (uint64, error) {
148148 return 0 , fmt .Errorf ("inactive_file not found in cpu.stat" )
149149}
150150
151- func (s * MetricsCollector ) GetGPUMetrics () ([]schemas.GPUMetrics , error ) {
151+ func (s * MetricsCollector ) GetGPUMetrics (ctx context. Context ) ([]schemas.GPUMetrics , error ) {
152152 var metrics []schemas.GPUMetrics
153153 var err error
154154 switch s .gpuVendor {
155155 case common .GpuVendorNvidia :
156- metrics , err = s .GetNVIDIAGPUMetrics ()
156+ metrics , err = s .GetNVIDIAGPUMetrics (ctx )
157157 case common .GpuVendorAmd :
158- metrics , err = s .GetAMDGPUMetrics ()
158+ metrics , err = s .GetAMDGPUMetrics (ctx )
159159 case common .GpuVendorIntel :
160- metrics , err = s .GetIntelAcceleratorMetrics ()
160+ metrics , err = s .GetIntelAcceleratorMetrics (ctx )
161161 case common .GpuVendorTenstorrent :
162162 err = errors .New ("tenstorrent metrics not suppored" )
163163 case common .GpuVendorNone :
@@ -169,8 +169,8 @@ func (s *MetricsCollector) GetGPUMetrics() ([]schemas.GPUMetrics, error) {
169169 return metrics , err
170170}
171171
172- func (s * MetricsCollector ) GetNVIDIAGPUMetrics () ([]schemas.GPUMetrics , error ) {
173- cmd := exec .Command ( "nvidia-smi" , "--query-gpu=memory.used,utilization.gpu" , "--format=csv,noheader,nounits" )
172+ func (s * MetricsCollector ) GetNVIDIAGPUMetrics (ctx context. Context ) ([]schemas.GPUMetrics , error ) {
173+ cmd := exec .CommandContext ( ctx , "nvidia-smi" , "--query-gpu=memory.used,utilization.gpu" , "--format=csv,noheader,nounits" )
174174 var out bytes.Buffer
175175 cmd .Stdout = & out
176176 if err := cmd .Run (); err != nil {
@@ -179,8 +179,8 @@ func (s *MetricsCollector) GetNVIDIAGPUMetrics() ([]schemas.GPUMetrics, error) {
179179 return parseNVIDIASMILikeMetrics (out .String ())
180180}
181181
182- func (s * MetricsCollector ) GetAMDGPUMetrics () ([]schemas.GPUMetrics , error ) {
183- cmd := exec .Command ( "amd-smi" , "monitor" , "-vu" , "--csv" )
182+ func (s * MetricsCollector ) GetAMDGPUMetrics (ctx context. Context ) ([]schemas.GPUMetrics , error ) {
183+ cmd := exec .CommandContext ( ctx , "amd-smi" , "monitor" , "-vu" , "--csv" )
184184 var out bytes.Buffer
185185 cmd .Stdout = & out
186186 if err := cmd .Run (); err != nil {
@@ -245,8 +245,8 @@ func (s *MetricsCollector) getAMDGPUMetrics(csv string) ([]schemas.GPUMetrics, e
245245 return metrics , nil
246246}
247247
248- func (s * MetricsCollector ) GetIntelAcceleratorMetrics () ([]schemas.GPUMetrics , error ) {
249- cmd := exec .Command ( "hl-smi" , "--query-aip=memory.used,utilization.aip" , "--format=csv,noheader,nounits" )
248+ func (s * MetricsCollector ) GetIntelAcceleratorMetrics (ctx context. Context ) ([]schemas.GPUMetrics , error ) {
249+ cmd := exec .CommandContext ( ctx , "hl-smi" , "--query-aip=memory.used,utilization.aip" , "--format=csv,noheader,nounits" )
250250 var out bytes.Buffer
251251 cmd .Stdout = & out
252252 if err := cmd .Run (); err != nil {
0 commit comments