Skip to content

Commit fc5aecd

Browse files
Refactor internal metrics package and API references to properly use context (#3226)
Signed-off-by: Matías Insaurralde <matias@insaurral.de>
1 parent 8829072 commit fc5aecd

File tree

2 files changed

+13
-13
lines changed

2 files changed

+13
-13
lines changed

runner/internal/metrics/metrics.go

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ func NewMetricsCollector() (*MetricsCollector, error) {
3333
}, nil
3434
}
3535

36-
func (s *MetricsCollector) GetSystemMetrics() (*schemas.SystemMetrics, error) {
36+
func (s *MetricsCollector) GetSystemMetrics(ctx context.Context) (*schemas.SystemMetrics, error) {
3737
timestamp := time.Now()
3838
cpuUsage, err := s.GetCPUUsageMicroseconds()
3939
if err != nil {
@@ -48,7 +48,7 @@ func (s *MetricsCollector) GetSystemMetrics() (*schemas.SystemMetrics, error) {
4848
return nil, err
4949
}
5050
memoryWorkingSet := memoryUsage - memoryCache
51-
gpuMetrics, err := s.GetGPUMetrics()
51+
gpuMetrics, err := s.GetGPUMetrics(ctx)
5252
if err != nil {
5353
log.Debug(context.TODO(), "Failed to get gpu metrics", "err", err)
5454
}
@@ -148,16 +148,16 @@ func (s *MetricsCollector) GetMemoryCacheBytes() (uint64, error) {
148148
return 0, fmt.Errorf("inactive_file not found in cpu.stat")
149149
}
150150

151-
func (s *MetricsCollector) GetGPUMetrics() ([]schemas.GPUMetrics, error) {
151+
func (s *MetricsCollector) GetGPUMetrics(ctx context.Context) ([]schemas.GPUMetrics, error) {
152152
var metrics []schemas.GPUMetrics
153153
var err error
154154
switch s.gpuVendor {
155155
case common.GpuVendorNvidia:
156-
metrics, err = s.GetNVIDIAGPUMetrics()
156+
metrics, err = s.GetNVIDIAGPUMetrics(ctx)
157157
case common.GpuVendorAmd:
158-
metrics, err = s.GetAMDGPUMetrics()
158+
metrics, err = s.GetAMDGPUMetrics(ctx)
159159
case common.GpuVendorIntel:
160-
metrics, err = s.GetIntelAcceleratorMetrics()
160+
metrics, err = s.GetIntelAcceleratorMetrics(ctx)
161161
case common.GpuVendorTenstorrent:
162162
err = errors.New("tenstorrent metrics not suppored")
163163
case common.GpuVendorNone:
@@ -169,8 +169,8 @@ func (s *MetricsCollector) GetGPUMetrics() ([]schemas.GPUMetrics, error) {
169169
return metrics, err
170170
}
171171

172-
func (s *MetricsCollector) GetNVIDIAGPUMetrics() ([]schemas.GPUMetrics, error) {
173-
cmd := exec.Command("nvidia-smi", "--query-gpu=memory.used,utilization.gpu", "--format=csv,noheader,nounits")
172+
func (s *MetricsCollector) GetNVIDIAGPUMetrics(ctx context.Context) ([]schemas.GPUMetrics, error) {
173+
cmd := exec.CommandContext(ctx, "nvidia-smi", "--query-gpu=memory.used,utilization.gpu", "--format=csv,noheader,nounits")
174174
var out bytes.Buffer
175175
cmd.Stdout = &out
176176
if err := cmd.Run(); err != nil {
@@ -179,8 +179,8 @@ func (s *MetricsCollector) GetNVIDIAGPUMetrics() ([]schemas.GPUMetrics, error) {
179179
return parseNVIDIASMILikeMetrics(out.String())
180180
}
181181

182-
func (s *MetricsCollector) GetAMDGPUMetrics() ([]schemas.GPUMetrics, error) {
183-
cmd := exec.Command("amd-smi", "monitor", "-vu", "--csv")
182+
func (s *MetricsCollector) GetAMDGPUMetrics(ctx context.Context) ([]schemas.GPUMetrics, error) {
183+
cmd := exec.CommandContext(ctx, "amd-smi", "monitor", "-vu", "--csv")
184184
var out bytes.Buffer
185185
cmd.Stdout = &out
186186
if err := cmd.Run(); err != nil {
@@ -245,8 +245,8 @@ func (s *MetricsCollector) getAMDGPUMetrics(csv string) ([]schemas.GPUMetrics, e
245245
return metrics, nil
246246
}
247247

248-
func (s *MetricsCollector) GetIntelAcceleratorMetrics() ([]schemas.GPUMetrics, error) {
249-
cmd := exec.Command("hl-smi", "--query-aip=memory.used,utilization.aip", "--format=csv,noheader,nounits")
248+
func (s *MetricsCollector) GetIntelAcceleratorMetrics(ctx context.Context) ([]schemas.GPUMetrics, error) {
249+
cmd := exec.CommandContext(ctx, "hl-smi", "--query-aip=memory.used,utilization.aip", "--format=csv,noheader,nounits")
250250
var out bytes.Buffer
251251
cmd.Stdout = &out
252252
if err := cmd.Run(); err != nil {

runner/internal/runner/api/http.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ func (s *Server) metricsGetHandler(w http.ResponseWriter, r *http.Request) (inte
3333
if err != nil {
3434
return nil, &api.Error{Status: http.StatusInternalServerError, Err: err}
3535
}
36-
metrics, err := metricsCollector.GetSystemMetrics()
36+
metrics, err := metricsCollector.GetSystemMetrics(r.Context())
3737
if err != nil {
3838
return nil, &api.Error{Status: http.StatusInternalServerError, Err: err}
3939
}

0 commit comments

Comments (0)