Skip to content

Commit d93c5f5

Browse files
AnilAltinay authored and gvisor-bot committed
Add SGLang Docker tests/benchmark
PiperOrigin-RevId: 824805273
1 parent 7532ec0 commit d93c5f5

File tree

11 files changed

+977
-5
lines changed

11 files changed

+977
-5
lines changed

.buildkite/pipeline.yaml

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ _templates:
1515
if: build.env("STAGED_BINARIES") == null && build.branch != "master"
1616
source_test_continuous: &source_test_continuous
1717
if: build.env("STAGED_BINARIES") == null && build.branch == "master"
18+
gpu_test: &gpu_test
19+
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
20+
gpu_test_continuous: &gpu_test_continuous
21+
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i ) && build.branch == "master"
1822
platform_specific_agents: &platform_specific_agents {}
1923
kvm_agents: &kvm_agents {kvm: "true"}
2024
ubuntu_agents: &ubuntu_agents {os: "ubuntu"}
@@ -184,22 +188,30 @@ steps:
184188
commands:
185189
- tools/gpu/cos_drivers_test.sh
186190
- <<: *common
191+
<<: *gpu_test
187192
label: ":screwdriver: GPU Tests"
188-
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
189193
commands:
190194
- make sudo TARGETS=//tools/gpu:main ARGS="install --latest" || cat /var/log/nvidia-installer.log
191195
- make gpu-all-tests
192196
agents:
193197
queue: gpu
194198
- <<: *common
199+
<<: *gpu_test_continuous
200+
label: ":nuget: L4 GPU Tests"
201+
commands:
202+
- make sudo TARGETS=//tools/gpu:main ARGS="install --latest" || cat /var/log/nvidia-installer.log
203+
- make l4-gpu-tests
204+
agents:
205+
queue: l4-gpu
206+
- <<: *common
207+
<<: *gpu_test
195208
label: ":female_supervillain: COS GPU Tests"
196-
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
197209
commands:
198210
- make cos-gpu-all-tests
199211
agents:
200212
queue: cos-canary-gpu
201-
- label: ":fish: CUDA tests"
202-
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
213+
- <<: *gpu_test
214+
label: ":fish: CUDA tests"
203215
# This is its own test rather than being part of the GPU tests,
204216
# because it takes around 30 minutes to run.
205217
parallelism: 8
@@ -212,8 +224,8 @@ steps:
212224
agents:
213225
queue: gpu
214226
- <<: *common
227+
<<: *gpu_test
215228
label: ":screwdriver: All GPU Drivers Test"
216-
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
217229
parallelism: 8
218230
commands:
219231
- tools/gpu/all_drivers_test.sh

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,14 @@ cos-gpu-smoke-tests: gpu-smoke-images $(RUNTIME_BIN)
313313
# gpu-images is an aggregate target with no recipe of its own: building it
# brings the smoke-test images and every load-* target listed below up to date.
gpu-images: \
  gpu-smoke-images \
  load-gpu_pytorch \
  load-gpu_ollama \
  load-gpu_ollama_client \
  load-basic_busybox \
  load-basic_alpine \
  load-basic_python \
  load-gpu_stable-diffusion-xl \
  load-gpu_vllm \
  load-gpu_nccl-tests \
  load-benchmarks_ffmpeg
.PHONY: gpu-images
315315

316+
# l4-gpu-images is an aggregate target with no recipe: it only pulls in the
# two load-* targets that l4-gpu-tests depends on.
# NOTE(review): presumably these correspond to images/gpu/sglang and
# images/gpu/sglang/client -- confirm against the load-% rule.
l4-gpu-images: \
  load-gpu_sglang \
  load-gpu_sglang_client
.PHONY: l4-gpu-images
318+
319+
# l4-gpu-tests needs the SGLang images and the runtime binary. Its recipe
# calls install_runtime for $(RUNTIME) with the nvproxy flags, then invokes
# test/gpu:sglang_test through the sudo helper, forwarding $(ARGS).
l4-gpu-tests: \
  l4-gpu-images \
  $(RUNTIME_BIN)
	@$(call install_runtime,$(RUNTIME),--nvproxy=true --nvproxy-docker=true --nvproxy-allowed-driver-capabilities=all)
	@$(call sudo,test/gpu:sglang_test,--runtime=$(RUNTIME) -test.v $(ARGS))
.PHONY: l4-gpu-tests
323+
316324
gpu-all-tests: gpu-images gpu-smoke-tests $(RUNTIME_BIN)
317325
@$(call install_runtime,$(RUNTIME),--nvproxy=true --nvproxy-docker=true --nvproxy-allowed-driver-capabilities=all)
318326
@$(call sudo,test/gpu:pytorch_test,--runtime=$(RUNTIME) -test.v $(ARGS))

images/gpu/ollama/client/client.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ import (
3131
"time"
3232
)
3333

34+
// LINT.IfChange
35+
3436
// Flags.
3537
var (
3638
url = flag.String("url", "", "HTTP request URL.")
@@ -150,3 +152,5 @@ func main() {
150152
}
151153
fmt.Fprintf(os.Stderr, "STATS: %s\n", string(metricsBytes))
152154
}
155+
156+
// LINT.ThenChange(../../sglang/client/client.go)
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Stage 1 ("downloader"): clone the model repository into /qwen2.5-0.5b-instruct.
FROM alpine/git@sha256:4d7fe8d770483993c0cec264d49a573bac49e5239db47a9846572352e72da49c AS downloader
# git-lfs is installed and initialised before cloning because the clone's
# post-checkout hook checks that the `git lfs` command is available.
# NOTE(review): GIT_CLONE_PROTECTION_ACTIVE=false presumably allows that hook
# to run during `git clone` -- confirm against the git documentation.
RUN apk add git-lfs \
    && git lfs install \
    && GIT_CLONE_PROTECTION_ACTIVE=false git clone https://huggingface.co/qwen/qwen2.5-0.5b-instruct /qwen2.5-0.5b-instruct

# Stage 2: the pinned SGLang image plus the model directory copied from stage 1.
FROM lmsysorg/sglang@sha256:119cf3a894b380a78d81e1557c8cc58ccc234b4854232e0e1dbf39916a4c7e75
COPY --from=downloader /qwen2.5-0.5b-instruct /qwen2.5-0.5b-instruct

# Default command: python3 -m sglang.launch_server, serving the copied model
# on 0.0.0.0:30000 with a fixed random seed.
ENTRYPOINT ["python3"]
CMD ["-m", "sglang.launch_server", \
     "--device", "cuda", \
     "--model", "/qwen2.5-0.5b-instruct", \
     "--host", "0.0.0.0", \
     "--port", "30000", \
     "--random-seed", "42"]

images/gpu/sglang/client/BUILD

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
load("//tools:defs.bzl", "go_binary")
2+
3+
package(
4+
default_applicable_licenses = ["//:license"],
5+
licenses = ["notice"],
6+
)
7+
8+
go_binary(
9+
name = "client",
10+
srcs = ["client.go"],
11+
)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Stage 1 ("builder"): compile client.go into /httpclient with cgo disabled.
FROM golang:1.22 AS builder

COPY client.go /client.go
RUN CGO_ENABLED=0 \
    go build -o /httpclient /client.go

# Stage 2: an Alpine image that carries only the compiled client binary.
FROM alpine:latest
COPY --from=builder /httpclient /usr/bin/
CMD ["/usr/bin/httpclient"]

images/gpu/sglang/client/client.go

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
// Copyright 2024 The gVisor Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// A simple `curl`-like HTTP client that prints metrics after the request.
16+
// All of its output is structured to be unambiguous even if stdout/stderr
17+
// is combined, as is the case for Kubernetes logs.
18+
// Useful for communicating with SGLang.
19+
package main
20+
21+
import (
22+
"bufio"
23+
"bytes"
24+
"encoding/base64"
25+
"encoding/json"
26+
"flag"
27+
"fmt"
28+
"net/http"
29+
"os"
30+
"sort"
31+
"strings"
32+
"time"
33+
)
34+
35+
// LINT.IfChange
36+
37+
// Command-line flags. main calls flag.Parse before reading any of them.

// url is the request target; main exits via fatalf when it is empty.
var url = flag.String("url", "", "HTTP request URL.")

// method selects the HTTP verb; main accepts only "GET" and "POST".
var method = flag.String("method", "GET", "HTTP request method (GET or POST).")

// postDataBase64 is the base64-encoded request body, decoded by main for POST
// requests only.
var postDataBase64 = flag.String("post_base64", "", "HTTP request POST data in base64 format; ignored for GET requests.")

// timeout is used by main both as the HTTP client timeout and as the
// transport's idle connection timeout.
var timeout = flag.Duration("timeout", 0, "HTTP request timeout; 0 for no timeout.")
44+
45+
// bufSize is the size, in bytes, of the buffers used for HTTP requests and
// responses.
const bufSize = 1 << 20 // 1MiB
47+
48+
// fatalf formats a message, writes it to stderr prefixed with "FATAL: " and
// terminated by a newline, then ends the process with exit status 1.
// It does not return.
func fatalf(format string, values ...any) {
	message := fmt.Sprintf("FATAL: "+format+"\n", values...)
	os.Stderr.WriteString(message)
	os.Exit(1)
}
53+
54+
// Metrics holds the timestamps collected by main over the lifetime of one
// request. It is marshalled to JSON and printed on the "STATS: " line.
// This is parsed by the sglang library at `test/gpu/sglang/sglang.go`.
type Metrics struct {
	// ProgramStarted is stamped at the very top of main, before flags are
	// parsed.
	ProgramStarted time.Time `json:"program_started"`

	// RequestSent is stamped immediately before the HTTP request is issued.
	RequestSent time.Time `json:"request_sent"`

	// ResponseReceived is stamped as soon as the HTTP client call returns.
	ResponseReceived time.Time `json:"response_received"`

	// FirstByteRead is stamped when the first piece of the response body has
	// been scanned; it keeps its zero value when the body is empty.
	FirstByteRead time.Time `json:"first_byte_read"`

	// LastByteRead is stamped once the response body has been read to the
	// end without error.
	LastByteRead time.Time `json:"last_byte_read"`
}
68+
69+
func main() {
70+
var metrics Metrics
71+
metrics.ProgramStarted = time.Now()
72+
flag.Parse()
73+
if *url == "" {
74+
fatalf("--url is required")
75+
}
76+
client := http.Client{
77+
Transport: &http.Transport{
78+
MaxIdleConns: 1,
79+
IdleConnTimeout: *timeout,
80+
ReadBufferSize: bufSize,
81+
WriteBufferSize: bufSize,
82+
},
83+
Timeout: *timeout,
84+
}
85+
var request *http.Request
86+
var err error
87+
switch *method {
88+
case "GET":
89+
request, err = http.NewRequest("GET", *url, nil)
90+
case "POST":
91+
postData, postDataErr := base64.StdEncoding.DecodeString(*postDataBase64)
92+
if postDataErr != nil {
93+
fatalf("cannot decode POST data: %v", postDataErr)
94+
}
95+
request, err = http.NewRequest("POST", *url, bytes.NewBuffer(postData))
96+
default:
97+
err = fmt.Errorf("unknown method %q", *method)
98+
}
99+
if err != nil {
100+
fatalf("cannot create request: %v", err)
101+
}
102+
orderedReqHeaders := make([]string, 0, len(request.Header))
103+
for k := range request.Header {
104+
orderedReqHeaders = append(orderedReqHeaders, k)
105+
}
106+
sort.Strings(orderedReqHeaders)
107+
for _, k := range orderedReqHeaders {
108+
for _, v := range request.Header[k] {
109+
fmt.Fprintf(os.Stderr, "REQHEADER: %s: %s\n", k, v)
110+
}
111+
}
112+
metrics.RequestSent = time.Now()
113+
resp, err := client.Do(request)
114+
metrics.ResponseReceived = time.Now()
115+
if err != nil {
116+
fatalf("cannot make request: %v", err)
117+
}
118+
gotFirstByte := false
119+
scanner := bufio.NewScanner(resp.Body)
120+
for scanner.Scan() {
121+
if !gotFirstByte {
122+
metrics.FirstByteRead = time.Now()
123+
gotFirstByte = true
124+
}
125+
if scanner.Text() == "" {
126+
continue
127+
}
128+
fmt.Printf("BODY: %q\n", strings.TrimPrefix(scanner.Text(), "data: "))
129+
}
130+
// Check for any errors that may have occurred during scanning
131+
if err := scanner.Err(); err != nil {
132+
fatalf("error reading response body: %v", err)
133+
}
134+
metrics.LastByteRead = time.Now()
135+
if err := resp.Body.Close(); err != nil {
136+
fatalf("cannot close response body: %v", err)
137+
}
138+
orderedRespHeaders := make([]string, 0, len(resp.Header))
139+
for k := range resp.Header {
140+
orderedRespHeaders = append(orderedRespHeaders, k)
141+
}
142+
sort.Strings(orderedRespHeaders)
143+
for _, k := range orderedRespHeaders {
144+
for _, v := range resp.Header[k] {
145+
fmt.Fprintf(os.Stderr, "RESPHEADER: %s: %s\n", k, v)
146+
}
147+
}
148+
metricsBytes, err := json.Marshal(&metrics)
149+
if err != nil {
150+
fatalf("cannot marshal metrics: %v", err)
151+
}
152+
fmt.Fprintf(os.Stderr, "STATS: %s\n", string(metricsBytes))
153+
}
154+
155+
// LINT.ThenChange(../../ollama/client/client.go)

test/gpu/BUILD

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,23 @@ go_test(
5252
],
5353
)
5454

55+
go_test(
56+
name = "sglang_test",
57+
srcs = ["sglang_test.go"],
58+
# runsc is needed to invalidate the bazel cache in case of any code changes.
59+
data = ["//runsc"],
60+
tags = [
61+
"manual",
62+
"noguitar",
63+
"notap",
64+
],
65+
visibility = ["//:sandbox"],
66+
deps = [
67+
"//pkg/test/dockerutil",
68+
"//test/gpu/sglang",
69+
],
70+
)
71+
5572
go_test(
5673
name = "sr_test",
5774
srcs = ["sr_test.go"],

test/gpu/sglang/BUILD

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
load("//tools:defs.bzl", "go_library")
2+
3+
package(
4+
default_applicable_licenses = ["//:license"],
5+
licenses = ["notice"],
6+
)
7+
8+
go_library(
9+
name = "sglang",
10+
testonly = 1,
11+
srcs = ["sglang.go"],
12+
stateify = False, # Does not support some generics methods.
13+
visibility = ["//:sandbox"],
14+
deps = [
15+
"//pkg/test/dockerutil",
16+
"//pkg/test/testutil",
17+
],
18+
)

0 commit comments

Comments
 (0)