Skip to content

Commit 19abc45

Browse files
AnilAltinay authored and gvisor-bot committed
SGLang GKE test
PiperOrigin-RevId: 832193475
1 parent fb338e7 commit 19abc45

File tree

5 files changed

+587
-1
lines changed

5 files changed

+587
-1
lines changed

test/gpu/sglang/sglang.go

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -547,6 +547,43 @@ func (r *Response) NumTokens() int {
547547
return len(r.data)
548548
}
549549

550+
// TimeToFirstToken returns the time it took between the request starting
551+
// and the first token being received by the client.
552+
func (r *Response) TimeToFirstToken() time.Duration {
553+
if !r.Done() {
554+
return -1
555+
}
556+
return r.metrics.FirstByteRead.Sub(r.metrics.RequestSent)
557+
}
558+
559+
// TimeToLastToken returns the time it took between the request starting
560+
// and the last token being received by the client.
561+
func (r *Response) TimeToLastToken() time.Duration {
562+
if !r.Done() {
563+
return -1
564+
}
565+
return r.metrics.LastByteRead.Sub(r.metrics.RequestSent)
566+
}
567+
568+
// OutputTokensPerSecond computes the average number of output tokens
569+
// generated per second.
570+
func (r *Response) OutputTokensPerSecond() float64 {
571+
if !r.Done() {
572+
return -1
573+
}
574+
return float64(len(r.data)-1) / r.data[len(r.data)-1].MetaInfo.E2ELatency
575+
}
576+
577+
// E2ELatency returns the elapsed time between when start_time was recorded and
578+
// the current moment in seconds.
579+
// See https://github.com/sgl-project/sglang/blob/4a2768a86b2905b9b7f19d415261b9d4af639e19/sgl-router/src/routers/grpc/regular/streaming.rs#L904
580+
func (r *Response) E2ELatency() float64 {
581+
if len(r.data) == 0 {
582+
return 0
583+
}
584+
return r.data[len(r.data)-1].MetaInfo.E2ELatency
585+
}
586+
550587
// String returns the response text, if it is done.
551588
func (r *Response) String() string {
552589
if len(r.data) == 0 {

test/kubernetes/benchmarks/BUILD

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -397,6 +397,46 @@ go_test(
397397
],
398398
)
399399

400+
# Test-only Go library built from sglang.go. It embeds the shared benchmark
# resource files and has nogo set to False for this target.
go_library(
    name = "sglang",
    testonly = True,
    srcs = ["sglang.go"],
    embedsrcs = [
        "//test/kubernetes/benchmarks/resources:files",  # keep
    ],
    nogo = False,
    deps = [
        "//pkg/sync",
        "//test/gpu/sglang",
        "//test/kubernetes",
        "//test/kubernetes/benchmarks/profiling",
        "//test/kubernetes/benchmetric",
        "//test/kubernetes/k8sctx",
        "//test/kubernetes/testcluster",
        "@io_k8s_api//core/v1:go_default_library",
        "@io_k8s_apimachinery//pkg/api/resource:go_default_library",
        "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
        "@io_k8s_apimachinery//pkg/util/intstr:go_default_library",
    ],
)
422+
423+
# Go test target for the ":sglang" library, built from sglang_test.go.
# NOTE(review): the tags below presumably restrict where the test is run
# automatically — confirm against the repository's test infrastructure docs.
go_test(
    name = "sglang_test",
    srcs = ["sglang_test.go"],
    library = ":sglang",
    nogo = False,
    tags = [
        "local",
        "noguitar",
        "notap",
    ],
    deps = [
        "//test/kubernetes/k8sctx",
        "//test/kubernetes/k8sctx/kubectlctx",
        "//test/kubernetes/testcluster",
    ],
)
439+
400440
go_library(
401441
name = "stablediffusion",
402442
testonly = True,

0 commit comments

Comments
 (0)