File tree Expand file tree Collapse file tree 5 files changed +587
-1
lines changed Expand file tree Collapse file tree 5 files changed +587
-1
lines changed Original file line number Diff line number Diff line change @@ -547,6 +547,43 @@ func (r *Response) NumTokens() int {
547547 return len (r .data )
548548}
549549
550+ // TimeToFirstToken returns the time it took between the request starting
551+ // and the first token being received by the client.
552+ func (r * Response ) TimeToFirstToken () time.Duration {
553+ if ! r .Done () {
554+ return - 1
555+ }
556+ return r .metrics .FirstByteRead .Sub (r .metrics .RequestSent )
557+ }
558+
559+ // TimeToLastToken returns the time it took between the request starting
560+ // and the last token being received by the client.
561+ func (r * Response ) TimeToLastToken () time.Duration {
562+ if ! r .Done () {
563+ return - 1
564+ }
565+ return r .metrics .LastByteRead .Sub (r .metrics .RequestSent )
566+ }
567+
568+ // OutputTokensPerSecond computes the average number of output tokens
569+ // generated per second.
570+ func (r * Response ) OutputTokensPerSecond () float64 {
571+ if ! r .Done () {
572+ return - 1
573+ }
574+ return float64 (len (r .data )- 1 ) / r .data [len (r .data )- 1 ].MetaInfo .E2ELatency
575+ }
576+
577+ // E2ELatency returns the elapsed time between when start_time was recorded and
578+ // the current moment in seconds.
579+ // See https://github.com/sgl-project/sglang/blob/4a2768a86b2905b9b7f19d415261b9d4af639e19/sgl-router/src/routers/grpc/regular/streaming.rs#L904
580+ func (r * Response ) E2ELatency () float64 {
581+ if len (r .data ) == 0 {
582+ return 0
583+ }
584+ return r .data [len (r .data )- 1 ].MetaInfo .E2ELatency
585+ }
586+
550587// String returns the response text, if it is done.
551588func (r * Response ) String () string {
552589 if len (r .data ) == 0 {
Original file line number Diff line number Diff line change @@ -397,6 +397,46 @@ go_test(
397397 ],
398398)
399399
# Test-only Go library built from sglang.go. It embeds the files exported by
# //test/kubernetes/benchmarks/resources:files.
# NOTE(review): nogo = False presumably opts this target out of nogo static
# analysis — confirm against the repository's rule definitions.
go_library(
    name = "sglang",
    testonly = True,
    srcs = ["sglang.go"],
    embedsrcs = [
        "//test/kubernetes/benchmarks/resources:files",  # keep
    ],
    nogo = False,
    deps = [
        "//pkg/sync",
        "//test/gpu/sglang",
        "//test/kubernetes",
        "//test/kubernetes/benchmarks/profiling",
        "//test/kubernetes/benchmetric",
        "//test/kubernetes/k8sctx",
        "//test/kubernetes/testcluster",
        "@io_k8s_api//core/v1:go_default_library",
        "@io_k8s_apimachinery//pkg/api/resource:go_default_library",
        "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
        "@io_k8s_apimachinery//pkg/util/intstr:go_default_library",
    ],
)
422+
# Go test for the :sglang library, built from sglang_test.go.
# NOTE(review): the "local", "noguitar" and "notap" tags presumably keep this
# test out of automated/remote test runs — confirm against the repository's
# tag conventions.
go_test(
    name = "sglang_test",
    srcs = ["sglang_test.go"],
    library = ":sglang",
    nogo = False,
    tags = [
        "local",
        "noguitar",
        "notap",
    ],
    deps = [
        "//test/kubernetes/k8sctx",
        "//test/kubernetes/k8sctx/kubectlctx",
        "//test/kubernetes/testcluster",
    ],
)
439+
400440go_library (
401441 name = "stablediffusion" ,
402442 testonly = True ,
You can’t perform that action at this time.
0 commit comments