@@ -424,86 +424,83 @@ The following sections provide details on how we achieved our performance and in
424424
425425Results were obtained by running `scripts/inference_benchmark.sh --gpu Ampere` on NVIDIA A100 (40G).
426426
427- > NOTE: We observed a few regression cases against the performance with TRT-8.4.3 with small batch sizes. The regression issues are under investigation.
428-
429427##### BERT Base
430428
431429| Sequence Length | Batch Size | INT8 Latency (ms) | | | FP16 Latency (ms) | | |
432430| -----------------| ------------| -----------------| -----------------| ---------| -----------------| -----------------| ---------|
433431| | | 95th Percentile | 99th Percentile | Average | 95th Percentile | 99th Percentile | Average |
434- | 128 | 1 | 0.55 | 0.7 | 0.55 | 0.8 | 0.8 | 0.64 |
435- | 128 | 2 | 0.77 | 0.77 | 0.61 | 0.93 | 0.94 | 0.75 |
436- | 128 | 4 | 0.83 | 0.83 | 0.76 | 0.95 | 1.22 | 0.95 |
437- | 128 | 8 | 1.21 | 1.22 | 0.96 | 1.35 | 1.36 | 1.36 |
438- | 128 | 12 | 1.23 | 1.43 | 1.23 | 1.85 | 1.86 | 1.84 |
439- | 128 | 16 | 1.42 | 1.82 | 1.42 | 2.13 | 2.14 | 2.12 |
440- | 128 | 24 | 1.86 | 1.88 | 1.86 | 3.18 | 3.24 | 3.17 |
441- | 128 | 32 | 2.3 | 2.31 | 2.3 | 4.1 | 4.14 | 4.06 |
442- | 128 | 64 | 4.26 | 4.26 | 4.23 | 8.08 | 8.15 | 8.05 |
443- | 128 | 128 | 8.24 | 8.3 | 8.22 | 16.07 | 16.1 | 15.91 |
444- | 384 | 1 | 1.14 | 1.15 | 1.14 | 1.28 | 1.64 | 1.28 |
445- | 384 | 2 | 1.33 | 1.7 | 1.33 | 1.59 | 1.59 | 1.59 |
446- | 384 | 4 | 1.69 | 1.7 | 1.69 | 2.25 | 2.27 | 2.25 |
447- | 384 | 8 | 2.25 | 2.25 | 2.24 | 3.51 | 3.52 | 3.48 |
448- | 384 | 12 | 3.38 | 3.39 | 3.38 | 4.99 | 5.08 | 4.97 |
449- | 384 | 16 | 4.16 | 4.17 | 4.15 | 6.73 | 6.73 | 6.64 |
450- | 384 | 24 | 5.86 | 5.87 | 5.86 | 9.81 | 9.82 | 9.69 |
451- | 384 | 32 | 7.82 | 7.83 | 7.81 | 13.45 | 13.53 | 13.39 |
452- | 384 | 64 | 15.25 | 15.35 | 15.21 | 25.98 | 26.05 | 25.8 |
453- | 384 | 128 | 29.8 | 29.82 | 29.53 | 50.98 | 51.1 | 50.59 |
432+ | 128 | 1 | 0.55 | 0.70 | 0.55 | 0.61 | 0.78 | 0.62 |
433+ | 128 | 2 | 0.78 | 0.78 | 0.62 | 0.72 | 0.92 | 0.73 |
434+ | 128 | 4 | 0.74 | 0.93 | 0.74 | 0.93 | 0.93 | 0.93 |
435+ | 128 | 8 | 0.95 | 0.95 | 0.94 | 1.31 | 1.31 | 1.31 |
436+ | 128 | 12 | 1.21 | 1.53 | 1.22 | 1.73 | 1.77 | 1.72 |
437+ | 128 | 16 | 1.34 | 1.34 | 1.34 | 2.09 | 2.10 | 2.07 |
438+ | 128 | 24 | 1.84 | 1.84 | 1.84 | 3.07 | 3.09 | 3.03 |
439+ | 128 | 32 | 2.27 | 2.27 | 2.26 | 3.93 | 3.94 | 3.90 |
440+ | 128 | 64 | 4.21 | 4.25 | 4.18 | 7.79 | 7.80 | 7.72 |
441+ | 128 | 128 | 8.25 | 8.26 | 8.14 | 15.41 | 15.42 | 15.27 |
442+ | 384 | 1 | 1.14 | 1.46 | 1.14 | 1.26 | 1.26 | 1.25 |
443+ | 384 | 2 | 1.31 | 1.31 | 1.31 | 1.55 | 1.55 | 1.55 |
444+ | 384 | 4 | 1.67 | 1.67 | 1.67 | 2.13 | 2.17 | 2.13 |
445+ | 384 | 8 | 2.22 | 2.22 | 2.22 | 3.36 | 3.39 | 3.35 |
446+ | 384 | 12 | 3.34 | 3.35 | 3.34 | 4.84 | 4.88 | 4.79 |
447+ | 384 | 16 | 4.04 | 4.04 | 4.04 | 6.40 | 6.46 | 6.39 |
448+ | 384 | 24 | 5.76 | 5.76 | 5.74 | 9.54 | 9.66 | 9.44 |
449+ | 384 | 32 | 7.71 | 7.71 | 7.70 | 13.02 | 13.03 | 12.90 |
450+ | 384 | 64 | 15.01 | 15.01 | 14.91 | 25.25 | 25.26 | 24.89 |
451+ | 384 | 128 | 29.26 | 29.26 | 29.13 | 49.12 | 49.25 | 48.81 |
454452
455453##### BERT Large
456454
457455| Sequence Length | Batch Size | INT8 Latency (ms) | | | FP16 Latency (ms) | | |
458456| -----------------| ------------| -----------------| -----------------| ---------| -----------------| -----------------| ---------|
459457| | | 95th Percentile | 99th Percentile | Average | 95th Percentile | 99th Percentile | Average |
460- | 128 | 1 | 1.25 | 1.57 | 1.25 | 1.67 | 1.7 | 1.67 |
461- | 128 | 2 | 1.44 | 1.45 | 1.44 | 1.88 | 1.9 | 1.88 |
462- | 128 | 4 | 2.0 | 2.01 | 2.0 | 2.72 | 2.73 | 2.71 |
463- | 128 | 8 | 2.73 | 2.74 | 2.73 | 4.4 | 4.41 | 4.38 |
464- | 128 | 12 | 3.44 | 3.45 | 3.44 | 5.25 | 5.25 | 5.2 |
465- | 128 | 16 | 4.07 | 4.08 | 4.06 | 7.37 | 7.39 | 7.32 |
466- | 128 | 24 | 5.31 | 5.32 | 5.3 | 10.02 | 10.1 | 9.97 |
467- | 128 | 32 | 7.14 | 7.15 | 7.09 | 13.77 | 13.8 | 13.68 |
468- | 128 | 64 | 13.19 | 13.2 | 13.06 | 26.03 | 26.05 | 25.77 |
469- | 128 | 128 | 25.62 | 25.65 | 25.39 | 51.59 | 51.72 | 51.2 |
470- | 384 | 1 | 2.84 | 2.85 | 2.84 | 3.06 | 3.08 | 3.06 |
471- | 384 | 2 | 3.05 | 3.06 | 3.05 | 4.08 | 4.31 | 4.08 |
472- | 384 | 4 | 4.37 | 4.38 | 4.36 | 5.85 | 5.87 | 5.85 |
473- | 384 | 8 | 7.24 | 7.25 | 7.22 | 11.46 | 11.55 | 11.41 |
474- | 384 | 12 | 9.35 | 9.38 | 9.34 | 16.15 | 16.15 | 16.0 |
475- | 384 | 16 | 12.38 | 12.4 | 12.37 | 22.06 | 22.12 | 21.86 |
476- | 384 | 24 | 17.93 | 18.1 | 17.82 | 32.42 | 32.54 | 32.17 |
477- | 384 | 32 | 23.29 | 23.3 | 23.13 | 42.78 | 42.9 | 42.52 |
478- | 384 | 64 | 45.6 | 45.62 | 45.29 | 83.5 | 83.68 | 82.86 |
479- | 384 | 128 | 89.73 | 89.81 | 89.04 | 163.72 | 164.23 | 162.67 |
458+ | 128 | 1 | 1.24 | 1.25 | 1.24 | 1.58 | 1.60 | 1.58 |
459+ | 128 | 2 | 1.44 | 1.44 | 1.44 | 1.83 | 1.84 | 1.82 |
460+ | 128 | 4 | 1.78 | 1.79 | 1.78 | 2.54 | 2.54 | 2.53 |
461+ | 128 | 8 | 2.82 | 2.82 | 2.81 | 3.98 | 4.00 | 3.97 |
462+ | 128 | 12 | 3.11 | 3.11 | 3.11 | 5.08 | 5.12 | 5.04 |
463+ | 128 | 16 | 4.06 | 4.07 | 4.06 | 6.96 | 6.96 | 6.91 |
464+ | 128 | 24 | 5.31 | 5.32 | 5.31 | 9.69 | 9.70 | 9.63 |
465+ | 128 | 32 | 7.07 | 7.07 | 7.02 | 13.11 | 13.12 | 12.93 |
466+ | 128 | 64 | 12.97 | 13.08 | 12.89 | 24.94 | 25.22 | 24.74 |
467+ | 128 | 128 | 25.48 | 25.72 | 25.28 | 49.30 | 49.46 | 49.18 |
468+ | 384 | 1 | 2.59 | 2.59 | 2.59 | 2.98 | 2.99 | 2.98 |
469+ | 384 | 2 | 3.04 | 3.05 | 3.04 | 4.01 | 4.03 | 4.00 |
470+ | 384 | 4 | 4.03 | 4.04 | 4.03 | 5.79 | 5.79 | 5.73 |
471+ | 384 | 8 | 7.20 | 7.22 | 7.20 | 11.11 | 11.14 | 10.99 |
472+ | 384 | 12 | 9.19 | 9.20 | 9.19 | 15.47 | 15.63 | 15.39 |
473+ | 384 | 16 | 12.36 | 12.38 | 12.35 | 21.18 | 21.19 | 21.00 |
474+ | 384 | 24 | 17.77 | 17.95 | 17.68 | 31.41 | 31.42 | 30.90 |
475+ | 384 | 32 | 23.36 | 23.37 | 23.20 | 41.40 | 41.43 | 40.90 |
476+ | 384 | 64 | 45.60 | 45.61 | 45.26 | 80.07 | 80.25 | 79.50 |
477+ | 384 | 128 | 89.25 | 89.30 | 88.57 | 157.38 | 157.76 | 156.31 |
480478
481479##### Megatron Large with Sparsity
482480
483481| Sequence Length | Batch Size | INT8 QAT Latency (ms) | | |
484482| -----------------| ------------| -----------------| -----------------| ---------|
485483| | | 95th Percentile | 99th Percentile | Average |
486- | 128 | 1 | 1.14 | 1.44 | 1.14 |
487- | 128 | 2 | 1.45 | 1.46 | 1.45 |
488- | 128 | 4 | 1.8 | 1.8 | 1.8 |
489- | 128 | 8 | 2.57 | 2.57 | 2.56 |
490- | 128 | 12 | 3.16 | 3.17 | 3.16 |
491- | 128 | 16 | 4.08 | 4.09 | 4.08 |
492- | 128 | 24 | 5.07 | 5.08 | 5.07 |
493- | 128 | 32 | 6.93 | 6.95 | 6.88 |
494- | 128 | 64 | 11.73 | 11.74 | 11.71 |
495- | 128 | 128 | 21.47 | 21.48 | 21.28 |
496- | 384 | 1 | 1.72 | 1.73 | 1.72 |
497- | 384 | 2 | 2.26 | 2.27 | 2.26 |
498- | 384 | 4 | 3.68 | 3.69 | 3.68 |
499- | 384 | 8 | 5.92 | 5.93 | 5.91 |
500- | 384 | 12 | 8.27 | 8.28 | 8.26 |
501- | 384 | 16 | 10.46 | 10.47 | 10.45 |
502- | 384 | 24 | 14.77 | 14.78 | 14.75 |
503- | 384 | 32 | 18.82 | 18.83 | 18.8 |
504- | 384 | 64 | 36.16 | 36.19 | 35.88 |
505- | 384 | 128 | 69.07 | 69.32 | 68.61 |
506-
484+ | 128 | 1 | 1.29 | 1.54 | 1.29 |
485+ | 128 | 2 | 1.35 | 1.71 | 1.35 |
486+ | 128 | 4 | 1.79 | 2.14 | 1.79 |
487+ | 128 | 8 | 2.54 | 2.54 | 2.53 |
488+ | 128 | 12 | 2.93 | 2.93 | 2.92 |
489+ | 128 | 16 | 3.95 | 3.95 | 3.94 |
490+ | 128 | 24 | 4.93 | 4.94 | 4.92 |
491+ | 128 | 32 | 7.13 | 7.14 | 7.12 |
492+ | 128 | 64 | 11.64 | 11.64 | 11.62 |
493+ | 128 | 128 | 21.29 | 21.46 | 21.16 |
494+ | 384 | 1 | 1.71 | 1.72 | 1.71 |
495+ | 384 | 2 | 2.24 | 2.25 | 2.23 |
496+ | 384 | 4 | 3.43 | 3.44 | 3.43 |
497+ | 384 | 8 | 5.77 | 5.77 | 5.76 |
498+ | 384 | 12 | 8.39 | 8.39 | 8.37 |
499+ | 384 | 16 | 10.38 | 10.39 | 10.36 |
500+ | 384 | 24 | 14.69 | 14.70 | 14.67 |
501+ | 384 | 32 | 18.68 | 18.82 | 18.66 |
502+ | 384 | 64 | 35.88 | 35.89 | 35.70 |
503+ | 384 | 128 | 68.71 | 68.73 | 68.16 |
507504
508505#### Inference performance: NVIDIA A30
509506
@@ -514,76 +511,76 @@ Results were obtained by running `scripts/inference_benchmark.sh --gpu Ampere` o
514511| Sequence Length | Batch Size | INT8 Latency (ms) | | | FP16 Latency (ms) | | |
515512| -----------------| ------------| -----------------| -----------------| ---------| -----------------| -----------------| ---------|
516513| | | 95th Percentile | 99th Percentile | Average | 95th Percentile | 99th Percentile | Average |
517- | 128 | 1 | 0.59 | 0.89 | 0.6 | 1.19 | 1.19 | 0.82 |
518- | 128 | 2 | 0.75 | 1.13 | 0.75 | 1.01 | 1.01 | 1.01 |
519- | 128 | 4 | 1.04 | 1.04 | 1.04 | 1.52 | 1.53 | 1.51 |
520- | 128 | 8 | 1.47 | 1.48 | 1.45 | 2.48 | 2.5 | 2.48 |
521- | 128 | 12 | 1.97 | 1.97 | 1.94 | 3.59 | 3.66 | 3.54 |
522- | 128 | 16 | 2.42 | 2.43 | 2.4 | 4.49 | 4.51 | 4.44 |
523- | 128 | 24 | 3.58 | 3.61 | 3.52 | 6.89 | 7.01 | 6.82 |
524- | 128 | 32 | 4.5 | 4.55 | 4.49 | 8.76 | 8.79 | 8.67 |
525- | 128 | 64 | 8.74 | 8.82 | 8.68 | 17.4 | 17.41 | 17.23 |
526- | 128 | 128 | 17.01 | 17.2 | 16.88 | 34.0 | 34.32 | 33.86 |
527- | 384 | 1 | 1.31 | 1.7 | 1.32 | 1.66 | 1.67 | 1.66 |
528- | 384 | 2 | 1.66 | 1.66 | 1.66 | 2.39 | 2.4 | 2.36 |
529- | 384 | 4 | 2.3 | 2.31 | 2.29 | 3.9 | 3.96 | 3.87 |
530- | 384 | 8 | 4.34 | 4.35 | 4.28 | 7.62 | 7.67 | 7.5 |
531- | 384 | 12 | 6.17 | 6.24 | 6.11 | 10.68 | 10.76 | 10.59 |
532- | 384 | 16 | 8.25 | 8.27 | 8.18 | 14.58 | 14.67 | 14.53 |
533- | 384 | 24 | 11.96 | 12.04 | 11.93 | 21.5 | 21.53 | 21.26 |
534- | 384 | 32 | 15.76 | 15.77 | 15.64 | 28.35 | 28.5 | 28.07 |
535- | 384 | 64 | 31.09 | 31.34 | 30.93 | 54.91 | 55.46 | 54.69 |
536- | 384 | 128 | 61.67 | 62.0 | 60.93 | 108.85 | 109.18 | 108.18 |
514+ | 128 | 1 | 0.91 | 0.92 | 0.62 | 1.18 | 1.18 | 0.82 |
515+ | 128 | 2 | 1.13 | 1.13 | 0.77 | 1.07 | 1.07 | 0.97 |
516+ | 128 | 4 | 1.04 | 1.57 | 1.05 | 1.46 | 2.11 | 1.44 |
517+ | 128 | 8 | 1.46 | 1.49 | 1.44 | 2.41 | 2.41 | 2.40 |
518+ | 128 | 12 | 1.94 | 1.94 | 1.94 | 3.42 | 3.45 | 3.40 |
519+ | 128 | 16 | 2.40 | 2.46 | 2.37 | 4.33 | 4.41 | 4.28 |
520+ | 128 | 24 | 3.54 | 3.59 | 3.48 | 6.59 | 6.60 | 6.50 |
521+ | 128 | 32 | 4.46 | 4.50 | 4.43 | 8.49 | 8.55 | 8.37 |
522+ | 128 | 64 | 8.68 | 8.75 | 8.57 | 16.65 | 16.67 | 16.47 |
523+ | 128 | 128 | 16.81 | 16.83 | 16.63 | 32.40 | 32.52 | 32.04 |
524+ | 384 | 1 | 1.31 | 1.32 | 1.31 | 1.62 | 1.64 | 1.63 |
525+ | 384 | 2 | 1.66 | 1.66 | 1.66 | 2.27 | 2.27 | 2.26 |
526+ | 384 | 4 | 2.32 | 2.32 | 2.30 | 3.79 | 3.87 | 3.72 |
527+ | 384 | 8 | 4.26 | 4.26 | 4.24 | 7.26 | 7.31 | 7.17 |
528+ | 384 | 12 | 6.10 | 6.13 | 6.04 | 10.35 | 10.43 | 10.23 |
529+ | 384 | 16 | 8.17 | 8.18 | 8.08 | 13.93 | 14.05 | 13.85 |
530+ | 384 | 24 | 11.91 | 11.98 | 11.82 | 20.46 | 20.57 | 20.25 |
531+ | 384 | 32 | 15.50 | 15.64 | 15.48 | 27.06 | 27.17 | 26.81 |
532+ | 384 | 64 | 31.03 | 31.18 | 30.63 | 52.44 | 52.48 | 52.05 |
533+ | 384 | 128 | 61.10 | 61.13 | 60.50 | 103.38 | 103.64 | 102.87 |
537534
538535##### BERT Large
539536
540537| Sequence Length | Batch Size | INT8 Latency (ms) | | | FP16 Latency (ms) | | |
541538| -----------------| ------------| -----------------| -----------------| ---------| -----------------| -----------------| ---------|
542539| | | 95th Percentile | 99th Percentile | Average | 95th Percentile | 99th Percentile | Average |
543- | 128 | 1 | 1.47 | 1.47 | 1.47 | 2.02 | 2.04 | 2.02 |
544- | 128 | 2 | 1.83 | 1.84 | 1.83 | 2.86 | 2.86 | 2.84 |
545- | 128 | 4 | 2.71 | 2.71 | 2.69 | 4.77 | 4.8 | 4.69 |
546- | 128 | 8 | 4.33 | 4.37 | 4.29 | 8.47 | 8.53 | 8.42 |
547- | 128 | 12 | 5.71 | 5.76 | 5.62 | 10.94 | 11.02 | 10.84 |
548- | 128 | 16 | 7.67 | 7.76 | 7.64 | 15.08 | 15.17 | 15.06 |
549- | 128 | 24 | 10.63 | 10.68 | 10.51 | 21.32 | 21.38 | 21.12 |
550- | 128 | 32 | 14.19 | 14.26 | 14.06 | 29.42 | 29.45 | 29.04 |
551- | 128 | 64 | 26.95 | 26.97 | 26.69 | 56.09 | 56.38 | 55.71 |
552- | 128 | 128 | 52.86 | 52.98 | 52.32 | 109.89 | 110.09 | 109.01 |
553- | 384 | 1 | 3.34 | 3.34 | 3.33 | 4.56 | 4.59 | 4.53 |
554- | 384 | 2 | 4.24 | 4.25 | 4.21 | 6.82 | 6.86 | 6.75 |
555- | 384 | 4 | 7.33 | 7.33 | 7.25 | 12.33 | 12.34 | 12.21 |
556- | 384 | 8 | 12.92 | 13.0 | 12.88 | 23.39 | 23.45 | 23.17 |
557- | 384 | 12 | 18.75 | 18.88 | 18.6 | 34.75 | 35.07 | 34.59 |
558- | 384 | 16 | 24.32 | 24.45 | 24.13 | 45.67 | 45.79 | 45.26 |
559- | 384 | 24 | 35.99 | 36.3 | 35.66 | 67.12 | 67.72 | 66.85 |
560- | 384 | 32 | 47.53 | 47.56 | 47.04 | 88.88 | 89.31 | 88.39 |
561- | 384 | 64 | 92.13 | 92.64 | 91.92 | 175.91 | 176.4 | 174.94 |
562- | 384 | 128 | 181.87 | 182.29 | 180.87 | 346.39 | 346.88 | 345.32 |
540+ | 128 | 1 | 1.49 | 1.49 | 1.48 | 2.03 | 2.03 | 2.02 |
541+ | 128 | 2 | 1.83 | 1.84 | 1.82 | 2.79 | 2.79 | 2.76 |
542+ | 128 | 4 | 2.70 | 2.70 | 2.68 | 4.35 | 4.40 | 4.31 |
543+ | 128 | 8 | 4.50 | 4.52 | 4.47 | 8.07 | 8.17 | 8.01 |
544+ | 128 | 12 | 5.67 | 5.69 | 5.62 | 10.67 | 10.75 | 10.53 |
545+ | 128 | 16 | 8.08 | 8.13 | 7.95 | 14.86 | 14.86 | 14.72 |
546+ | 128 | 24 | 10.59 | 10.60 | 10.47 | 20.71 | 20.73 | 20.47 |
547+ | 128 | 32 | 14.16 | 14.21 | 14.03 | 28.21 | 28.37 | 27.98 |
548+ | 128 | 64 | 26.77 | 26.95 | 26.66 | 54.03 | 54.33 | 53.43 |
549+ | 128 | 128 | 52.65 | 52.78 | 52.12 | 106.15 | 106.75 | 105.37 |
550+ | 384 | 1 | 3.20 | 3.21 | 3.20 | 4.19 | 4.19 | 4.17 |
551+ | 384 | 2 | 4.26 | 4.26 | 4.22 | 6.61 | 6.63 | 6.56 |
552+ | 384 | 4 | 7.56 | 7.64 | 7.55 | 12.04 | 12.05 | 11.93 |
553+ | 384 | 8 | 13.01 | 13.07 | 12.84 | 22.81 | 22.89 | 22.56 |
554+ | 384 | 12 | 18.73 | 18.82 | 18.56 | 33.47 | 33.62 | 33.43 |
555+ | 384 | 16 | 24.41 | 24.51 | 24.16 | 44.45 | 44.47 | 44.03 |
556+ | 384 | 24 | 35.83 | 36.19 | 35.53 | 65.53 | 65.79 | 64.91 |
557+ | 384 | 32 | 47.34 | 47.52 | 46.86 | 85.92 | 86.16 | 85.15 |
558+ | 384 | 64 | 92.68 | 93.00 | 91.86 | 169.51 | 170.03 | 168.46 |
559+ | 384 | 128 | 181.91 | 182.29 | 181.02 | 334.01 | 334.51 | 332.81 |
563560
564561##### Megatron Large with Sparsity
565562
566563| Sequence Length | Batch Size | INT8 QAT Latency (ms) | | |
567564| -----------------| ------------| -----------------| -----------------| ---------|
568565| | | 95th Percentile | 99th Percentile | Average |
569- | 128 | 1 | 1.42 | 1.42 | 1.42 |
566+ | 128 | 1 | 1.46 | 1.47 | 1.45 |
570567| 128 | 2 | 1.88 | 1.88 | 1.87 |
571- | 128 | 4 | 2.71 | 2.72 | 2.7 |
572- | 128 | 8 | 4.16 | 4.17 | 4.16 |
573- | 128 | 12 | 5.3 | 5.34 | 5.27 |
574- | 128 | 16 | 7.44 | 7.5 | 7.36 |
575- | 128 | 24 | 10.01 | 10.05 | 9.91 |
576- | 128 | 32 | 13.14 | 13.15 | 13.1 |
577- | 128 | 64 | 24.61 | 24.73 | 24.46 |
578- | 128 | 128 | 46.66 | 46.83 | 46.58 |
579- | 384 | 1 | 2.37 | 2.38 | 2.37 |
580- | 384 | 2 | 3.87 | 3.88 | 3.86 |
581- | 384 | 4 | 6.14 | 6.17 | 6.08 |
582- | 384 | 8 | 11.61 | 11.64 | 11.54 |
583- | 384 | 12 | 16.04 | 16.11 | 15.95 |
584- | 384 | 16 | 21.24 | 21.33 | 21.1 |
585- | 384 | 24 | 30.48 | 30.61 | 30.23 |
586- | 384 | 32 | 40.79 | 40.97 | 40.46 |
587- | 384 | 64 | 78.04 | 78.41 | 77.51 |
588- | 384 | 128 | 151.33 | 151.62 | 150.76 |
568+ | 128 | 4 | 2.74 | 2.74 | 2.73 |
569+ | 128 | 8 | 4.11 | 4.12 | 4.10 |
570+ | 128 | 12 | 5.29 | 5.35 | 5.25 |
571+ | 128 | 16 | 7.52 | 7.57 | 7.50 |
572+ | 128 | 24 | 10.11 | 10.19 | 10.06 |
573+ | 128 | 32 | 12.85 | 12.90 | 12.80 |
574+ | 128 | 64 | 24.50 | 24.52 | 24.26 |
575+ | 128 | 128 | 46.24 | 46.57 | 45.92 |
576+ | 384 | 1 | 2.35 | 2.36 | 2.35 |
577+ | 384 | 2 | 3.90 | 3.91 | 3.89 |
578+ | 384 | 4 | 6.14 | 6.15 | 6.08 |
579+ | 384 | 8 | 11.74 | 11.76 | 11.64 |
580+ | 384 | 12 | 15.86 | 15.88 | 15.74 |
581+ | 384 | 16 | 21.21 | 21.27 | 21.05 |
582+ | 384 | 24 | 30.03 | 30.04 | 29.89 |
583+ | 384 | 32 | 40.20 | 40.22 | 40.05 |
584+ | 384 | 64 | 76.82 | 77.11 | 76.52 |
585+ | 384 | 128 | 149.54 | 149.80 | 148.78 |
589586
0 commit comments