Skip to content

Commit 0e417ef

Browse files
authored
[AMD] NFC: Explain waves_per_eu with comments (#5426)
Add a comment to clarify the usage of waves_per_eu.
1 parent ca5c797 commit 0e417ef

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

third_party/amd/backend/compiler.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,13 @@ def make_llir(src, metadata, options):
345345
# The public kernel should be kernel 0.
346346
fns[0].set_calling_conv(amd.CALLING_CONV_AMDGPU_KERNEL)
347347
fns[0].add_fn_attr("amdgpu-flat-work-group-size", f"1,{options.num_warps*options.warp_size}")
348+
# LLVM AMDGPU backend supports the attribute "amdgpu-waves-per-eu"="<min>[, <max>]".
349+
# This attribute may be attached to a kernel function definition and is an optimization hint.
350+
# <min> parameter specifies the requested minimum number of waves per EU, and optional <max> parameter
351+
# specifies the requested maximum number of waves per EU (must not be less than <min> if specified).
352+
# If <max> is omitted, then there is no restriction on the maximum number of waves per EU other than
353+
# the one dictated by the hardware for which the kernel is compiled. Passing 0, 0 as <min>, <max>
354+
# implies the default behavior (no limits). Only a single value (options.waves_per_eu) is passed below.
348355
fns[0].add_fn_attr("amdgpu-waves-per-eu", f"{options.waves_per_eu}")
349356
denormal_mode = "preserve-sign" if options.allow_flush_denorm else "ieee"
350357
fns[0].add_fn_attr("denormal-fp-math-f32", denormal_mode)

0 commit comments

Comments
 (0)