Skip to content

Commit 3bfde11

Browse files
performance: update tg dispatch size heuristic
If workgroup dimension x is 1, use dimension y to adjust the thread group dispatch size so that y is divisible by it. Related-To: NEO-7927, GSD-5417 Signed-off-by: Dominik Dabek <dominik.dabek@intel.com> Source: a463a2e
1 parent e88d48f commit 3bfde11

File tree

2 files changed

+41
-3
lines changed

2 files changed

+41
-3
lines changed

shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,14 @@ void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCR
6565
} else {
6666
tgDispatchSizeSelected = 2;
6767
}
68-
if (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1) {
68+
if (walkerCmd.getThreadGroupIdXDimension() > 1 && (walkerCmd.getThreadGroupIdYDimension() > 1 || walkerCmd.getThreadGroupIdZDimension() > 1)) {
6969
while (walkerCmd.getThreadGroupIdXDimension() % tgDispatchSizeSelected != 0) {
7070
tgDispatchSizeSelected /= 2;
7171
}
72+
} else if (walkerCmd.getThreadGroupIdYDimension() > 1 && walkerCmd.getThreadGroupIdZDimension() > 1) {
73+
while (walkerCmd.getThreadGroupIdYDimension() % tgDispatchSizeSelected != 0) {
74+
tgDispatchSizeSelected /= 2;
75+
}
7276
}
7377
if (tgDispatchSizeSelected == 8) {
7478
interfaceDescriptor.setThreadGroupDispatchSize(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8);

shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNumberOfThreadsInThreadGroupAn
592592
{
593593
walkerCmd.setThreadGroupIdXDimension(1);
594594
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd);
595-
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize());
595+
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize());
596596
}
597597
walkerCmd.setThreadGroupIdYDimension(1);
598598
walkerCmd.setThreadGroupIdZDimension(2);
@@ -609,7 +609,41 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenNumberOfThreadsInThreadGroupAn
609609
{
610610
walkerCmd.setThreadGroupIdXDimension(1);
611611
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd);
612-
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1, iddArg.getThreadGroupDispatchSize());
612+
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize());
613+
}
614+
walkerCmd.setThreadGroupIdYDimension(1);
615+
walkerCmd.setThreadGroupIdZDimension(1);
616+
{
617+
walkerCmd.setThreadGroupIdXDimension(4);
618+
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd);
619+
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize());
620+
}
621+
{
622+
walkerCmd.setThreadGroupIdXDimension(2);
623+
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd);
624+
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize());
625+
}
626+
{
627+
walkerCmd.setThreadGroupIdXDimension(1);
628+
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd);
629+
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize());
630+
}
631+
walkerCmd.setThreadGroupIdXDimension(1);
632+
walkerCmd.setThreadGroupIdZDimension(2);
633+
{
634+
walkerCmd.setThreadGroupIdYDimension(4);
635+
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd);
636+
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_4, iddArg.getThreadGroupDispatchSize());
637+
}
638+
{
639+
walkerCmd.setThreadGroupIdYDimension(2);
640+
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd);
641+
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_2, iddArg.getThreadGroupDispatchSize());
642+
}
643+
{
644+
walkerCmd.setThreadGroupIdYDimension(1);
645+
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, *pDevice, hwInfo, threadGroupCount, numGrf, walkerCmd);
646+
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_8, iddArg.getThreadGroupDispatchSize());
613647
}
614648
}
615649

0 commit comments

Comments
 (0)