Skip to content

Commit 1344629

Browse files
Move barrier programming to Encode class
Related-To: NEO-4576
Change-Id: I34b93b3118528b449c4e1b81826f9784633377a9
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
1 parent f38d597 commit 1344629

File tree

11 files changed

+45
-45
lines changed

11 files changed

+45
-45
lines changed

opencl/source/device_queue/device_queue_hw_bdw_plus.inl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
*
66
*/
77

8+
#include "shared/source/command_container/command_encoder.h"
9+
810
#include "opencl/source/cl_device/cl_device.h"
911
#include "opencl/source/device_queue/device_queue_hw_base.inl"
1012
#include "opencl/source/program/block_kernel_manager.h"
@@ -203,9 +205,9 @@ void DeviceQueueHw<GfxFamily>::setupIndirectState(IndirectHeap &surfaceStateHeap
203205
pIDDestination[blockIndex + i].setKernelStartPointerHigh(blockKernelStartPointer >> 32);
204206
pIDDestination[blockIndex + i].setKernelStartPointer(static_cast<uint32_t>(blockKernelStartPointer));
205207
pIDDestination[blockIndex + i].setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL);
206-
HardwareCommandsHelper<GfxFamily>::programBarrierEnable(&pIDDestination[blockIndex + i],
207-
pBlockInfo->patchInfo.executionEnvironment->HasBarriers,
208-
parentKernel->getDevice().getHardwareInfo());
208+
EncodeDispatchKernel<GfxFamily>::programBarrierEnable(&pIDDestination[blockIndex + i],
209+
pBlockInfo->patchInfo.executionEnvironment->HasBarriers,
210+
parentKernel->getDevice().getHardwareInfo());
209211

210212
// Set offset to sampler states, block's DHSOffset is added by scheduler
211213
pIDDestination[blockIndex + i].setSamplerStatePointer(static_cast<uint32_t>(pBlockInfo->getBorderColorStateSize()));

opencl/source/helpers/hardware_commands_helper.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
155155
static void programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize);
156156
static void programMiAtomic(MI_ATOMIC &atomic, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize);
157157
static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress);
158-
static void programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
159158
static void adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo);
160159

161160
static const size_t alignInterfaceDescriptorData = 64 * sizeof(uint8_t);

opencl/source/helpers/hardware_commands_helper_base.inl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,9 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
191191
static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(HwHelperHw<GfxFamily>::get().computeSlmValues(kernel.slmTotalSize));
192192

193193
interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize);
194-
programBarrierEnable(&interfaceDescriptor, kernel.getKernelInfo().patchInfo.executionEnvironment->HasBarriers,
195-
kernel.getDevice().getHardwareInfo());
194+
EncodeDispatchKernel<GfxFamily>::programBarrierEnable(&interfaceDescriptor,
195+
kernel.getKernelInfo().patchInfo.executionEnvironment->HasBarriers,
196+
kernel.getDevice().getHardwareInfo());
196197

197198
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(&interfaceDescriptor, preemptionMode);
198199
HardwareCommandsHelper<GfxFamily>::adjustInterfaceDescriptorData(&interfaceDescriptor, kernel.getDevice().getHardwareInfo());

opencl/source/helpers/hardware_commands_helper_bdw_plus.inl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,11 +164,6 @@ void HardwareCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(Line
164164
*pipeControl = cmd;
165165
}
166166

167-
template <typename GfxFamily>
168-
void HardwareCommandsHelper<GfxFamily>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) {
169-
pInterfaceDescriptor->setBarrierEnable(value);
170-
}
171-
172167
template <typename GfxFamily>
173168
void HardwareCommandsHelper<GfxFamily>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo) {}
174169
} // namespace NEO

opencl/test/unit_test/gen_common/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ set(IGDRCL_SRCS_tests_gen_common
99
${CMAKE_CURRENT_SOURCE_DIR}/exclude_tests/exclude_test_declare.cpp
1010
${CMAKE_CURRENT_SOURCE_DIR}/exclude_tests/exclude_test_exclude.cpp
1111
${CMAKE_CURRENT_SOURCE_DIR}/gen_commands_common_validation.h
12-
${CMAKE_CURRENT_SOURCE_DIR}/hw_cmds_tests.cpp
1312
${CMAKE_CURRENT_SOURCE_DIR}/matchers.h
1413
${NEO_SHARED_TEST_DIRECTORY}/unit_test/test_macros/header${BRANCH_DIR_SUFFIX}/test.h
1514
)

opencl/test/unit_test/gen_common/hw_cmds_tests.cpp

Lines changed: 0 additions & 32 deletions
This file was deleted.

shared/source/command_container/command_encoder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ struct EncodeDispatchKernel {
5757
bool inlineDataProgrammingRequired,
5858
bool isIndirect,
5959
uint32_t requiredWorkGroupOrder);
60+
61+
static void programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
6062
};
6163

6264
template <typename GfxFamily>

shared/source/command_container/command_encoder_base.inl

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
6666
auto numThreadsPerThreadGroup = dispatchInterface->getNumThreadsPerThreadGroup();
6767
idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);
6868

69-
idd.setBarrierEnable(kernelDescriptor.kernelAttributes.flags.usesBarriers);
69+
EncodeDispatchKernel<Family>::programBarrierEnable(&idd,
70+
kernelDescriptor.kernelAttributes.hasBarriers,
71+
container.getDevice()->getHardwareInfo());
7072
auto slmSize = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(
7173
HwHelperHw<Family>::get().computeSlmValues(dispatchInterface->getSlmTotalSize()));
7274
idd.setSharedLocalMemorySize(
@@ -339,6 +341,13 @@ void EncodeDispatchKernel<GfxFamily>::encodeThreadData(WALKER_TYPE &walkerCmd,
339341
walkerCmd.setBottomExecutionMask(maxDword);
340342
}
341343

344+
template <typename GfxFamily>
345+
void EncodeDispatchKernel<GfxFamily>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor,
346+
uint32_t value,
347+
const HardwareInfo &hwInfo) {
348+
pInterfaceDescriptor->setBarrierEnable(value);
349+
}
350+
342351
template <typename GfxFamily>
343352
void EncodeMiFlushDW<GfxFamily>::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd) {}
344353

shared/source/kernel/kernel_descriptor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ struct KernelDescriptor final {
4646
uint32_t perThreadScratchSize[2] = {0U, 0U};
4747
uint32_t perThreadPrivateMemorySize = 0U;
4848
uint32_t perThreadSystemThreadSurfaceSize = 0U;
49+
uint32_t hasBarriers = 0u;
4950
uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U};
5051
uint16_t crossThreadDataSize = 0U;
5152
uint16_t perThreadDataSize = 0U;

shared/source/kernel/kernel_descriptor_from_patchtokens.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
4949
dst.kernelAttributes.simdSize = execEnv.LargestCompiledSIMDSize;
5050
dst.kernelAttributes.flags.usesDeviceSideEnqueue = (0 != execEnv.HasDeviceEnqueue);
5151
dst.kernelAttributes.flags.usesBarriers = (0 != execEnv.HasBarriers);
52+
dst.kernelAttributes.hasBarriers = execEnv.HasBarriers;
5253
dst.kernelAttributes.flags.requiresDisabledMidThreadPreemption = (0 != execEnv.DisableMidThreadPreemption);
5354
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
5455
dst.kernelAttributes.flags.usesFencesForReadWriteImages = (0 != execEnv.UsesFencesForReadWriteImages);

0 commit comments

Comments
 (0)