Skip to content

Commit 7ea0a11

Browse files
Unify programming of partition registers
Related-To: NEO-6262
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
1 parent 49d1e04 commit 7ea0a11

31 files changed: 226 additions and 75 deletions

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -105,26 +105,27 @@ void programEventL3Flush(ze_event_handle_t hEvent,
105105
event->setPacketsInUse(event->getPacketsInUse() + 1);
106106
}
107107

108+
auto &cmdListStream = *commandContainer.getCommandStream();
108109
NEO::PipeControlArgs args;
109110
args.dcFlushEnable = true;
111+
110112
if (partitionCount > 1) {
111113
args.workloadPartitionOffset = true;
112-
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(*commandContainer.getCommandStream(),
113-
NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
114-
static_cast<uint32_t>(event->getSinglePacketSize()),
115-
true);
114+
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(cmdListStream,
115+
static_cast<uint32_t>(event->getSinglePacketSize()));
116116
}
117+
117118
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
118-
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
119-
eventAddress, Event::STATE_SIGNALED,
119+
cmdListStream,
120+
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
121+
eventAddress,
122+
Event::STATE_SIGNALED,
120123
commandContainer.getDevice()->getHardwareInfo(),
121124
args);
122125

123126
if (partitionCount > 1) {
124-
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(*commandContainer.getCommandStream(),
125-
NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
126-
CommonConstants::partitionAddressOffset,
127-
true);
127+
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(cmdListStream,
128+
CommonConstants::partitionAddressOffset);
128129
}
129130
}
130131

@@ -219,7 +220,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
219220
}
220221
}
221222

222-
auto isMultiOsContextCapable = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(device->getNEODevice()->getDeviceBitfield(),
223+
auto isMultiOsContextCapable = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(neoDevice->getDeviceBitfield(),
223224
!isCooperative);
224225
updateStreamProperties(*kernel, isMultiOsContextCapable, isCooperative);
225226

@@ -251,6 +252,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
251252
event->setPacketsInUse(partitionCount);
252253
}
253254
if (L3FlushEnable) {
255+
size_t estimatedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(neoDevice->getHardwareInfo());
256+
if (partitionCount > 1) {
257+
estimatedSize += 2 * NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
258+
}
259+
increaseCommandStreamSpace(estimatedSize);
254260
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);
255261
}
256262
}
@@ -307,27 +313,21 @@ template <GFXCORE_FAMILY gfxCoreFamily>
307313
// Appends the multi-partition prologue to this command list.
// Steps visible in this hunk: fetch the work-partition allocation GPU VA from the
// default engine's command stream receiver, call increaseCommandStreamSpace with a
// size estimate, then program the partition registers using partitionDataSize as the
// address offset.
// Gutter lines ending in '-' mark code removed by this commit; gutter lines ending
// in '+' mark the code that replaces it.
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
308314

309315
const uint64_t workPartitionAllocationGpuVa = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
310-
// Removed: estimate hand-summed from one MI_LOAD_REGISTER_MEM plus one MI_LOAD_REGISTER_IMM.
size_t estimatedSizeRequired = sizeof(typename GfxFamily::MI_LOAD_REGISTER_MEM) + sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
316+
// Added: the estimate now comes from the same helper family that emits the commands below.
size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getRegisterConfigurationSize();
311317
increaseCommandStreamSpace(estimatedSizeRequired);
312318

313-
// Removed: direct encodeMEM (wparidCCSOffset) + encodeIMM (addressOffsetCCSOffset) on the container.
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer,
314-
NEO::PartitionRegisters<GfxFamily>::wparidCCSOffset,
315-
workPartitionAllocationGpuVa);
316-
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(commandContainer,
317-
NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
318-
partitionDataSize,
319-
true);
319+
// Added: one call that performs the same MEM + IMM register programming on the command stream.
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration(*commandContainer.getCommandStream(),
320+
workPartitionAllocationGpuVa,
321+
partitionDataSize);
320322
}
321323

322324
// Appends the multi-partition epilogue: writes CommonConstants::partitionAddressOffset
// to the address-offset partition register after calling increaseCommandStreamSpace
// with the size of that single register write.
// Gutter lines ending in '-' are removed by this commit; those ending in '+' replace them.
template <GFXCORE_FAMILY gfxCoreFamily>
323325
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
324326

325-
// Removed: size taken directly from sizeof(MI_LOAD_REGISTER_IMM).
const size_t estimatedSizeRequired = sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
327+
// Added: size obtained from the helper that pairs with dispatchOffsetRegister.
const size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
326328
increaseCommandStreamSpace(estimatedSizeRequired);
327-
// Removed: direct encodeIMM of addressOffsetCCSOffset on the container.
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(commandContainer,
328-
NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
329-
CommonConstants::partitionAddressOffset,
330-
true);
329+
// Added: same register write, routed through ImplicitScalingDispatch on the command stream.
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
330+
CommonConstants::partitionAddressOffset);
331331
}
332332

333333
template <GFXCORE_FAMILY gfxCoreFamily>

level_zero/core/source/cmdqueue/cmdqueue_hw.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,9 @@ struct CommandQueueHw : public CommandQueueImp {
5252

5353
bool getPreemptionCmdProgramming() override;
5454
void patchCommands(CommandList &commandList, uint64_t scratchAddress);
55+
56+
// Size that executeCommandLists adds to its linear-stream estimate when partitionCount > 1.
// This commit defines it twice: the base variant returns 0, the XeHP-and-later variant
// returns NEO::ImplicitScalingDispatch<GfxFamily>::getRegisterConfigurationSize().
size_t getPartitionProgrammingSize();
57+
// Writes the partition register configuration into `stream`.
// The base variant has an empty body; the XeHP-and-later variant dispatches the
// register configuration using the CSR's work-partition allocation GPU address.
void programPartitionConfiguration(NEO::LinearStream &stream);
5558
};
5659

5760
} // namespace L0

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
270270

271271
linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
272272
if (partitionCount > 1) {
273-
linearStreamSizeEstimate += sizeof(MI_LOAD_REGISTER_MEM) + sizeof(MI_LOAD_REGISTER_IMM);
273+
linearStreamSizeEstimate += getPartitionProgrammingSize();
274274
}
275275

276276
size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign);
@@ -420,14 +420,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
420420
commandQueuePreemptionMode = statePreemption;
421421

422422
if (partitionCount > 1) {
423-
uint64_t workPartitionAddress = csr->getWorkPartitionAllocationGpuAddress();
424-
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(child,
425-
NEO::PartitionRegisters<GfxFamily>::wparidCCSOffset,
426-
workPartitionAddress);
427-
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(child,
428-
NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
429-
CommonConstants::partitionAddressOffset,
430-
true);
423+
programPartitionConfiguration(child);
431424
}
432425

433426
if (hFence) {

level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,4 +123,13 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
123123
UNRECOVERABLE_IF(!commandsToPatch.empty());
124124
}
125125

126+
// Base-platform variant: always reports 0, matching the empty
// programPartitionConfiguration defined alongside it in this file.
template <GFXCORE_FAMILY gfxCoreFamily>
127+
size_t CommandQueueHw<gfxCoreFamily>::getPartitionProgrammingSize() {
128+
return 0;
129+
}
130+
131+
// Base-platform variant: intentionally empty, so `stream` is left untouched.
// The unit test added in this commit checks that the stream's used size does not change.
template <GFXCORE_FAMILY gfxCoreFamily>
132+
void CommandQueueHw<gfxCoreFamily>::programPartitionConfiguration(NEO::LinearStream &stream) {
133+
}
134+
126135
} // namespace L0

level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,4 +153,20 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
153153
}
154154
}
155155

156+
// XeHP-and-later variant: forwards to ImplicitScalingDispatch so the size reported here
// comes from the same helper family that programPartitionConfiguration uses to emit commands.
template <GFXCORE_FAMILY gfxCoreFamily>
157+
size_t CommandQueueHw<gfxCoreFamily>::getPartitionProgrammingSize() {
158+
// Map the core-family enum to the GfxFamily type expected by the shared helper.
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
159+
return NEO::ImplicitScalingDispatch<GfxFamily>::getRegisterConfigurationSize();
160+
}
161+
162+
// XeHP-and-later variant: programs the partition registers into `stream`.
// It passes the CSR's work-partition allocation GPU address and
// CommonConstants::partitionAddressOffset to dispatchRegisterConfiguration.
template <GFXCORE_FAMILY gfxCoreFamily>
163+
void CommandQueueHw<gfxCoreFamily>::programPartitionConfiguration(NEO::LinearStream &stream) {
164+
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
165+
166+
// Same address the removed inline code in executeCommandLists read from the CSR.
uint64_t workPartitionAddress = csr->getWorkPartitionAllocationGpuAddress();
167+
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration(stream,
168+
workPartitionAddress,
169+
CommonConstants::partitionAddressOffset);
170+
}
171+
156172
} // namespace L0

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2108,5 +2108,27 @@ HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAnd
21082108
ASSERT_EQ(ZE_RESULT_SUCCESS, res);
21092109
commandQueue->destroy();
21102110
}
2111+
2112+
// Verifies the base-platform partition helpers on a mock command queue:
// getPartitionProgrammingSize() must return 0 and programPartitionConfiguration()
// must not advance the command stream's used size.
HWTEST2_F(CommandQueueSynchronizeTest, givenBasePlatformsWhenProgrammingPartitionRegistersThenExpectNoAction, CommandQueueSBASupport) {
2113+
ze_result_t returnValue;
2114+
ze_command_queue_desc_t desc = {};
2115+
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;
2116+
2117+
// Released through destroy() at the end of the test rather than delete.
auto commandQueue = new MockCommandQueueHw<gfxCoreFamily>(device, csr, &desc);
2118+
returnValue = commandQueue->initialize(false, false);
2119+
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
2120+
2121+
// Single expected value reused for both the reported size and the measured stream growth.
constexpr size_t expectedSize = 0;
2122+
EXPECT_EQ(expectedSize, commandQueue->getPartitionProgrammingSize());
2123+
2124+
// Measure stream usage around the call to detect any emitted commands.
size_t usedBefore = commandQueue->commandStream->getUsed();
2125+
commandQueue->programPartitionConfiguration(*commandQueue->commandStream);
2126+
size_t usedAfter = commandQueue->commandStream->getUsed();
2127+
2128+
EXPECT_EQ(expectedSize, usedAfter - usedBefore);
2129+
2130+
commandQueue->destroy();
2131+
}
2132+
21112133
} // namespace ult
21122134
} // namespace L0

opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1420,7 +1420,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, WhenProgrammingActiveP
14201420
size_t expectedCmdSize = 0;
14211421
EXPECT_EQ(expectedCmdSize, commandStreamReceiver.getCmdSizeForActivePartitionConfig());
14221422
size_t usedBefore = commandStreamReceiver.commandStream.getUsed();
1423-
commandStreamReceiver.programActivePartitionConfig();
1423+
commandStreamReceiver.programActivePartitionConfig(commandStreamReceiver.commandStream);
14241424
size_t usedAfter = commandStreamReceiver.commandStream.getUsed();
14251425
EXPECT_EQ(usedBefore, usedAfter);
14261426
}

shared/source/command_container/implicit_scaling.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,15 @@ struct ImplicitScalingDispatch {
6767
bool apiSelfCleanup,
6868
bool useSecondaryBatchBuffer);
6969

70+
// Size of the full partition register configuration: EncodeSetMMIO::sizeMEM plus
// getOffsetRegisterSize(), per the XeHP-and-later definition in this commit.
static size_t getRegisterConfigurationSize();
71+
// Emits the full configuration: a register load from workPartitionSurfaceAddress
// into the WPARID register offset, followed by dispatchOffsetRegister(addressOffset).
static void dispatchRegisterConfiguration(LinearStream &commandStream,
72+
uint64_t workPartitionSurfaceAddress,
73+
uint32_t addressOffset);
74+
75+
// Size of the single address-offset register write (EncodeSetMMIO::sizeIMM).
static size_t getOffsetRegisterSize();
76+
// Emits only the immediate write of addressOffset to the address-offset register.
static void dispatchOffsetRegister(LinearStream &commandStream,
77+
uint32_t addressOffset);
78+
7079
private:
7180
static bool pipeControlStallRequired;
7281
};

shared/source/command_container/implicit_scaling_xehp_and_later.inl

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
*
66
*/
77

8+
#include "shared/source/command_container/command_encoder.h"
89
#include "shared/source/command_container/implicit_scaling.h"
910
#include "shared/source/command_container/walker_partition_xehp_and_later.h"
1011
#include "shared/source/command_stream/linear_stream.h"
@@ -172,4 +173,34 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(LinearStream &c
172173
commandStream.getSpace(totalProgrammedSize);
173174
}
174175

176+
// Returns the combined size of the two commands emitted by dispatchRegisterConfiguration:
// one encodeMEM (sizeMEM) plus one offset-register write (getOffsetRegisterSize()).
template <typename GfxFamily>
177+
inline size_t ImplicitScalingDispatch<GfxFamily>::getRegisterConfigurationSize() {
178+
return EncodeSetMMIO<GfxFamily>::sizeMEM +
179+
getOffsetRegisterSize();
180+
}
181+
182+
// Programs both partition registers into commandStream.
//   commandStream               - stream that receives the commands
//   workPartitionSurfaceAddress - address passed to encodeMEM as the source for the
//                                 wparidCCSOffset register
//   addressOffset               - value forwarded to dispatchOffsetRegister
template <typename GfxFamily>
183+
inline void ImplicitScalingDispatch<GfxFamily>::dispatchRegisterConfiguration(LinearStream &commandStream,
184+
uint64_t workPartitionSurfaceAddress,
185+
uint32_t addressOffset) {
186+
EncodeSetMMIO<GfxFamily>::encodeMEM(commandStream,
187+
PartitionRegisters<GfxFamily>::wparidCCSOffset,
188+
workPartitionSurfaceAddress);
189+
// Order matches the code this commit removed from its callers: MEM load first, then the offset write.
dispatchOffsetRegister(commandStream, addressOffset);
190+
}
191+
192+
// Returns the size of the single encodeIMM command emitted by dispatchOffsetRegister.
template <typename GfxFamily>
193+
inline size_t ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize() {
194+
return EncodeSetMMIO<GfxFamily>::sizeIMM;
195+
}
196+
197+
// Writes addressOffset as an immediate value to the addressOffsetCCSOffset partition
// register via EncodeSetMMIO::encodeIMM on commandStream.
template <typename GfxFamily>
198+
inline void ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(LinearStream &commandStream,
199+
uint32_t addressOffset) {
200+
EncodeSetMMIO<GfxFamily>::encodeIMM(commandStream,
201+
PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
202+
addressOffset,
203+
// NOTE(review): meaning of this trailing flag is not visible in this diff; every
// removed call site passed the same `true` — confirm against encodeIMM's declaration.
true);
204+
}
205+
175206
} // namespace NEO

shared/source/command_stream/command_stream_receiver_hw.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
132132
TagAllocatorBase *getTimestampPacketAllocator() override;
133133

134134
void postInitFlagsSetup() override;
135+
void programActivePartitionConfig(LinearStream &csr);
135136

136137
protected:
137138
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);
@@ -150,7 +151,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
150151
void programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream);
151152
void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags);
152153
void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags);
153-
void programActivePartitionConfig();
154+
void programActivePartitionConfigFlushTask(LinearStream &csr);
154155

155156
void programEnginePrologue(LinearStream &csr);
156157
size_t getCmdSizeForPrologue() const;

0 commit comments

Comments
 (0)