Skip to content

Commit 36d350c

Browse files
jchodor authored and Compute-Runtime-Automation committed
Zebin per_thread_memory_buffers
Change-Id: I66074ac9f1d5b1417dfad5c044149e86ab9aad1d
1 parent 86b1332 commit 36d350c

File tree

4 files changed

+631
-5
lines changed

4 files changed

+631
-5
lines changed

shared/source/device_binary_format/elf/zebin_elf.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ static constexpr ConstStringRef executionEnv("execution_env");
8989
static constexpr ConstStringRef payloadArguments("payload_arguments");
9090
static constexpr ConstStringRef bindingTableIndices("binding_table_indices");
9191
static constexpr ConstStringRef perThreadPayloadArguments("per_thread_payload_arguments");
92+
static constexpr ConstStringRef perThreadMemoryBuffers("per_thread_memory_buffers");
9293

9394
namespace ExecutionEnv {
9495
static constexpr ConstStringRef actualKernelStartOffset("actual_kernel_start_offset");
@@ -164,6 +165,22 @@ static constexpr ConstStringRef packedLocalIds("packed_local_ids");
164165
static constexpr ConstStringRef localId("local_id");
165166
} // namespace ArgType
166167
} // namespace PerThreadPayloadArgument
168+
169+
namespace PerThreadMemoryBuffer {
170+
static constexpr ConstStringRef allocationType("type");
171+
static constexpr ConstStringRef memoryUsage("usage");
172+
static constexpr ConstStringRef size("size");
173+
namespace AllocationType {
174+
static constexpr ConstStringRef global("global");
175+
static constexpr ConstStringRef scratch("scratch");
176+
static constexpr ConstStringRef slm("slm");
177+
} // namespace AllocationType
178+
namespace MemoryUsage {
179+
static constexpr ConstStringRef privateSpace("private_space");
180+
static constexpr ConstStringRef spillFillSpace("spill_fill_space");
181+
static constexpr ConstStringRef singleSpace("single_space");
182+
} // namespace MemoryUsage
183+
} // namespace PerThreadMemoryBuffer
167184
} // namespace Kernel
168185
} // namespace Tags
169186

@@ -329,6 +346,32 @@ struct BindingTableEntryBaseT {
329346
};
330347
} // namespace BindingTableEntry
331348

349+
// Decoded (non-textual) representation of one per-thread memory buffer entry.
namespace PerThreadMemoryBuffer {

// Allocation type of the buffer. The zero value (Unknown) is the state of an
// entry whose "type" attribute has not been decoded.
enum AllocationType : uint8_t {
    AllocationTypeUnknown = 0,
    AllocationTypeGlobal,
    AllocationTypeScratch,
    AllocationTypeSlm
};

// Memory usage of the buffer. The zero value (Unknown) is the state of an
// entry whose "usage" attribute has not been decoded.
enum MemoryUsage : uint8_t {
    MemoryUsageUnknown = 0,
    MemoryUsagePrivateSpace,
    MemoryUsageSpillFillSpace,
    MemoryUsageSingleSpace
};

// Member types of PerThreadMemoryBufferBaseT. Note that SizeT is signed.
using SizeT = int32_t;
using AllocationTypeT = AllocationType;
using MemoryUsageT = MemoryUsage;

// One per-thread memory buffer entry; every member defaults to its
// "unknown"/zero value.
struct PerThreadMemoryBufferBaseT {
    AllocationTypeT allocationType = AllocationTypeUnknown; // declared through the alias, like memoryUsage
    MemoryUsageT memoryUsage = MemoryUsageUnknown;
    SizeT size = 0; // signed literal to match the signed SizeT (was 0U)
};

} // namespace PerThreadMemoryBuffer
374+
332375
} // namespace Kernel
333376

334377
} // namespace Types

shared/source/device_binary_format/zebin_decoder.cpp

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ void extractZeInfoKernelSections(const NEO::Yaml::YamlParser &parser, const NEO:
116116
outZeInfoKernelSections.perThreadPayloadArgumentsNd.push_back(&kernelMetadataNd);
117117
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::bindingTableIndices == key) {
118118
outZeInfoKernelSections.bindingTableIndicesNd.push_back(&kernelMetadataNd);
119+
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::perThreadMemoryBuffers == key) {
120+
outZeInfoKernelSections.perThreadMemoryBuffersNd.push_back(&kernelMetadataNd);
119121
} else {
120122
outWarning.append("DeviceBinaryFormat::Zebin::" + NEO::Elf::SectionsNamesZebin::zeInfo.str() + " : Unknown entry \"" + parser.readKey(kernelMetadataNd).str() + "\" in context of : " + context.str() + "\n");
121123
}
@@ -128,6 +130,7 @@ DecodeError validateZeInfoKernelSectionsCount(const ZeInfoKernelSections &outZeI
128130
valid &= validateZebinSectionsCountAtMost(outZeInfoKernelSections.payloadArgumentsNd, NEO::Elf::ZebinKernelMetadata::Tags::Kernel::payloadArguments, 1U, outErrReason, outWarning);
129131
valid &= validateZebinSectionsCountAtMost(outZeInfoKernelSections.perThreadPayloadArgumentsNd, NEO::Elf::ZebinKernelMetadata::Tags::Kernel::perThreadPayloadArguments, 1U, outErrReason, outWarning);
130132
valid &= validateZebinSectionsCountAtMost(outZeInfoKernelSections.bindingTableIndicesNd, NEO::Elf::ZebinKernelMetadata::Tags::Kernel::bindingTableIndices, 1U, outErrReason, outWarning);
133+
valid &= validateZebinSectionsCountAtMost(outZeInfoKernelSections.perThreadMemoryBuffersNd, NEO::Elf::ZebinKernelMetadata::Tags::Kernel::perThreadMemoryBuffers, 1U, outErrReason, outWarning);
131134

132135
return valid ? DecodeError::Success : DecodeError::InvalidBinary;
133136
}
@@ -306,6 +309,54 @@ bool readEnumChecked(const Yaml::Token *token, NEO::Elf::ZebinKernelMetadata::Ty
306309
return true;
307310
}
308311

312+
// Translates the textual allocation type held by `token` into its enum value.
//
// Returns true and writes `out` when the token text is one of the recognized
// allocation type names. Returns false when `token` is null (nothing is
// appended to outErrReason in that case) or when the text is not recognized
// (a message naming the text and `context` is appended to outErrReason).
// `out` is left untouched on failure.
bool readEnumChecked(const Yaml::Token *token, NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::AllocationType &out,
                     ConstStringRef context, std::string &outErrReason) {
    if (nullptr == token) {
        return false;
    }

    namespace AllocationTypeTags = NEO::Elf::ZebinKernelMetadata::Tags::Kernel::PerThreadMemoryBuffer::AllocationType;
    using AllocationTypeEnum = NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::AllocationType;

    auto tokenValue = token->cstrref();

    if (AllocationTypeTags::global == tokenValue) {
        out = AllocationTypeEnum::AllocationTypeGlobal;
        return true;
    }
    if (AllocationTypeTags::scratch == tokenValue) {
        out = AllocationTypeEnum::AllocationTypeScratch;
        return true;
    }
    if (AllocationTypeTags::slm == tokenValue) {
        out = AllocationTypeEnum::AllocationTypeSlm;
        return true;
    }

    // None of the known names matched - report the offending text.
    outErrReason.append("DeviceBinaryFormat::Zebin::" + NEO::Elf::SectionsNamesZebin::zeInfo.str() + " : Unhandled \"" + tokenValue.str() + "\" per-thread memory buffer allocation type in context of " + context.str() + "\n");
    return false;
}
335+
336+
// Translates the textual memory usage held by `token` into its enum value.
//
// Returns true and writes `out` when the token text is one of the recognized
// memory usage names. Returns false when `token` is null (nothing is appended
// to outErrReason in that case) or when the text is not recognized (a message
// naming the text and `context` is appended to outErrReason).
// `out` is left untouched on failure.
bool readEnumChecked(const Yaml::Token *token, NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::MemoryUsage &out,
                     ConstStringRef context, std::string &outErrReason) {
    if (nullptr == token) {
        return false;
    }

    namespace MemoryUsageTags = NEO::Elf::ZebinKernelMetadata::Tags::Kernel::PerThreadMemoryBuffer::MemoryUsage;
    using MemoryUsageEnum = NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::MemoryUsage;

    auto tokenValue = token->cstrref();

    if (MemoryUsageTags::privateSpace == tokenValue) {
        out = MemoryUsageEnum::MemoryUsagePrivateSpace;
        return true;
    }
    if (MemoryUsageTags::spillFillSpace == tokenValue) {
        out = MemoryUsageEnum::MemoryUsageSpillFillSpace;
        return true;
    }
    if (MemoryUsageTags::singleSpace == tokenValue) {
        out = MemoryUsageEnum::MemoryUsageSingleSpace;
        return true;
    }

    // None of the known names matched - report the offending text.
    outErrReason.append("DeviceBinaryFormat::Zebin::" + NEO::Elf::SectionsNamesZebin::zeInfo.str() + " : Unhandled \"" + tokenValue.str() + "\" per-thread memory buffer usage type in context of " + context.str() + "\n");
    return false;
}
359+
309360
DecodeError readZeInfoPerThreadPayloadArguments(const NEO::Yaml::YamlParser &parser, const NEO::Yaml::Node &node,
310361
ZeInfoPerThreadPayloadArguments &outPerThreadPayloadArguments,
311362
ConstStringRef context,
@@ -401,6 +452,32 @@ DecodeError readZeInfoBindingTableIndices(const NEO::Yaml::YamlParser &parser, c
401452
return validBindingTableEntries ? DecodeError::Success : DecodeError::InvalidBinary;
402453
}
403454

455+
// Decodes every child of `node` as one per-thread memory buffer entry and
// appends it to outPerThreadMemoryBuffers.
//
// For each entry the "type", "usage" and "size" members are read into the
// corresponding fields; any other key only produces a warning in outWarning.
// All entries are visited even after a failure, so outErrReason may collect
// several messages. Returns DecodeError::Success when every recognized member
// was read successfully, DecodeError::InvalidBinary otherwise.
DecodeError readZeInfoPerThreadMemoryBuffers(const NEO::Yaml::YamlParser &parser, const NEO::Yaml::Node &node,
                                             ZeInfoPerThreadMemoryBuffers &outPerThreadMemoryBuffers,
                                             ConstStringRef context,
                                             std::string &outErrReason, std::string &outWarning) {
    namespace BufferTags = NEO::Elf::ZebinKernelMetadata::Tags::Kernel::PerThreadMemoryBuffer;

    bool allMembersValid = true;
    for (const auto &bufferNd : parser.createChildrenRange(node)) {
        // Grow the output by one default-initialized entry and fill it in place.
        outPerThreadMemoryBuffers.resize(outPerThreadMemoryBuffers.size() + 1);
        auto &buffer = *outPerThreadMemoryBuffers.rbegin();

        for (const auto &memberNd : parser.createChildrenRange(bufferNd)) {
            auto key = parser.readKey(memberNd);

            if (BufferTags::allocationType == key) {
                allMembersValid &= readEnumChecked(parser.getValueToken(memberNd), buffer.allocationType, context, outErrReason);
                continue;
            }
            if (BufferTags::memoryUsage == key) {
                allMembersValid &= readEnumChecked(parser.getValueToken(memberNd), buffer.memoryUsage, context, outErrReason);
                continue;
            }
            if (BufferTags::size == key) {
                allMembersValid &= readZeInfoValueChecked(parser, memberNd, buffer.size, context, outErrReason);
                continue;
            }

            // Unrecognized keys are tolerated; they do not invalidate the binary.
            outWarning.append("DeviceBinaryFormat::Zebin::" + NEO::Elf::SectionsNamesZebin::zeInfo.str() + " : Unknown entry \"" + key.str() + "\" for per-thread memory buffer in context of " + context.str() + "\n");
        }
    }

    if (allMembersValid) {
        return DecodeError::Success;
    }
    return DecodeError::InvalidBinary;
}
480+
404481
template <typename ElSize, size_t Len>
405482
bool setVecArgIndicesBasedOnSize(CrossThreadDataOffset (&vec)[Len], size_t vecSize, CrossThreadDataOffset baseOffset) {
406483
switch (vecSize) {
@@ -423,7 +500,7 @@ NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Type
423500
std::string &outErrReason, std::string &outWarning) {
424501
switch (src.argType) {
425502
default:
426-
outErrReason.append("DeviceBinaryFormat::Zebin : Invalid arg type in per thread data section in context of : " + dst.kernelMetadata.kernelName + ".\n");
503+
outErrReason.append("DeviceBinaryFormat::Zebin : Invalid arg type in per-thread data section in context of : " + dst.kernelMetadata.kernelName + ".\n");
427504
return DecodeError::InvalidBinary;
428505
case NEO::Elf::ZebinKernelMetadata::Types::Kernel::ArgTypeLocalId: {
429506
if (src.offset != 0) {
@@ -603,6 +680,34 @@ NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Type
603680
return DecodeError::Success;
604681
}
605682

683+
// Applies one decoded per-thread memory buffer entry (src) to the kernel
// descriptor (dst).
//
// - global  : memory usage must be private_space; the size is stored as
//             dst.kernelAttributes.perThreadPrivateMemorySize.
// - scratch : the size is stored as dst.kernelAttributes.perThreadScratchSize[0];
//             a second scratch entry (slot already non-zero) is an error.
// - any other allocation type (including slm and unknown) is rejected.
//
// A negative size is rejected up front. Returns DecodeError::Success on
// success; otherwise appends a message to outErrReason and returns
// DecodeError::InvalidBinary. outWarning is currently not written by this function.
NEO::DecodeError populateKernelDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::PerThreadMemoryBufferBaseT &src, NEO::KernelDescriptor &dst,
                                          std::string &outErrReason, std::string &outWarning) {
    using namespace NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer;
    using namespace NEO::Elf::ZebinKernelMetadata::Tags::Kernel::PerThreadMemoryBuffer::AllocationType;
    using namespace NEO::Elf::ZebinKernelMetadata::Tags::Kernel::PerThreadMemoryBuffer::MemoryUsage;

    // src.size is a signed value (SizeT is int32_t). A negative size cannot
    // describe a buffer and must not be copied into the descriptor, where it
    // would be converted implicitly if the destination field is unsigned
    // (NOTE(review): destination field types are not visible here - confirm).
    if (src.size < 0) {
        outErrReason.append("DeviceBinaryFormat::Zebin : Invalid per-thread memory buffer allocation size (size must not be negative) in context of : " + dst.kernelMetadata.kernelName + ".\n");
        return DecodeError::InvalidBinary;
    }

    switch (src.allocationType) {
    default:
        // AllocationTypeSlm and AllocationTypeUnknown have no case and end up here.
        outErrReason.append("DeviceBinaryFormat::Zebin : Invalid per-thread memory buffer allocation type in context of : " + dst.kernelMetadata.kernelName + ".\n");
        return DecodeError::InvalidBinary;
    case AllocationTypeGlobal:
        if (MemoryUsagePrivateSpace != src.memoryUsage) {
            outErrReason.append("DeviceBinaryFormat::Zebin : Invalid per-thread memory buffer memory usage type for " + global.str() + " allocation type in context of : " + dst.kernelMetadata.kernelName + ". Expected : " + privateSpace.str() + ".\n");
            return DecodeError::InvalidBinary;
        }

        dst.kernelAttributes.perThreadPrivateMemorySize = src.size;
        break;
    case AllocationTypeScratch:
        // A non-zero slot means an earlier scratch entry already set it.
        if (0 != dst.kernelAttributes.perThreadScratchSize[0]) {
            outErrReason.append("DeviceBinaryFormat::Zebin : Invalid duplicated scratch buffer entry in context of : " + dst.kernelMetadata.kernelName + ".\n");
            return DecodeError::InvalidBinary;
        }
        dst.kernelAttributes.perThreadScratchSize[0] = src.size;
        break;
    }
    return DecodeError::Success;
}
710+
606711
NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<NEO::Elf::EI_CLASS_64> &elf, NEO::ZebinSections &zebinSections,
607712
NEO::Yaml::YamlParser &yamlParser, const NEO::Yaml::Node &kernelNd, std::string &outErrReason, std::string &outWarning) {
608713
auto kernelInfo = std::make_unique<NEO::KernelInfo>();
@@ -642,6 +747,15 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<N
642747
}
643748
}
644749

750+
ZeInfoPerThreadMemoryBuffers perThreadMemoryBuffers;
751+
if (false == zeInfokernelSections.perThreadMemoryBuffersNd.empty()) {
752+
auto perThreadMemoryBuffersErr = readZeInfoPerThreadMemoryBuffers(yamlParser, *zeInfokernelSections.perThreadMemoryBuffersNd[0], perThreadMemoryBuffers,
753+
kernelDescriptor.kernelMetadata.kernelName, outErrReason, outWarning);
754+
if (DecodeError::Success != perThreadMemoryBuffersErr) {
755+
return perThreadMemoryBuffersErr;
756+
}
757+
}
758+
645759
kernelDescriptor.kernelAttributes.hasBarriers = execEnv.barrierCount;
646760
kernelDescriptor.kernelAttributes.flags.usesBarriers = (kernelDescriptor.kernelAttributes.hasBarriers > 0U);
647761
kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = execEnv.disableMidThreadPreemption;
@@ -684,6 +798,13 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<N
684798
}
685799
}
686800

801+
for (const auto &memBuff : perThreadMemoryBuffers) {
802+
auto decodeErr = populateKernelDescriptor(memBuff, kernelDescriptor, outErrReason, outWarning);
803+
if (DecodeError::Success != decodeErr) {
804+
return decodeErr;
805+
}
806+
}
807+
687808
if (NEO::DebugManager.flags.ZebinAppendElws.get()) {
688809
kernelDescriptor.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[0] = alignDown(crossThreadDataSize + 12, 32);
689810
kernelDescriptor.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[1] = kernelDescriptor.payloadMappings.dispatchTraits.enqueuedLocalWorkSize[0] + 4;

shared/source/device_binary_format/zebin_decoder.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ struct ZeInfoKernelSections {
3333
UniqueNode payloadArgumentsNd;
3434
UniqueNode bindingTableIndicesNd;
3535
UniqueNode perThreadPayloadArgumentsNd;
36+
UniqueNode perThreadMemoryBuffersNd;
3637
};
3738

3839
DecodeError extractZebinSections(NEO::Elf::Elf<Elf::EI_CLASS_64> &elf, ZebinSections &out, std::string &outErrReason, std::string &outWarning);
@@ -50,6 +51,10 @@ bool readEnumChecked(const Yaml::Token *token, NEO::Elf::ZebinKernelMetadata::Ty
5051
ConstStringRef context, std::string &outErrReason);
5152
bool readEnumChecked(const Yaml::Token *token, NEO::Elf::ZebinKernelMetadata::Types::Kernel::PayloadArgument::AccessType &out,
5253
ConstStringRef context, std::string &outErrReason);
54+
// Reads a per-thread memory buffer allocation type from `token` into `out`.
// Returns false for a null token or an unrecognized value; in the latter case
// a message mentioning `context` is appended to outErrReason.
bool readEnumChecked(const Yaml::Token *token,
                     NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::AllocationType &out,
                     ConstStringRef context,
                     std::string &outErrReason);

// Reads a per-thread memory buffer memory usage from `token` into `out`.
// Returns false for a null token or an unrecognized value; in the latter case
// a message mentioning `context` is appended to outErrReason.
bool readEnumChecked(const Yaml::Token *token,
                     NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::MemoryUsage &out,
                     ConstStringRef context,
                     std::string &outErrReason);
5358

5459
using ZeInfoPerThreadPayloadArguments = StackVec<NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadPayloadArgument::PerThreadPayloadArgumentBaseT, 2>;
5560
DecodeError readZeInfoPerThreadPayloadArguments(const NEO::Yaml::YamlParser &parser, const NEO::Yaml::Node &node,
@@ -70,6 +75,12 @@ DecodeError readZeInfoBindingTableIndices(const NEO::Yaml::YamlParser &parser, c
7075
ConstStringRef context,
7176
std::string &outErrReason, std::string &outWarning);
7277

78+
using ZeInfoPerThreadMemoryBuffers = StackVec<NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::PerThreadMemoryBufferBaseT, 8>;
79+
DecodeError readZeInfoPerThreadMemoryBuffers(const NEO::Yaml::YamlParser &parser, const NEO::Yaml::Node &node,
80+
ZeInfoPerThreadMemoryBuffers &outPerThreadMemoryBuffers,
81+
ConstStringRef context,
82+
std::string &outErrReason, std::string &outWarning);
83+
7384
NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadPayloadArgument::PerThreadPayloadArgumentBaseT &src, NEO::KernelDescriptor &dst,
7485
std::string &outErrReason, std::string &outWarning);
7586

0 commit comments

Comments
 (0)