Skip to content

Commit a20c0b7

Browse files
Add multidev black box test
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
1 parent 31dbc04 commit a20c0b7

File tree

3 files changed

+293
-6
lines changed

3 files changed

+293
-6
lines changed

level_zero/core/test/black_box_tests/CMakeLists.txt

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ set(TEST_TARGETS
2121
zello_immediate
2222
zello_ipc_copy_dma_buf
2323
zello_ipc_copy_dma_buf_p2p
24+
zello_multidev
2425
zello_printf
2526
zello_scratch
2627
zello_timestamp
@@ -66,14 +67,16 @@ foreach(TEST_NAME ${TEST_TARGETS})
6667
)
6768
endforeach()
6869

69-
target_link_libraries(zello_world_jitc_ocloc PUBLIC ocloc_lib)
70-
target_link_libraries(zello_scratch PUBLIC ocloc_lib)
71-
target_link_libraries(zello_fence PUBLIC ocloc_lib)
72-
target_link_libraries(zello_printf PUBLIC ocloc_lib)
73-
target_link_libraries(zello_image_view PUBLIC ocloc_lib)
74-
target_link_libraries(zello_dynamic_link PUBLIC ocloc_lib)
7570
target_link_libraries(zello_commandlist_immediate PUBLIC ocloc_lib)
71+
target_link_libraries(zello_dynamic_link PUBLIC ocloc_lib)
7672
target_link_libraries(zello_dyn_local_arg PUBLIC ocloc_lib)
73+
target_link_libraries(zello_fence PUBLIC ocloc_lib)
74+
target_link_libraries(zello_multidev PUBLIC ocloc_lib)
75+
target_link_libraries(zello_image_view PUBLIC ocloc_lib)
76+
target_link_libraries(zello_printf PUBLIC ocloc_lib)
77+
target_link_libraries(zello_world_jitc_ocloc PUBLIC ocloc_lib)
78+
target_link_libraries(zello_scratch PUBLIC ocloc_lib)
79+
7780
if(UNIX)
7881
target_link_libraries(zello_world_global_work_offset PUBLIC ocloc_lib)
7982
endif()

level_zero/core/test/black_box_tests/common/zello_common.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,49 @@ static inline void teardown(ze_context_handle_t context, ze_command_queue_handle
281281
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
282282
}
283283

284+
inline void printDeviceProperties(const ze_device_properties_t &props) {
285+
if (verbose) {
286+
std::cout << "Device : "
287+
<< "\n"
288+
<< " * name : " << props.name << "\n"
289+
<< " * type : " << ((props.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n"
290+
<< " * vendorId : " << props.vendorId << "\n"
291+
<< " * deviceId : " << props.deviceId << "\n"
292+
<< " * subdeviceId : " << props.subdeviceId << "\n"
293+
<< " * coreClockRate : " << props.coreClockRate << "\n"
294+
<< " * maxMemAllocSize : " << props.maxMemAllocSize << "\n"
295+
<< " * maxHardwareContexts : " << props.maxHardwareContexts << "\n"
296+
<< " * isSubdevice : " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE)) << "\n"
297+
<< " * eccMemorySupported : " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_PROPERTY_FLAG_ECC)) << "\n"
298+
<< " * onDemandPageFaultsSupported : " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING)) << "\n"
299+
<< " * maxCommandQueuePriority : " << props.maxCommandQueuePriority << "\n"
300+
<< " * numThreadsPerEU : " << props.numThreadsPerEU << "\n"
301+
<< " * numEUsPerSubslice : " << props.numEUsPerSubslice << "\n"
302+
<< " * numSubslicesPerSlice : " << props.numSubslicesPerSlice << "\n"
303+
<< " * numSlices : " << props.numSlices << "\n"
304+
<< " * physicalEUSimdWidth : " << props.physicalEUSimdWidth << "\n"
305+
<< " * timerResolution : " << props.timerResolution << "\n";
306+
}
307+
}
308+
309+
inline void printCacheProperties(uint32_t index, const ze_device_cache_properties_t &props) {
310+
if (verbose) {
311+
std::cout << "Cache properties: \n"
312+
<< index << "\n"
313+
<< " * User Cache Control : " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_CACHE_PROPERTY_FLAG_USER_CONTROL)) << "\n"
314+
<< " * cache size : " << props.cacheSize << "\n";
315+
}
316+
}
317+
318+
inline void printP2PProperties(const ze_device_p2p_properties_t &props, bool canAccessPeer, uint32_t device0Index, uint32_t device1Index) {
319+
if (verbose) {
320+
std::cout << " * P2P Properties device " << device0Index << " to peer " << device1Index << "\n";
321+
std::cout << "\t* accessSupported: " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS)) << "\n";
322+
std::cout << "\t* atomicsSupported: " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS)) << "\n";
323+
std::cout << "\t* canAccessPeer: " << std::boolalpha << static_cast<bool>(canAccessPeer) << "\n";
324+
}
325+
}
326+
284327
inline const std::vector<const char *> &getResourcesSearchLocations() {
285328
static std::vector<const char *> locations {
286329
"test_files/spv_modules/",
level_zero/core/test/black_box_tests/zello_multidev.cpp

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
/*
2+
* Copyright (C) 2022 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#include "zello_common.h"
9+
#include "zello_compile.h"
10+
11+
#include <fstream>
12+
#include <iostream>
13+
#include <memory>
14+
15+
bool verbose = false;
16+
17+
int main(int argc, char *argv[]) {
18+
verbose = isVerbose(argc, argv);
19+
20+
// Set-up
21+
constexpr size_t allocSize = 4096;
22+
constexpr size_t bytesPerThread = sizeof(char);
23+
constexpr size_t numThreads = allocSize / bytesPerThread;
24+
std::vector<ze_module_handle_t> module;
25+
std::vector<ze_device_handle_t> devices;
26+
std::vector<std::string> deviceNames;
27+
std::vector<ze_kernel_handle_t> kernel;
28+
std::vector<ze_command_queue_handle_t> cmdQueue;
29+
std::vector<ze_command_list_handle_t> cmdList;
30+
void *srcBuffer = nullptr;
31+
void *dstBuffer = nullptr;
32+
bool outputValidationSuccessful = false;
33+
34+
ze_context_handle_t context = nullptr;
35+
ze_driver_handle_t driverHandle = nullptr;
36+
devices = zelloInitContextAndGetDevices(context, driverHandle);
37+
uint32_t deviceCount = (uint32_t)devices.size();
38+
39+
// Get subdevices for each device and add to total count of devices
40+
for (uint32_t i = 0; i < deviceCount; i++) {
41+
uint32_t count = 0;
42+
SUCCESS_OR_TERMINATE(zeDeviceGetSubDevices(devices[i], &count, nullptr));
43+
44+
deviceCount += count;
45+
devices.resize(deviceCount);
46+
47+
SUCCESS_OR_TERMINATE(zeDeviceGetSubDevices(devices[i], &count,
48+
devices.data() + (deviceCount - count)));
49+
}
50+
51+
deviceNames.resize(devices.size());
52+
53+
for (uint32_t i = 0; i < deviceCount; i++) {
54+
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
55+
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(devices[i], &deviceProperties));
56+
printDeviceProperties(deviceProperties);
57+
58+
deviceNames[i].assign(deviceProperties.name, strlen(deviceProperties.name));
59+
60+
uint32_t cachePropertiesCount = 0;
61+
SUCCESS_OR_TERMINATE(zeDeviceGetCacheProperties(devices[i], &cachePropertiesCount, nullptr));
62+
63+
std::vector<ze_device_cache_properties_t> cacheProperties;
64+
cacheProperties.resize(cachePropertiesCount);
65+
SUCCESS_OR_TERMINATE(zeDeviceGetCacheProperties(devices[i], &cachePropertiesCount, cacheProperties.data()));
66+
67+
for (uint32_t cacheIndex = 0; cacheIndex < cachePropertiesCount; cacheIndex++) {
68+
printCacheProperties(cacheIndex, cacheProperties[cacheIndex]);
69+
}
70+
71+
ze_device_p2p_properties_t deviceP2PProperties = {ZE_STRUCTURE_TYPE_DEVICE_P2P_PROPERTIES};
72+
for (uint32_t j = 0; j < deviceCount; j++) {
73+
if (j == i)
74+
continue;
75+
SUCCESS_OR_TERMINATE(zeDeviceGetP2PProperties(devices[i], devices[j], &deviceP2PProperties));
76+
ze_bool_t canAccessPeer = false;
77+
SUCCESS_OR_TERMINATE(zeDeviceCanAccessPeer(devices[i], devices[j], &canAccessPeer));
78+
printP2PProperties(deviceP2PProperties, canAccessPeer, i, j);
79+
if (canAccessPeer == false) {
80+
std::cout << "Device " << i << " cannot access " << j << "\n";
81+
std::terminate();
82+
}
83+
}
84+
}
85+
86+
module.resize(deviceCount);
87+
cmdQueue.resize(deviceCount);
88+
cmdList.resize(deviceCount);
89+
kernel.resize(deviceCount);
90+
91+
std::string buildLog;
92+
auto moduleBinary = compileToSpirV(const_cast<const char *>(memcpyBytesTestKernelSrc), "", buildLog);
93+
if (buildLog.size() > 0) {
94+
std::cout << "Build log " << buildLog;
95+
}
96+
SUCCESS_OR_TERMINATE((0 == moduleBinary.size()));
97+
98+
// init everything
99+
for (uint32_t i = 0; i < deviceCount; i++) {
100+
std::cout << "Creating objects for device " << i << " " << deviceNames[i] << "\n";
101+
ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
102+
cmdQueueDesc.pNext = nullptr;
103+
cmdQueueDesc.flags = 0;
104+
cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
105+
cmdQueueDesc.ordinal = getCommandQueueOrdinal(devices[i]);
106+
cmdQueueDesc.index = 0;
107+
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
108+
SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, devices[i], &cmdQueueDesc, &cmdQueue[i]));
109+
110+
ze_command_list_desc_t cmdListDesc = {};
111+
cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
112+
cmdListDesc.pNext = nullptr;
113+
cmdListDesc.flags = 0;
114+
SUCCESS_OR_TERMINATE(zeCommandListCreate(context, devices[i], &cmdListDesc, &cmdList[i]));
115+
116+
ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
117+
moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
118+
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(moduleBinary.data());
119+
moduleDesc.inputSize = moduleBinary.size();
120+
SUCCESS_OR_TERMINATE(zeModuleCreate(context, devices[i], &moduleDesc, &module[i], nullptr));
121+
122+
ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
123+
kernelDesc.pKernelName = "memcpy_bytes";
124+
SUCCESS_OR_TERMINATE(zeKernelCreate(module[i], &kernelDesc, &kernel[i]));
125+
}
126+
127+
// ITERATE OVER DEVICES and Launch the function
128+
for (uint32_t i = 0; i < deviceCount; i++) {
129+
std::cout << "Launching kernels for device " << i << " " << deviceNames[i] << "\n";
130+
uint32_t groupSizeX = 32u;
131+
uint32_t groupSizeY = 1u;
132+
uint32_t groupSizeZ = 1u;
133+
SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel[i], numThreads, 1U, 1U,
134+
&groupSizeX, &groupSizeY, &groupSizeZ));
135+
SUCCESS_OR_TERMINATE_BOOL(numThreads % groupSizeX == 0);
136+
if (verbose) {
137+
std::cout << "Group size : (" << groupSizeX << ", " << groupSizeY << ", " << groupSizeZ
138+
<< ")" << std::endl;
139+
}
140+
SUCCESS_OR_TERMINATE(
141+
zeKernelSetGroupSize(kernel[i], groupSizeX, groupSizeY, groupSizeZ));
142+
143+
// Alloc buffers
144+
srcBuffer = nullptr;
145+
dstBuffer = nullptr;
146+
147+
ze_device_mem_alloc_desc_t deviceDesc = {};
148+
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
149+
deviceDesc.ordinal = i;
150+
deviceDesc.flags = 0;
151+
deviceDesc.pNext = nullptr;
152+
153+
ze_host_mem_alloc_desc_t hostDesc = {};
154+
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
155+
hostDesc.pNext = nullptr;
156+
hostDesc.flags = 0;
157+
158+
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc,
159+
allocSize, 1, devices[i], &srcBuffer));
160+
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc,
161+
allocSize, 1, devices[i], &dstBuffer));
162+
163+
// Init data and copy to device
164+
uint8_t initDataSrc[allocSize];
165+
memset(initDataSrc, 7, sizeof(initDataSrc));
166+
uint8_t initDataDst[allocSize];
167+
memset(initDataDst, 3, sizeof(initDataDst));
168+
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
169+
cmdList[i], srcBuffer, initDataSrc, sizeof(initDataSrc), nullptr, 0, nullptr));
170+
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
171+
cmdList[i], dstBuffer, initDataDst, sizeof(initDataDst), nullptr, 0, nullptr));
172+
173+
// copying of data must finish before running the user function
174+
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList[i], nullptr, 0, nullptr));
175+
176+
// Set function args and get ready to dispatch
177+
SUCCESS_OR_TERMINATE(
178+
zeKernelSetArgumentValue(kernel[i], 0, sizeof(dstBuffer), &dstBuffer));
179+
SUCCESS_OR_TERMINATE(
180+
zeKernelSetArgumentValue(kernel[i], 1, sizeof(srcBuffer), &srcBuffer));
181+
182+
ze_group_count_t dispatchTraits;
183+
dispatchTraits.groupCountX = numThreads / groupSizeX;
184+
dispatchTraits.groupCountY = 1u;
185+
dispatchTraits.groupCountZ = 1u;
186+
if (verbose) {
187+
std::cerr << "Number of groups : (" << dispatchTraits.groupCountX << ", "
188+
<< dispatchTraits.groupCountY << ", " << dispatchTraits.groupCountZ << ")"
189+
<< std::endl;
190+
}
191+
SUCCESS_OR_TERMINATE_BOOL(dispatchTraits.groupCountX * groupSizeX == allocSize);
192+
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(
193+
cmdList[i], kernel[i], &dispatchTraits, nullptr, 0, nullptr));
194+
195+
// Barrier to complete function
196+
uint8_t readBackData[allocSize];
197+
memset(readBackData, 2, sizeof(readBackData));
198+
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList[i], nullptr, 0, nullptr));
199+
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
200+
cmdList[i], readBackData, dstBuffer, sizeof(readBackData), nullptr, 0, nullptr));
201+
202+
// Dispatch and wait
203+
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList[i]));
204+
SUCCESS_OR_TERMINATE(
205+
zeCommandQueueExecuteCommandLists(cmdQueue[i], 1, &cmdList[i], nullptr));
206+
auto synchronizationResult = zeCommandQueueSynchronize(cmdQueue[i], std::numeric_limits<uint64_t>::max());
207+
SUCCESS_OR_WARNING(synchronizationResult);
208+
209+
// Validate
210+
outputValidationSuccessful = true;
211+
for (size_t i = 0; i < allocSize; ++i) {
212+
outputValidationSuccessful &=
213+
((unsigned char)(initDataSrc[i]) == (unsigned char)readBackData[i]);
214+
}
215+
216+
// Release Mem
217+
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
218+
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
219+
220+
// Break immediately if output validation is false
221+
if (!outputValidationSuccessful) {
222+
break;
223+
}
224+
}
225+
226+
for (uint32_t i = 0; i < deviceCount; i++) {
227+
std::cout << "Freeing objects for device " << i << " " << deviceNames[i] << "\n";
228+
SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel[i]));
229+
SUCCESS_OR_TERMINATE(zeModuleDestroy(module[i]));
230+
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList[i]));
231+
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue[i]));
232+
}
233+
234+
bool aubMode = isAubMode(argc, argv);
235+
if (aubMode == false) {
236+
std::cout << "\nZello Multidev Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED")
237+
<< std::endl;
238+
}
239+
int resultOnFailure = aubMode ? 0 : 1;
240+
return outputValidationSuccessful ? 0 : resultOnFailure;
241+
}

0 commit comments

Comments
 (0)