Skip to content

Commit ccc6526

Browse files
authored
[PTISDK] synchronization implementation (#442)
1 parent 4de8724 commit ccc6526

File tree

12 files changed

+716
-75
lines changed

12 files changed

+716
-75
lines changed

sdk/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.11.0
1+
0.12.0

sdk/include/pti/pti_view.h

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,35 @@ extern "C" {
3030
* passed to ptiViewEnable/ptiViewDisable
3131
*/
3232
typedef enum _pti_view_kind {
33-
PTI_VIEW_INVALID = 0, //!< Invalid
34-
PTI_VIEW_DEVICE_GPU_KERNEL = 1, //!< Device kernels
35-
PTI_VIEW_DEVICE_CPU_KERNEL = 2, //!< Host (CPU) kernels
36-
PTI_VIEW_DRIVER_API = 3, //!< Driver (aka back-end) API tracing
37-
PTI_VIEW_RESERVED = 4, //!< For future use
38-
PTI_VIEW_COLLECTION_OVERHEAD = 5, //!< Collection overhead
39-
PTI_VIEW_RUNTIME_API = 6, //!< Runtime(Sycl, other) API tracing
40-
PTI_VIEW_EXTERNAL_CORRELATION = 7, //!< Correlation of external operations
41-
PTI_VIEW_DEVICE_GPU_MEM_COPY = 8, //!< Memory copies between Host and Device
42-
PTI_VIEW_DEVICE_GPU_MEM_FILL = 9, //!< Device memory fills
43-
PTI_VIEW_DEVICE_GPU_MEM_COPY_P2P = 10, //!< Peer to Peer Memory copies between Devices.
33+
PTI_VIEW_INVALID = 0, //!< Invalid
34+
PTI_VIEW_DEVICE_GPU_KERNEL = 1, //!< Device kernels
35+
PTI_VIEW_DEVICE_CPU_KERNEL = 2, //!< Host (CPU) kernels
36+
PTI_VIEW_DRIVER_API = 3, //!< Driver (aka back-end) API tracing
37+
PTI_VIEW_RESERVED = 4, //!< For future use
38+
PTI_VIEW_COLLECTION_OVERHEAD = 5, //!< Collection overhead
39+
PTI_VIEW_RUNTIME_API = 6, //!< Runtime(Sycl, other) API tracing
40+
PTI_VIEW_EXTERNAL_CORRELATION = 7, //!< Correlation of external operations
41+
PTI_VIEW_DEVICE_GPU_MEM_COPY = 8, //!< Memory copies between Host and Device
42+
PTI_VIEW_DEVICE_GPU_MEM_FILL = 9, //!< Device memory fills
43+
PTI_VIEW_DEVICE_GPU_MEM_COPY_P2P = 10, //!< Peer to Peer Memory copies between Devices.
44+
PTI_VIEW_DEVICE_SYNCHRONIZATION = 11, //!< Synchronization operations on host and GPU.
4445
} pti_view_kind;
4546

47+
/**
48+
* @brief Synchronization types.
49+
* Types named *_GPU_* mark the synchronization start/complete on the device (e.g. barriers).
50+
* Types named *_HOST_* mark the synchronization start/end on the host (e.g. fences).
51+
*/
52+
typedef enum _pti_view_synchronization_type {
53+
PTI_VIEW_SYNCHRONIZATION_TYPE_UNKNOWN = 0, //!< Unknown synchronization type
54+
PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_EXECUTION = 1, //!< Barrier: execution and global memory synchronization type
55+
PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_MEMORY = 2, //!< Barrier: memory range coherency synchronization type
56+
PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_FENCE= 3, //!< Fence: coarse-grained execution synchronization type
57+
PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_EVENT = 4, //!< Event host synchronization type
58+
PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_COMMAND_LIST = 5, //!< Command list host synchronization type
59+
PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_COMMAND_QUEUE = 6, //!< Command queue host synchronization type
60+
} pti_view_synchronization_type;
61+
4662
/**
4763
* @brief Memory types
4864
*/
@@ -119,6 +135,8 @@ typedef void* pti_backend_queue_t; //!< Backend queue handle
119135

120136
typedef void* pti_backend_ctx_t; //!< Backend context handle
121137

138+
typedef void* pti_backend_evt_t; //!< Backend event handle
139+
122140
/**
123141
* @brief Base View record type
124142
*/
@@ -159,6 +177,27 @@ typedef struct pti_view_record_kernel {
159177
uint32_t _sycl_invocation_id; //!< SYCL Invocation ID
160178
} pti_view_record_kernel;
161179

180+
/**
181+
* @brief Synchronization View record type
182+
*/
183+
typedef struct pti_view_record_synchronization{
184+
pti_view_record_base _view_kind; //!< Base record
185+
pti_view_synchronization_type _synch_type; //!< Synchronization type
186+
pti_backend_ctx_t _context_handle; //!< Context handle
187+
pti_backend_ctx_t _queue_handle; //!< Queue handle
188+
pti_backend_evt_t _event_handle; //!< Event handle synchronization api is called with.
189+
uint64_t _start_timestamp; //!< For host synchronization types: function enter timestamp
190+
//!< For gpu synchronization types: synch start timestamp on device
191+
uint64_t _end_timestamp; //!< For host synchronization types: function exit timestamp
192+
//!< For gpu synchronization types: synch complete timestamp on device
193+
uint32_t _thread_id; //!< Thread ID of function call
194+
uint32_t _correlation_id; //!< ID that correlates this record with records of other Views
195+
uint32_t _number_wait_events; //!< For relevent event synch types (eg. Barriers)
196+
uint32_t _return_code; //!< L0/OCL synch api onexit return type - cast to specific driver code type
197+
uint32_t _api_id; //!< Id of this synch api call
198+
pti_api_group_id _api_group; //!< Defines api api_group this record was collected in (L0,Sycl,OCL, etc).
199+
} pti_view_record_synchronization;
200+
162201
/**
163202
* @brief Memory Copy Operation View record type
164203
*/

sdk/samples/samples_utilities/samples_utils.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,48 @@ inline void DumpRecord(pti_view_record_api* record) {
256256
std::cout << "Api Correlation Id: " << record->_correlation_id << '\n';
257257
}
258258

259+
/**
 * @brief Prints a synchronization view record to std::cout.
 *
 * Writes one heading line describing the synchronization type (nothing for an
 * unrecognized type), then the timestamps, thread and correlation IDs, backend
 * handles, wait-event count, API id/group, return code and the API function
 * name.
 *
 * The API name is looked up with the API group stored in the record itself,
 * so the printed name always belongs to the same group as the printed
 * "Synch Api Group ID". The lookup result is passed through PTI_THROW
 * (presumably throws on a non-success status - confirm against its definition).
 *
 * @param record Record to print; a null pointer is ignored.
 */
inline void DumpRecord(pti_view_record_synchronization* record) {
  if (record == nullptr) return;

  switch (record->_synch_type) {
    case pti_view_synchronization_type::PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_EXECUTION:
      std::cout << "Barrier Synch Type: Execution Barrier\n";
      break;
    case pti_view_synchronization_type::PTI_VIEW_SYNCHRONIZATION_TYPE_GPU_BARRIER_MEMORY:
      std::cout << "Barrier Synch Type: Memory Coherency Barrier\n";
      break;
    case pti_view_synchronization_type::PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_FENCE:
      std::cout << "Fence Synch Type: Execution\n";
      break;
    case pti_view_synchronization_type::PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_EVENT:
      std::cout << "Event Synch Type: Host\n";
      break;
    case pti_view_synchronization_type::PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_COMMAND_LIST:
      std::cout << "CommandList Synch Type: Host\n";
      break;
    case pti_view_synchronization_type::PTI_VIEW_SYNCHRONIZATION_TYPE_HOST_COMMAND_QUEUE:
      std::cout << "CommandQueue Synch Type: Host\n";
      break;
    default:
      // Unknown or future types get no heading line; the fields below are still printed.
      break;
  }

  std::cout << "Synch Start Time: " << record->_start_timestamp << '\n';
  std::cout << " Synch End Time: " << record->_end_timestamp << '\n';
  std::cout << " Synch Duration: " << record->_end_timestamp - record->_start_timestamp << "ns \n";
  std::cout << "Synch Thread Id: " << record->_thread_id << '\n';
  std::cout << "Synch Correlation Id: " << record->_correlation_id << '\n';
  std::cout << "Synch BE Queue Handle: " << record->_queue_handle << '\n';
  std::cout << "Synch BE Context Handle: " << record->_context_handle << '\n';
  std::cout << "Synch BE Event Handle: " << record->_event_handle << '\n';
  std::cout << "Synch BE Number Wait Events: " << record->_number_wait_events << '\n';
  std::cout << "Synch Api Function CBID: " << record->_api_id << '\n';
  std::cout << "Synch Api Group ID: " << record->_api_group << '\n';
  std::cout << "Synch Api Return Code: " << record->_return_code << '\n';

  // Resolve the name within the group the record was collected in rather than
  // assuming Level Zero; _api_id is only meaningful relative to _api_group.
  const char* api_name = nullptr;
  PTI_THROW(ptiViewGetApiIdName(record->_api_group, record->_api_id, &api_name));
  // Streaming a null const char* is undefined behaviour, so substitute a placeholder.
  std::cout << "Synch Api Function Name: " << (api_name != nullptr ? api_name : "<unknown>")
            << '\n';
}
300+
259301
inline void DumpRecord(pti_view_record_overhead* record) {
260302
if (NULL == record) return;
261303
std::cout << "Overhead Kind : " << ptiViewOverheadKindToString(record->_overhead_kind) << '\n';

sdk/src/gen_tracing_callbacks.py

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@ def gen_exit_callback(
633633
ze_gen_func_list,
634634
existing_apiid_dict,
635635
regen_api_files,
636+
synchronization_viewkind_api_list,
636637
):
637638
cat_key = ("driver", "levelzero")
638639
existing_apiids = existing_apiid_dict.get(cat_key, {})
@@ -643,6 +644,7 @@ def gen_exit_callback(
643644

644645
f.write(" [[maybe_unused]] uint64_t end_time_host = 0;\n")
645646
f.write(" end_time_host = utils::GetTime();\n")
647+
f.write(" ze_instance_data.end_time_host = end_time_host;\n")
646648

647649
cb = get_kernel_tracing_callback("OnExit" + func[2:])
648650

@@ -660,18 +662,37 @@ def gen_exit_callback(
660662
f.write(" kids.push_back(ze_instance_data.kid);\n")
661663
f.write(" }\n")
662664

665+
if func in synchronization_viewkind_api_list:
666+
f.write(" uint64_t synch_corrid = UniCorrId::GetUniCorrId();\n")
667+
663668
if cb != "":
664669
f.write(" if (collector->options_.kernel_tracing) { \n")
665670
if (func in submission_func_list) or (func in synchronize_func_list_on_exit):
666-
f.write(
667-
" "
668-
+ cb
669-
+ "(params, result, global_data, instance_user_data, &kids); \n"
670-
)
671+
if func in synchronization_viewkind_api_list:
672+
f.write(
673+
" "
674+
+ cb
675+
+ "(params, result, global_data, instance_user_data, &kids, synch_corrid); \n"
676+
)
677+
else:
678+
f.write(
679+
" "
680+
+ cb
681+
+ "(params, result, global_data, instance_user_data, &kids); \n"
682+
)
671683
else:
672-
f.write(
673-
" " + cb + "(params, result, global_data, instance_user_data); \n"
674-
)
684+
if func in synchronization_viewkind_api_list:
685+
f.write(
686+
" "
687+
+ cb
688+
+ "(params, result, global_data, instance_user_data, synch_corrid); \n"
689+
)
690+
else:
691+
f.write(
692+
" "
693+
+ cb
694+
+ "(params, result, global_data, instance_user_data); \n"
695+
)
675696
f.write(" }\n")
676697

677698
f.write("\n")
@@ -723,7 +744,10 @@ def gen_exit_callback(
723744
f.write(" rec.cid_ = UniCorrId::GetUniCorrId();\n")
724745
f.write(" }\n")
725746
else:
726-
f.write(" rec.cid_ = UniCorrId::GetUniCorrId();\n")
747+
if func in synchronization_viewkind_api_list:
748+
f.write(" rec.cid_ = synch_corrid;\n")
749+
else:
750+
f.write(" rec.cid_ = UniCorrId::GetUniCorrId();\n")
727751
if func in km_rt_func_list:
728752
f.write(" sycl_data_kview.cid_ = 0;\n")
729753
f.write(" sycl_data_mview.cid_ = 0;\n")
@@ -750,6 +774,7 @@ def gen_callbacks(
750774
ze_gen_func_list,
751775
existing_apiid_dict,
752776
regen_api_files,
777+
synchronization_viewkind_api_list,
753778
):
754779
for func in func_param_dict.keys():
755780
# print ("+++ Function : ", func)
@@ -779,6 +804,7 @@ def gen_callbacks(
779804
ze_gen_func_list,
780805
existing_apiid_dict,
781806
regen_api_files,
807+
synchronization_viewkind_api_list,
782808
)
783809
f.write("}\n")
784810
f.write("\n")
@@ -926,6 +952,7 @@ def main():
926952
"zeCommandListAppendMemoryFill",
927953
"zeCommandListAppendBarrier",
928954
"zeCommandListAppendMemoryRangesBarrier",
955+
# "zeContextSystemBarrier",
929956
"zeCommandListAppendMemoryCopyRegion",
930957
"zeCommandListAppendMemoryCopyFromContext",
931958
"zeCommandListAppendImageCopy",
@@ -945,6 +972,7 @@ def main():
945972
"zeKernelSetGroupSize",
946973
"zeKernelDestroy",
947974
"zeEventHostSynchronize",
975+
"zeFenceCreate",
948976
"zeFenceHostSynchronize",
949977
"zeEventQueryStatus",
950978
"zeCommandListHostSynchronize",
@@ -998,6 +1026,16 @@ def main():
9981026
"zeCommandQueueSynchronize",
9991027
]
10001028

1029+
synchronization_viewkind_api_list = [
1030+
"zeFenceHostSynchronize",
1031+
"zeCommandListAppendBarrier",
1032+
"zeCommandListAppendMemoryRangesBarrier",
1033+
# "zeContextSystemBarrier",
1034+
"zeEventHostSynchronize",
1035+
"zeCommandQueueSynchronize",
1036+
"zeCommandListHostSynchronize",
1037+
]
1038+
10011039
exclude_from_epilogue_list = []
10021040

10031041
exclude_from_prologue_list = ["zeCommandListHostSynchronize"]
@@ -1053,6 +1091,7 @@ def main():
10531091
ze_gen_func_list,
10541092
existing_apiid_dict,
10551093
regen_api_files,
1094+
synchronization_viewkind_api_list,
10561095
)
10571096
gen_api(
10581097
dst_file,

0 commit comments

Comments
 (0)