3434 * cpp - DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm - P - o gfx11.sp3
3535 * sp3 gfx11.sp3 - hex gfx11.hex
3636 *
37- * gfx12:
38- * cpp - DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm - P - o gfx12.sp3
39- * sp3 gfx12.sp3 - hex gfx12.hex
4037 * /
4138
4239#define CHIP_NAVI10 26
4340#define CHIP_SIENNA_CICHLID 30
4441#define CHIP_PLUM_BONITO 36
45- #define CHIP_GFX12 37
4642
4743#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
4844#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
4945#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
5046#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
51- #define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO && ASIC_FAMILY < CHIP_GFX12 )
47+ #define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO)
5248#define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY= 0 , for debugger
5349#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
5450
55- #if ASIC_FAMILY < CHIP_GFX12
5651#define S_COHERENCE glc: 1
5752#define V_COHERENCE slc: 1 glc: 1
5853#define S_WAITCNT_0 s_waitcnt 0
59- #else
60- #define S_COHERENCE scope:SCOPE_SYS
61- #define V_COHERENCE scope:SCOPE_SYS
62- #define S_WAITCNT_0 s_wait_idle
63-
64- #define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO
65- #define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI
66- #define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC
67- #define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC
68- #define HW_REG_MODE HW_REG_WAVE_MODE
69- #endif
7054
71- #if ASIC_FAMILY < CHIP_GFX12
7255var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
7356var SQ_WAVE_STATUS_HALT_MASK = 0x2000
7457var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
@@ -81,21 +64,6 @@ var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_E
8164var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK
8265var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
8366var S_SAVE_PC_HI_HT_MASK = 0x01000000
84- #else
85- var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
86- var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
87- var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
88- var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
89- var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
90- var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15
91- var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
92- var SQ_WAVE_STATUS_WAVE64_SIZE = 1
93- var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9
94- var S_STATUS_HWREG = HW_REG_WAVE_STATE_PRIV
95- var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK
96- var S_STATUS_HALT_MASK = SQ_WAVE_STATE_PRIV_HALT_MASK
97- var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000
98- #endif
9967
10068var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
10169var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
@@ -110,7 +78,6 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
11078var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
11179#endif
11280
113- #if ASIC_FAMILY < CHIP_GFX12
11481var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
11582var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
11683var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
@@ -161,39 +128,6 @@ var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
161128var S_TRAPSTS_HWREG = HW_REG_TRAPSTS
162129var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK
163130var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
164- #else
165- var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
166- var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
167- var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5
168- var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
169- var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
170- var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
171- var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
172- var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
173- var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
174- var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
175- var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
176- var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
177- var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80
178- var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200
179-
180- var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV
181- var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
182- var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
183- var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
184- SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
185- SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
186- SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
187- SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
188- SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
189- var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
190- var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
191- var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
192- var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
193- var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
194- var BARRIER_STATE_SIGNAL_OFFSET = 16
195- var BARRIER_STATE_VALID_OFFSET = 0
196- #endif
197131
198132// bits [ 31 : 24 ] unused by SPI debug data
199133var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
@@ -305,11 +239,7 @@ L_TRAP_NO_BARRIER:
305239
306240L_HALTED:
307241 // Host trap may occur while wave is halted.
308- #if ASIC_FAMILY < CHIP_GFX12
309242 s_and_b32 ttmp2 , s_save_pc_hi , S_SAVE_PC_HI_TRAP_ID_MASK
310- #else
311- s_and_b32 ttmp2 , s_save_trapsts , SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
312- #endif
313243 s_cbranch_scc1 L_FETCH_2ND_TRAP
314244
315245L_CHECK_SAVE:
@@ -336,7 +266,6 @@ L_NOT_HALTED:
336266 // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
337267 // Maskable exceptions only cause the wave to enter the trap handler if
338268 // their respective bit in mode.excp_en is set.
339- #if ASIC_FAMILY < CHIP_GFX12
340269 s_and_b32 ttmp2 , s_save_trapsts , SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
341270 s_cbranch_scc0 L_CHECK_TRAP_ID
342271
@@ -349,17 +278,6 @@ L_NOT_ADDR_WATCH:
349278 s_lshl_b32 ttmp2 , ttmp2 , SQ_WAVE_MODE_EXCP_EN_SHIFT
350279 s_and_b32 ttmp2 , ttmp2 , ttmp3
351280 s_cbranch_scc1 L_FETCH_2ND_TRAP
352- #else
353- s_getreg_b32 ttmp2 , hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
354- s_and_b32 ttmp3 , s_save_trapsts , SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
355- s_cbranch_scc0 L_NOT_ADDR_WATCH
356- s_or_b32 ttmp2 , ttmp2 , SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
357-
358- L_NOT_ADDR_WATCH:
359- s_getreg_b32 ttmp3 , hwreg(HW_REG_WAVE_TRAP_CTRL)
360- s_and_b32 ttmp2 , ttmp3 , ttmp2
361- s_cbranch_scc1 L_FETCH_2ND_TRAP
362- #endif
363281
364282L_CHECK_TRAP_ID:
365283 // Check trap_id != 0
@@ -369,13 +287,8 @@ L_CHECK_TRAP_ID:
369287#if SINGLE_STEP_MISSED_WORKAROUND
370288 // Prioritize single step exception over context save.
371289 // Second - level trap will halt wave and RFE , re - entering for SAVECTX.
372- #if ASIC_FAMILY < CHIP_GFX12
373290 s_getreg_b32 ttmp2 , hwreg(HW_REG_MODE)
374291 s_and_b32 ttmp2 , ttmp2 , SQ_WAVE_MODE_DEBUG_EN_MASK
375- #else
376- // WAVE_TRAP_CTRL is already in ttmp3.
377- s_and_b32 ttmp3 , ttmp3 , SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
378- #endif
379292 s_cbranch_scc1 L_FETCH_2ND_TRAP
380293#endif
381294
@@ -425,12 +338,7 @@ L_NO_NEXT_TRAP:
425338 s_cbranch_scc1 L_TRAP_CASE
426339
427340 // Host trap will not cause trap re - entry.
428- #if ASIC_FAMILY < CHIP_GFX12
429341 s_and_b32 ttmp2 , s_save_pc_hi , S_SAVE_PC_HI_HT_MASK
430- #else
431- s_getreg_b32 ttmp2 , hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
432- s_and_b32 ttmp2 , ttmp2 , SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
433- #endif
434342 s_cbranch_scc1 L_EXIT_TRAP
435343 s_or_b32 s_save_status , s_save_status , S_STATUS_HALT_MASK
436344
@@ -457,16 +365,7 @@ L_EXIT_TRAP:
457365 s_and_b64 exec , exec , exec // Restore STATUS.EXECZ , not writable by s_setreg_b32
458366 s_and_b64 vcc , vcc , vcc // Restore STATUS.VCCZ , not writable by s_setreg_b32
459367
460- #if ASIC_FAMILY < CHIP_GFX12
461368 s_setreg_b32 hwreg(S_STATUS_HWREG) , s_save_status
462- #else
463- // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
464- // Only restore fields which the trap handler changes.
465- s_lshr_b32 s_save_status , s_save_status , SQ_WAVE_STATE_PRIV_SCC_SHIFT
466- s_setreg_b32 hwreg(S_STATUS_HWREG , SQ_WAVE_STATE_PRIV_SCC_SHIFT , \
467- SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1 ) , s_save_status
468- #endif
469-
470369 s_rfe_b64 [ ttmp0 , ttmp1 ]
471370
472371L_SAVE:
@@ -478,14 +377,6 @@ L_SAVE:
478377 s_endpgm
479378L_HAVE_VGPRS:
480379#endif
481- #if ASIC_FAMILY >= CHIP_GFX12
482- s_getreg_b32 s_save_tmp , hwreg(HW_REG_WAVE_STATUS)
483- s_bitcmp1_b32 s_save_tmp , SQ_WAVE_STATUS_NO_VGPRS_SHIFT
484- s_cbranch_scc0 L_HAVE_VGPRS
485- s_endpgm
486- L_HAVE_VGPRS:
487- #endif
488-
489380 s_and_b32 s_save_pc_hi , s_save_pc_hi , 0x0000ffff //pc [ 47 : 32 ]
490381 s_mov_b32 s_save_tmp , 0
491382 s_setreg_b32 hwreg(S_TRAPSTS_HWREG , S_TRAPSTS_SAVE_CONTEXT_SHIFT , 1 ) , s_save_tmp //clear saveCtx bit
@@ -671,19 +562,6 @@ L_SAVE_HWREG:
671562 s_mov_b32 m0 , 0x0 //Next lane of v2 to write to
672563#endif
673564
674- #if ASIC_FAMILY >= CHIP_GFX12
675- // Ensure no further changes to barrier or LDS state.
676- // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
677- s_barrier_signal - 2
678- s_barrier_wait - 2
679-
680- // Re - read final state of BARRIER_COMPLETE field for save.
681- s_getreg_b32 s_save_tmp , hwreg(S_STATUS_HWREG)
682- s_and_b32 s_save_tmp , s_save_tmp , SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
683- s_andn2_b32 s_save_status , s_save_status , SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
684- s_or_b32 s_save_status , s_save_status , s_save_tmp
685- #endif
686-
687565 write_hwreg_to_mem(s_save_m0 , s_save_buf_rsrc0 , s_save_mem_offset)
688566 write_hwreg_to_mem(s_save_pc_lo , s_save_buf_rsrc0 , s_save_mem_offset)
689567 s_andn2_b32 s_save_tmp , s_save_pc_hi , S_SAVE_PC_HI_FIRST_WAVE_MASK
@@ -707,21 +585,6 @@ L_SAVE_HWREG:
707585 s_getreg_b32 s_save_m0 , hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
708586 write_hwreg_to_mem(s_save_m0 , s_save_buf_rsrc0 , s_save_mem_offset)
709587
710- #if ASIC_FAMILY >= CHIP_GFX12
711- s_getreg_b32 s_save_m0 , hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
712- write_hwreg_to_mem(s_save_m0 , s_save_buf_rsrc0 , s_save_mem_offset)
713-
714- s_getreg_b32 s_save_m0 , hwreg(HW_REG_WAVE_TRAP_CTRL)
715- write_hwreg_to_mem(s_save_m0 , s_save_buf_rsrc0 , s_save_mem_offset)
716-
717- s_getreg_b32 s_save_tmp , hwreg(HW_REG_WAVE_STATUS)
718- write_hwreg_to_mem(s_save_tmp , s_save_buf_rsrc0 , s_save_mem_offset)
719-
720- s_get_barrier_state s_save_tmp , - 1
721- s_wait_kmcnt ( 0 )
722- write_hwreg_to_mem(s_save_tmp , s_save_buf_rsrc0 , s_save_mem_offset)
723- #endif
724-
725588#if NO_SQC_STORE
726589 // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
727590 s_mov_b32 exec_lo , 0xFFFF
@@ -814,9 +677,7 @@ L_SAVE_LDS_NORMAL:
814677 s_and_b32 s_save_alloc_size , s_save_alloc_size , 0xFFFFFFFF //lds_size is zero?
815678 s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
816679
817- #if ASIC_FAMILY < CHIP_GFX12
818680 s_barrier // LDS is used? wait for other waves in the same TG
819- #endif
820681 s_and_b32 s_save_tmp , s_save_pc_hi , S_SAVE_PC_HI_FIRST_WAVE_MASK
821682 s_cbranch_scc0 L_SAVE_LDS_DONE
822683
@@ -1081,11 +942,6 @@ L_RESTORE:
1081942 s_mov_b32 s_restore_buf_rsrc2 , 0 //NUM_RECORDS initial value = 0 ( in bytes)
1082943 s_mov_b32 s_restore_buf_rsrc3 , S_RESTORE_BUF_RSRC_WORD3_MISC
1083944
1084- #if ASIC_FAMILY >= CHIP_GFX12
1085- // Save s_restore_spi_init_hi for later use.
1086- s_mov_b32 s_restore_spi_init_hi_save , s_restore_spi_init_hi
1087- #endif
1088-
1089945 //determine it is wave32 or wave64
1090946 get_wave_size2(s_restore_size)
1091947
@@ -1320,9 +1176,7 @@ L_RESTORE_SGPR:
13201176 // s_barrier with MODE.DEBUG_EN= 1 , STATUS.PRIV= 1 incorrectly asserts debug exception.
13211177 // Clear DEBUG_EN before and restore MODE after the barrier.
13221178 s_setreg_imm32_b32 hwreg(HW_REG_MODE) , 0
1323- #if ASIC_FAMILY < CHIP_GFX12
13241179 s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG
1325- #endif
13261180
13271181 / * restore HW registers * /
13281182L_RESTORE_HWREG:
@@ -1334,11 +1188,6 @@ L_RESTORE_HWREG:
13341188
13351189 s_mov_b32 s_restore_buf_rsrc2 , 0x1000000 //NUM_RECORDS in bytes
13361190
1337- #if ASIC_FAMILY >= CHIP_GFX12
1338- // Restore s_restore_spi_init_hi before the saved value gets clobbered.
1339- s_mov_b32 s_restore_spi_init_hi , s_restore_spi_init_hi_save
1340- #endif
1341-
13421191 read_hwreg_from_mem(s_restore_m0 , s_restore_buf_rsrc0 , s_restore_mem_offset)
13431192 read_hwreg_from_mem(s_restore_pc_lo , s_restore_buf_rsrc0 , s_restore_mem_offset)
13441193 read_hwreg_from_mem(s_restore_pc_hi , s_restore_buf_rsrc0 , s_restore_mem_offset)
@@ -1358,44 +1207,6 @@ L_RESTORE_HWREG:
13581207
13591208 s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI) , s_restore_flat_scratch
13601209
1361- #if ASIC_FAMILY >= CHIP_GFX12
1362- read_hwreg_from_mem(s_restore_tmp , s_restore_buf_rsrc0 , s_restore_mem_offset)
1363- S_WAITCNT_0
1364- s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER) , s_restore_tmp
1365-
1366- read_hwreg_from_mem(s_restore_tmp , s_restore_buf_rsrc0 , s_restore_mem_offset)
1367- S_WAITCNT_0
1368- s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL) , s_restore_tmp
1369-
1370- // Only the first wave needs to restore the workgroup barrier.
1371- s_and_b32 s_restore_tmp , s_restore_spi_init_hi , S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
1372- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
1373-
1374- // Skip over WAVE_STATUS , since there is no state to restore from it
1375- s_add_u32 s_restore_mem_offset , s_restore_mem_offset , 4
1376-
1377- read_hwreg_from_mem(s_restore_tmp , s_restore_buf_rsrc0 , s_restore_mem_offset)
1378- S_WAITCNT_0
1379-
1380- s_bitcmp1_b32 s_restore_tmp , BARRIER_STATE_VALID_OFFSET
1381- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
1382-
1383- // extract the saved signal count from s_restore_tmp
1384- s_lshr_b32 s_restore_tmp , s_restore_tmp , BARRIER_STATE_SIGNAL_OFFSET
1385-
1386- // We need to call s_barrier_signal repeatedly to restore the signal
1387- // count of the work group barrier. The member count is already
1388- // initialized with the number of waves in the work group.
1389- L_BARRIER_RESTORE_LOOP:
1390- s_and_b32 s_restore_tmp , s_restore_tmp , s_restore_tmp
1391- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
1392- s_barrier_signal - 1
1393- s_add_i32 s_restore_tmp , s_restore_tmp , - 1
1394- s_branch L_BARRIER_RESTORE_LOOP
1395-
1396- L_SKIP_BARRIER_RESTORE:
1397- #endif
1398-
13991210 s_mov_b32 m0 , s_restore_m0
14001211 s_mov_b32 exec_lo , s_restore_exec_lo
14011212 s_mov_b32 exec_hi , s_restore_exec_hi
@@ -1453,13 +1264,6 @@ L_RETURN_WITHOUT_PRIV:
14531264
14541265 s_setreg_b32 hwreg(S_STATUS_HWREG) , s_restore_status // SCC is included , which is changed by previous salu
14551266
1456- #if ASIC_FAMILY >= CHIP_GFX12
1457- // Make barrier and LDS state visible to all waves in the group.
1458- // STATE_PRIV.BARRIER_COMPLETE may change after this point.
1459- s_barrier_signal - 2
1460- s_barrier_wait - 2
1461- #endif
1462-
14631267 s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
14641268
14651269L_END_PGM:
@@ -1598,11 +1402,7 @@ function get_hwreg_size_bytes
15981402end
15991403
16001404function get_wave_size2(s_reg)
1601- #if ASIC_FAMILY < CHIP_GFX12
16021405 s_getreg_b32 s_reg , hwreg(HW_REG_IB_STS2 , SQ_WAVE_IB_STS2_WAVE64_SHIFT , SQ_WAVE_IB_STS2_WAVE64_SIZE)
1603- #else
1604- s_getreg_b32 s_reg , hwreg(HW_REG_WAVE_STATUS , SQ_WAVE_STATUS_WAVE64_SHIFT , SQ_WAVE_STATUS_WAVE64_SIZE)
1605- #endif
16061406 s_lshl_b32 s_reg , s_reg , S_WAVE_SIZE
16071407end
16081408
0 commit comments