@@ -88,6 +88,12 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
8888 break ;
8989 }
9090
91+ int32_t thread_last = num_threads - 1 ;
92+ for (; thread_last >= 0 ; --thread_last) {
93+ if (warp.tmask .test (thread_last))
94+ break ;
95+ }
96+
9197 std::vector<reg_data_t [3 ]> rsdata (num_threads);
9298 std::vector<reg_data_t > rddata (num_threads);
9399
@@ -661,7 +667,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
661667 continue ;
662668 rddata[t].i = next_pc;
663669 }
664- next_pc = rsdata[thread_start ][0 ].i + immsrc;
670+ next_pc = rsdata[thread_last ][0 ].i + immsrc;
665671 trace->fetch_stall = true ;
666672 rd_write = true ;
667673 break ;
@@ -1306,7 +1312,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
13061312 trace->fetch_stall = true ;
13071313 next_tmask.reset ();
13081314 for (uint32_t t = 0 ; t < num_threads; ++t) {
1309- next_tmask.set (t, rsdata.at (thread_start )[0 ].i & (1 << t));
1315+ next_tmask.set (t, rsdata.at (thread_last )[0 ].i & (1 << t));
13101316 }
13111317 } break ;
13121318 case 1 : {
@@ -1316,7 +1322,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
13161322 trace->used_iregs .set (rsrc0);
13171323 trace->used_iregs .set (rsrc1);
13181324 trace->fetch_stall = true ;
1319- trace->data = std::make_shared<SFUTraceData>(rsdata.at (thread_start )[0 ].i , rsdata.at (thread_start )[1 ].i );
1325+ trace->data = std::make_shared<SFUTraceData>(rsdata.at (thread_last )[0 ].i , rsdata.at (thread_last )[1 ].i );
13201326 } break ;
13211327 case 2 : {
13221328 // SPLIT
@@ -1366,7 +1372,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
13661372 trace->used_iregs .set (rsrc0);
13671373 trace->fetch_stall = true ;
13681374
1369- auto stack_ptr = warp.ireg_file .at (thread_start ).at (rsrc0);
1375+ auto stack_ptr = warp.ireg_file .at (thread_last ).at (rsrc0);
13701376 if (stack_ptr != warp.ipdom_stack .size ()) {
13711377 if (warp.ipdom_stack .empty ()) {
13721378 std::cout << " IPDOM stack is empty!\n " << std::flush;
@@ -1386,7 +1392,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
13861392 trace->used_iregs .set (rsrc0);
13871393 trace->used_iregs .set (rsrc1);
13881394 trace->fetch_stall = true ;
1389- trace->data = std::make_shared<SFUTraceData>(rsdata[thread_start ][0 ].i , rsdata[thread_start ][1 ].i );
1395+ trace->data = std::make_shared<SFUTraceData>(rsdata[thread_last ][0 ].i , rsdata[thread_last ][1 ].i );
13901396 } break ;
13911397 case 5 : {
13921398 // PRED
@@ -1404,7 +1410,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
14041410 if (pred.any ()) {
14051411 next_tmask &= pred;
14061412 } else {
1407- next_tmask = warp.ireg_file .at (thread_start ).at (rsrc1);
1413+ next_tmask = warp.ireg_file .at (thread_last ).at (rsrc1);
14081414 }
14091415 } break ;
14101416 default :
0 commit comments