Skip to content

Commit 9b79d60

Browse files
committed
minor update
1 parent d99aaf3 commit 9b79d60

File tree

5 files changed

+23
-16
lines changed

5 files changed

+23
-16
lines changed

ci/travis_run.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def monitor(stop_event):
3636
sys.stdout.flush()
3737
elapsed_time = 0
3838

39-
def execute_verbose(command):
39+
def execute(command):
4040
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
4141
while True:
4242
output = process.stdout.readline()
@@ -49,15 +49,9 @@ def execute_verbose(command):
4949
process.stdout.flush()
5050
ret = process.poll()
5151
if ret is not None:
52-
print(" + exitcode="+str(ret))
5352
return ret
5453
return -1
5554

56-
def execute(command):
57-
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
58-
ret = process.wait()
59-
return ret
60-
6155
def main(argv):
6256
if not argv:
6357
print("Usage: travis_run.py <command>")

hw/rtl/core/VX_trace.vh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@
8282
`INST_ALU_XOR: `TRACE(level, ("XOR"));
8383
`INST_ALU_OR: `TRACE(level, ("OR"));
8484
`INST_ALU_AND: `TRACE(level, ("AND"));
85-
`INST_ALU_CZEQ: `TRACE(level, ("CZEQ"));
86-
`INST_ALU_CZNE: `TRACE(level, ("CZNE"));
85+
`INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"));
86+
`INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"));
8787
default: `TRACE(level, ("?"));
8888
endcase
8989
end

sim/simx/execute.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,12 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
8888
break;
8989
}
9090

91+
int32_t thread_last = num_threads - 1;
92+
for (; thread_last >= 0; --thread_last) {
93+
if (warp.tmask.test(thread_last))
94+
break;
95+
}
96+
9197
std::vector<reg_data_t[3]> rsdata(num_threads);
9298
std::vector<reg_data_t> rddata(num_threads);
9399

@@ -661,7 +667,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
661667
continue;
662668
rddata[t].i = next_pc;
663669
}
664-
next_pc = rsdata[thread_start][0].i + immsrc;
670+
next_pc = rsdata[thread_last][0].i + immsrc;
665671
trace->fetch_stall = true;
666672
rd_write = true;
667673
break;
@@ -1306,7 +1312,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
13061312
trace->fetch_stall = true;
13071313
next_tmask.reset();
13081314
for (uint32_t t = 0; t < num_threads; ++t) {
1309-
next_tmask.set(t, rsdata.at(thread_start)[0].i & (1 << t));
1315+
next_tmask.set(t, rsdata.at(thread_last)[0].i & (1 << t));
13101316
}
13111317
} break;
13121318
case 1: {
@@ -1316,7 +1322,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
13161322
trace->used_iregs.set(rsrc0);
13171323
trace->used_iregs.set(rsrc1);
13181324
trace->fetch_stall = true;
1319-
trace->data = std::make_shared<SFUTraceData>(rsdata.at(thread_start)[0].i, rsdata.at(thread_start)[1].i);
1325+
trace->data = std::make_shared<SFUTraceData>(rsdata.at(thread_last)[0].i, rsdata.at(thread_last)[1].i);
13201326
} break;
13211327
case 2: {
13221328
// SPLIT
@@ -1366,7 +1372,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
13661372
trace->used_iregs.set(rsrc0);
13671373
trace->fetch_stall = true;
13681374

1369-
auto stack_ptr = warp.ireg_file.at(thread_start).at(rsrc0);
1375+
auto stack_ptr = warp.ireg_file.at(thread_last).at(rsrc0);
13701376
if (stack_ptr != warp.ipdom_stack.size()) {
13711377
if (warp.ipdom_stack.empty()) {
13721378
std::cout << "IPDOM stack is empty!\n" << std::flush;
@@ -1386,7 +1392,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
13861392
trace->used_iregs.set(rsrc0);
13871393
trace->used_iregs.set(rsrc1);
13881394
trace->fetch_stall = true;
1389-
trace->data = std::make_shared<SFUTraceData>(rsdata[thread_start][0].i, rsdata[thread_start][1].i);
1395+
trace->data = std::make_shared<SFUTraceData>(rsdata[thread_last][0].i, rsdata[thread_last][1].i);
13901396
} break;
13911397
case 5: {
13921398
// PRED
@@ -1404,7 +1410,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
14041410
if (pred.any()) {
14051411
next_tmask &= pred;
14061412
} else {
1407-
next_tmask = warp.ireg_file.at(thread_start).at(rsrc1);
1413+
next_tmask = warp.ireg_file.at(thread_last).at(rsrc1);
14081414
}
14091415
} break;
14101416
default:

tests/regression/Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ all:
1414
$(MAKE) -C vecaddx
1515
$(MAKE) -C sgemmx
1616
$(MAKE) -C conv3x
17+
$(MAKE) -C sgemm2x
1718

1819
run-simx:
1920
$(MAKE) -C basic run-simx
@@ -28,6 +29,7 @@ run-simx:
2829
$(MAKE) -C vecaddx run-simx
2930
$(MAKE) -C sgemmx run-simx
3031
$(MAKE) -C conv3x run-simx
32+
$(MAKE) -C sgemm2x run-simx
3133

3234
run-rtlsim:
3335
$(MAKE) -C basic run-rtlsim
@@ -42,6 +44,7 @@ run-rtlsim:
4244
$(MAKE) -C vecaddx run-rtlsim
4345
$(MAKE) -C sgemmx run-rtlsim
4446
$(MAKE) -C conv3x run-rtlsim
47+
$(MAKE) -C sgemm2x run-rtlsim
4548

4649
run-opae:
4750
$(MAKE) -C basic run-opae
@@ -56,6 +59,7 @@ run-opae:
5659
$(MAKE) -C vecaddx run-opae
5760
$(MAKE) -C sgemmx run-opae
5861
$(MAKE) -C conv3x run-opae
62+
$(MAKE) -C sgemm2x run-opae
5963

6064
clean:
6165
$(MAKE) -C basic clean
@@ -70,6 +74,7 @@ clean:
7074
$(MAKE) -C vecaddx clean
7175
$(MAKE) -C sgemmx clean
7276
$(MAKE) -C conv3x clean
77+
$(MAKE) -C sgemm2x clean
7378

7479
clean-all:
7580
$(MAKE) -C basic clean-all
@@ -84,3 +89,4 @@ clean-all:
8489
$(MAKE) -C vecaddx clean-all
8590
$(MAKE) -C sgemmx clean-all
8691
$(MAKE) -C conv3x clean-all
92+
$(MAKE) -C sgemm2x clean-all

tests/regression/sgemm2x/main.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ int main(int argc, char *argv[]) {
159159
uint32_t buf_size = size_sq * sizeof(TYPE);
160160

161161
uint32_t group_size = tile_size * tile_size;
162-
uint32_t num_groups = (size * size) / group_size;
162+
uint32_t num_groups = size_sq / group_size;
163163
uint32_t local_mem = 2 * group_size * sizeof(TYPE);
164164

165165
std::cout << "data type: " << Comparator<TYPE>::type_str() << std::endl;
@@ -177,6 +177,7 @@ int main(int argc, char *argv[]) {
177177
// check work group occupancy
178178
uint32_t max_barriers, max_localmem;
179179
RT_CHECK(vx_check_occupancy(device, group_size, &max_barriers, &max_localmem));
180+
std::cout << "occupancy: max_barriers=" << max_barriers << ", max_localmem=" << max_localmem << " bytes" << std::endl;
180181
RT_CHECK(max_barriers < 2);
181182
RT_CHECK(max_localmem < local_mem);
182183

0 commit comments

Comments
 (0)