Skip to content

Commit 97a7d09

Browse files
committed
ir: FIX eliminateCopies with loops, fix Slot replacements
1 parent 91182f0 commit 97a7d09

File tree

4 files changed

+143
-83
lines changed

4 files changed

+143
-83
lines changed

ir/context.c2

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -503,11 +503,11 @@ public fn Ref Context.addStackSlot(Context* c, u32 align, Ref size) {
503503
u32 idx = c.b.slot_idx;
504504
c.b.slot_idx++;
505505
assert(c.b.slot_idx < 65536); // to fit in u16
506+
// TODO use Temp (or convert after SSA creation)
507+
//Ref out.init(RefKind.Temp, c.b.tmp_info.instructions.getCount());
506508
Ref out.init(RefKind.Slot, idx);
507509
Instr* i = c.b.tmp_info.instructions.add();
508510

509-
// TODO pass instr idx (global)
510-
511511
InstrKind kind;
512512
switch (align) {
513513
case 1:
@@ -604,7 +604,7 @@ fn void Context.finalizeFunction(Context* c, SymbolId id) {
604604
checkDest(c.b.tmp_info, 0);
605605

606606
//dump_function(c.b.tmp_info, name);
607-
if (print) c.print_function(symbol, c.b.tmp_info, "after check dest");
607+
//if (print) c.print_function(symbol, c.b.tmp_info, "after check dest");
608608

609609
// move used blocks to new list, drop unused blocks
610610

@@ -717,6 +717,7 @@ fn void Context.finalizeFunction(Context* c, SymbolId id) {
717717

718718
//dump_function(fi2, name);
719719
//c.generate_graphviz(fi2, name);
720+
// TODO only print if changed
720721
if (print) c.print_function(symbol, fi2, "after reorder");
721722

722723
c.create_ssa(symbol, fi2, name, print);
@@ -732,12 +733,14 @@ fn void Context.finalizeFunction(Context* c, SymbolId id) {
732733
//dump_function(fi2, name);
733734
if (print) c.print_function(symbol, fi2, "after eliminate copies");
734735

735-
736736
//c.propagateConstants(fi2);
737737
//if (print) c.print_function(symbol, fi2, "after propagate constants");
738-
t.fillUsage(fi2);
739-
//if (print) c.print_function(symbol, fi2, "after fill usage");
738+
t.removeUnused(fi2);
739+
//if (print) c.print_function(symbol, fi2, "after remove unused");
740740

741+
// propagate constants
742+
// also do simple optimizations: remove add 0, mult 1, mult 0, etc
743+
// TODO
741744

742745
t.removeNone(fi2);
743746
if (print) c.print_function(symbol, fi2, "after remove none");

ir/instr.c2

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -123,21 +123,6 @@ fn bool Instr.isArithmetic(const Instr* i) @(unused) {
123123
return k >= InstrKind.Add && k <= InstrKind.Shl;
124124
}
125125

126-
fn bool Instr.isAlloc(const Instr* i) {
127-
InstrKind k = i.getKind();
128-
return k >= InstrKind.Alloc1 && k <= InstrKind.Alloc8;
129-
}
130-
131-
fn bool Instr.isLoad(const Instr* i) {
132-
InstrKind k = i.getKind();
133-
return k >= InstrKind.Load1 && k <= InstrKind.Load8;
134-
}
135-
136-
fn bool Instr.isLoadOrStoreAlloc(const Instr* i) {
137-
InstrKind k = i.getKind();
138-
return k >= InstrKind.Load1 && k <= InstrKind.Alloc8;
139-
}
140-
141126
fn bool Instr.isPhi(const Instr* i) {
142127
InstrKind k = i.getKind();
143128
return k == InstrKind.Phi;

ir/slot_collector.c2

Lines changed: 91 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ fn BlockIndex* SlotCollector.getBlock(const SlotCollector* c, BlockId blk_id) {
6161
fn void SlotCollector.create(SlotCollector* c) {
6262
memset(c, 0, sizeof(SlotCollector));
6363
// TEMP hardcoded max
64+
6465
c.reads = stdlib.malloc(SlotMax * sizeof(ReadSlotAccess));
6566
c.writes = stdlib.malloc(SlotMax * sizeof(WriteSlotAccess));
6667
c.writes2 = stdlib.malloc(SlotMax * sizeof(WriteSlotAccess));
@@ -84,6 +85,9 @@ fn void SlotCollector.fill(SlotCollector* c, FunctionInfo* info) {
8485
c.num_blocks = info.blocks.getCount();
8586
c.info = info;
8687

88+
// find all slots used outside load/stores, don't remove those
89+
// also convert slot x -> temp y for non-removed allocs
90+
8791
for (u32 i=0; i<c.num_blocks; i++) {
8892
c.checkBlock(i);
8993
}
@@ -116,13 +120,33 @@ fn const WriteSlotAccess* SlotCollector.findWrite(const SlotCollector* c, BlockI
116120
return nil;
117121
}
118122

123+
fn void SlotCollector.checkRef(const SlotCollector* c, Ref* r, u32 start) {
124+
if (!r.isSlot()) return;
125+
126+
u16 slot = cast<u16>(r.value);
127+
// check for writes
128+
for (u32 i=start; i<c.write_idx; i++) {
129+
if (c.writes[i].slot == slot) {
130+
// replace slot-ref with write-ref
131+
*r = c.writes[i].ref;
132+
return;
133+
}
134+
}
135+
}
136+
119137
fn void SlotCollector.checkBlock(SlotCollector* c, BlockId blk_id) {
120138
/*
121-
Algorithm:
122-
- store first read per slot in block (if not after write)
123-
- store last write per slot
124-
- replace slot writes with None
125-
- replace slot read with last write
139+
Algorithm, for each block:
140+
- store last store per slot, replace slots (if we have a write)
141+
replace with copy:
142+
store8 %1, s0 -> %12 = copy %1
143+
last write s0: %12
144+
- store first load per slot in block (if not after write)
145+
- replace slot read with last write (or leave)
146+
- replace slot writes with write+copy (or leave)
147+
148+
After this step on all blocks, re-visit all blocks:
149+
- recurse into reads/writes
126150
*/
127151
Block* b = c.info.blocks.get(blk_id);
128152
BlockIndex* bi = &c.indexes[blk_id];
@@ -135,30 +159,38 @@ fn void SlotCollector.checkBlock(SlotCollector* c, BlockId blk_id) {
135159
Instr* ii = c.info.instructions.get(b.instr.start);
136160
for (u32 i=0; i<b.instr.count; i++) {
137161
Instr* cur = &ii[i];
138-
if (!cur.isLoadOrStoreAlloc()) continue;
139162

140163
u16 idx = cast<u16>(b.instr.start + i);
141164

142-
bool found = false;
143-
if (cur.isLoad()) {
165+
switch(cur.getKind()) {
166+
case Load1:
167+
case Load2:
168+
case Load4:
169+
case Load8:
144170
if (!cur.args[0].isSlot()) continue;
145171

172+
// if non-removed alloc slot, replace with Temp
173+
146174
// convert to Copy
147175
cur.instrBits.kind = InstrKind.Copy;
148176

177+
// TODO could use bit-vector for reads/writes
178+
bool found = false;
149179
u16 slot = cast<u16>(cur.args[0].value);
150180
//printf("[%d] load %d (%s %d)\n", i, slot, cur.args[0].getKindName(), cur.args[0].value);
151181
// ignore if already written here. Cannot use findWrite yet!
152182
for (u32 j=bi.wr_start; j<c.write_idx; j++) {
153183
if (c.writes[j].slot == slot) {
154-
found = true;
155184
// replace slot-ref with write-ref
156185
cur.args[0] = c.writes[j].ref;
186+
found = true;
157187
break;
158188
}
159189
}
160190
if (found) continue;
161191

192+
// for reads without a write in this block, just convert to copy <slot> and leave for later
193+
162194
// only capture first read
163195
for (u32 j=bi.rd_start; j<c.read_idx; j++) {
164196
if (c.reads[j].slot == slot) {
@@ -173,19 +205,24 @@ fn void SlotCollector.checkBlock(SlotCollector* c, BlockId blk_id) {
173205
ra.done = 0;
174206
ra.slot = slot;
175207
ra.instr_idx = idx;
176-
} else if (cur.isAlloc()) {
177-
cur.clear();
178-
if (i + 1 < b.instr.count) {
179-
Instr* next = &ii[i+1];
180-
if (next.isComment()) {
181-
next.clear();
182-
i++;
183-
}
184-
}
185-
} else {
208+
break;
209+
case Store1:
210+
case Store2:
211+
case Store4:
212+
case Store8:
186213
if (!cur.args[1].isSlot()) continue;
187214

215+
// if non-removed alloc slot, replace with Temp
216+
188217
u16 slot = cast<u16>(cur.args[1].value);
218+
219+
// convert into Copy, convert Slot to Temp, store that as last write
220+
cur.instrBits.kind = InstrKind.Copy;
221+
cur.instrBits.has_result = 1;
222+
cur.args[1].kind = RefKind.None;
223+
cur.args[1].value = 0;
224+
225+
189226
WriteSlotAccess* wa = nil;
190227
// overwrite if already written (track last write)
191228
for (u32 j=bi.wr_start; j<c.write_idx; j++) {
@@ -201,10 +238,26 @@ fn void SlotCollector.checkBlock(SlotCollector* c, BlockId blk_id) {
201238
wa.slot = slot;
202239
c.num_writes++;
203240
}
204-
205-
wa.ref = cur.args[0];
241+
wa.ref = { RefKind.Temp, idx };
206242
//printf("[%d] store %d (%s %d)\n", i, slot, wa.ref.getKindName(), wa.ref.value);
207-
243+
#if 0
244+
cur.clear();
245+
if (i + 1 < b.instr.count) {
246+
Instr* next = &ii[i+1];
247+
if (next.isComment()) {
248+
next.clear();
249+
i++;
250+
}
251+
}
252+
#endif
253+
break;
254+
case Alloc1:
255+
case Alloc2:
256+
case Alloc4:
257+
case Alloc8:
258+
// check if used outside load/store (eg in add for array index)
259+
// only remove if only used as load/store (eg. can be a register)
260+
// TODO could mark this in earlier pass
208261
cur.clear();
209262
if (i + 1 < b.instr.count) {
210263
Instr* next = &ii[i+1];
@@ -213,6 +266,22 @@ fn void SlotCollector.checkBlock(SlotCollector* c, BlockId blk_id) {
213266
i++;
214267
}
215268
}
269+
break;
270+
case Call:
271+
if (cur.args[1].isRefList()) {
272+
Ref* refs = c.info.refs.get(cur.args[1].value);
273+
u32 r = 0;
274+
while (refs[r].kind != RefKind.None) {
275+
c.checkRef(&refs[r], bi.wr_start);
276+
r++;
277+
}
278+
}
279+
break;
280+
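// TODO can phi (from ternary operator) have Slot Refs? NOTE(review): the default case below calls checkRef on args[0] twice; the second call was presumably meant for args[1] - confirm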
281+
default:
282+
c.checkRef(&cur.args[0], bi.wr_start);
283+
c.checkRef(&cur.args[0], bi.wr_start);
284+
break;
216285
}
217286
}
218287

ir/ssa.c2

Lines changed: 43 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,9 @@ fn void Context.create_ssa(Context* c, const Symbol* symbol, FunctionInfo* fi, c
130130
t.revlist.build(fi);
131131
t.active_blocks.reset(num_blocks);
132132

133-
if (print) c.print_function(symbol, fi, "after collector.fill");
134133
//t.collector.dump();
135134
//t.revlist.dump();
135+
if (print) c.print_function(symbol, fi, "after collector.fill");
136136

137137
/*
138138
Algorithm for PHI insertion
@@ -314,60 +314,63 @@ fn void Tools.eliminateCopies(Tools* t, FunctionInfo* fi) {
314314
// otherwise mark in prev run and apply here
315315
t.copies.clear();
316316

317+
// Needs 2 loops, one to fill copylist, remove copies, 2nd to replace
317318
// Note: the filling could be done in previous loop already!
319+
318320
Instr* all = fi.instructions.get(0);
319-
// TODO should be done for each basic block individually? (loops)
321+
320322
for (u32 i=0; i<fi.instructions.getCount(); i++) {
321323
Instr* instr = &all[i];
322324
if (instr.isCopy()) {
323325
t.copies.add(i, instr.args[0]);
324326
//printf("add %d %s\n", i, instr.args[0].str());
325-
// TODO also check if args is already in list for nested copies
326327
instr.clear();
327-
} else {
328-
if (instr.isPhi()) {
329-
PhiClause* clauses = fi.phis.get(instr.phi_clauses.start);
330-
for (u32 j=0; j<instr.phi_clauses.count; j++) {
331-
Ref* r = &clauses[j].ref;
332-
if (r.isTemp()) {
333-
Ref* copy = t.copies.find(r.value);
334-
if (copy) {
335-
//printf("replace %s with %s\n", r.str(), copy.str());
336-
*r = *copy;
337-
}
338-
}
339-
}
340-
} else {
341-
Ref* r = &instr.args[0];
328+
}
329+
}
330+
for (u32 i=0; i<fi.instructions.getCount(); i++) {
331+
Instr* instr = &all[i];
332+
if (instr.isPhi()) {
333+
PhiClause* clauses = fi.phis.get(instr.phi_clauses.start);
334+
for (u32 j=0; j<instr.phi_clauses.count; j++) {
335+
Ref* r = &clauses[j].ref;
342336
if (r.isTemp()) {
343337
Ref* copy = t.copies.find(r.value);
344338
if (copy) {
345339
//printf("replace %s with %s\n", r.str(), copy.str());
346-
instr.args[0] = *copy;
340+
*r = *copy;
347341
}
348342
}
349-
r = &instr.args[1];
350-
if (r.isTemp()) {
351-
Ref* copy = t.copies.find(r.value);
352-
if (copy) {
353-
//printf("replace %s with %s\n", r.str(), copy.str());
354-
instr.args[1] = *copy;
355-
}
356-
} else if (r.isRefList()) { // can only occur on args[1]
357-
Ref* refs = fi.refs.get(r.value);
358-
u32 idx = 0;
359-
while (1) {
360-
Ref* r2 = &refs[idx];
361-
if (r2.isTemp()) {
362-
Ref* copy = t.copies.find(r2.value);
363-
if (copy) {
364-
//printf("replace %s with %s\n", refs[idx].str(), copy.str());
365-
refs[idx] = *copy;
366-
}
343+
}
344+
} else {
345+
Ref* r = &instr.args[0];
346+
if (r.isTemp()) {
347+
Ref* copy = t.copies.find(r.value);
348+
if (copy) {
349+
//printf("replace %s with %s\n", r.str(), copy.str());
350+
instr.args[0] = *copy;
351+
}
352+
}
353+
r = &instr.args[1];
354+
if (r.isTemp()) {
355+
Ref* copy = t.copies.find(r.value);
356+
if (copy) {
357+
//printf("replace %s with %s\n", r.str(), copy.str());
358+
instr.args[1] = *copy;
359+
}
360+
} else if (r.isRefList()) { // can only occur on args[1]
361+
Ref* refs = fi.refs.get(r.value);
362+
u32 idx = 0;
363+
while (1) {
364+
Ref* r2 = &refs[idx];
365+
if (r2.isTemp()) {
366+
Ref* copy = t.copies.find(r2.value);
367+
if (copy) {
368+
//printf("replace %s with %s\n", refs[idx].str(), copy.str());
369+
refs[idx] = *copy;
367370
}
368-
if (!r2.isValid()) break;
369-
idx++;
370371
}
372+
if (!r2.isValid()) break;
373+
idx++;
371374
}
372375
}
373376
}
@@ -379,7 +382,7 @@ fn void Tools.checkRef(Tools* t, Ref r) {
379382
if (r.isTemp()) t.usage.incr(r.value);
380383
}
381384

382-
fn void Tools.fillUsage(Tools* t, FunctionInfo* fi) {
385+
fn void Tools.removeUnused(Tools* t, FunctionInfo* fi) {
383386
t.usage.clear(fi.instructions.getCount());
384387

385388
// Usage: counter both usage and if it has a result.

0 commit comments

Comments
 (0)