@@ -61,6 +61,7 @@ fn BlockIndex* SlotCollector.getBlock(const SlotCollector* c, BlockId blk_id) {
6161fn void SlotCollector.create(SlotCollector* c) {
6262 memset(c, 0, sizeof(SlotCollector));
6363 // TEMP hardcoded max
64+
6465 c.reads = stdlib.malloc(SlotMax * sizeof(ReadSlotAccess));
6566 c.writes = stdlib.malloc(SlotMax * sizeof(WriteSlotAccess));
6667 c.writes2 = stdlib.malloc(SlotMax * sizeof(WriteSlotAccess));
@@ -84,6 +85,9 @@ fn void SlotCollector.fill(SlotCollector* c, FunctionInfo* info) {
8485 c.num_blocks = info.blocks.getCount();
8586 c.info = info;
8687
88+ // find all slots used outside loads/stores; don't remove those
89+ // also convert slot x -> temp y for non-removed allocs
90+
8791 for (u32 i=0; i<c.num_blocks; i++) {
8892 c.checkBlock(i);
8993 }
@@ -116,13 +120,33 @@ fn const WriteSlotAccess* SlotCollector.findWrite(const SlotCollector* c, BlockI
116120 return nil;
117121}
118122
123+ fn void SlotCollector.checkRef(const SlotCollector* c, Ref* r, u32 start) {
124+ if (!r.isSlot()) return;
125+
126+ u16 slot = cast<u16>(r.value);
127+ // check for writes
128+ for (u32 i=start; i<c.write_idx; i++) {
129+ if (c.writes[i].slot == slot) {
130+ // replace slot-ref with write-ref
131+ *r = c.writes[i].ref;
132+ return;
133+ }
134+ }
135+ }
136+
119137fn void SlotCollector.checkBlock(SlotCollector* c, BlockId blk_id) {
120138 /*
121- Algorithm:
122- - store first read per slot in block (if not after write)
123- - store last write per slot
124- - replace slot writes with None
125- - replace slot read with last write
139+ Algorithm, for each block:
140+ - store last store per slot, replace slots (if we have a write)
141+ replace with copy:
142+ store8 %1, s0 -> %12 = copy %1
143+ last write s0: %12
144+ - store first load per slot in block (if not after write)
145+ - replace slot read with last write (or leave)
146+ - replace slot writes with write+copy (or leave)
147+
148+ After this step on all blocks, re-visit all blocks:
149+ - recurse into reads/writes
126150 */
127151 Block* b = c.info.blocks.get(blk_id);
128152 BlockIndex* bi = &c.indexes[blk_id];
@@ -135,30 +159,38 @@ fn void SlotCollector.checkBlock(SlotCollector* c, BlockId blk_id) {
135159 Instr* ii = c.info.instructions.get(b.instr.start);
136160 for (u32 i=0; i<b.instr.count; i++) {
137161 Instr* cur = &ii[i];
138- if (!cur.isLoadOrStoreAlloc()) continue;
139162
140163 u16 idx = cast<u16>(b.instr.start + i);
141164
142- bool found = false;
143- if (cur.isLoad()) {
165+ switch(cur.getKind()) {
166+ case Load1:
167+ case Load2:
168+ case Load4:
169+ case Load8:
144170 if (!cur.args[0].isSlot()) continue;
145171
172+ // if non-removed alloc slot, replace with Temp
173+
146174 // convert to Copy
147175 cur.instrBits.kind = InstrKind.Copy;
148176
177+ // TODO could use bit-vector for reads/writes
178+ bool found = false;
149179 u16 slot = cast<u16>(cur.args[0].value);
150180 //printf("[%d] load %d (%s %d)\n", i, slot, cur.args[0].getKindName(), cur.args[0].value);
151181 // ignore if already written here. Cannot use findWrite yet!
152182 for (u32 j=bi.wr_start; j<c.write_idx; j++) {
153183 if (c.writes[j].slot == slot) {
154- found = true;
155184 // replace slot-ref with write-ref
156185 cur.args[0] = c.writes[j].ref;
186+ found = true;
157187 break;
158188 }
159189 }
160190 if (found) continue;
161191
192+ // for reads without a write this block, just convert to copy <slot> and leave for later
193+
162194 // only capture first read
163195 for (u32 j=bi.rd_start; j<c.read_idx; j++) {
164196 if (c.reads[j].slot == slot) {
@@ -173,19 +205,24 @@ fn void SlotCollector.checkBlock(SlotCollector* c, BlockId blk_id) {
173205 ra.done = 0;
174206 ra.slot = slot;
175207 ra.instr_idx = idx;
176- } else if (cur.isAlloc()) {
177- cur.clear();
178- if (i + 1 < b.instr.count) {
179- Instr* next = &ii[i+1];
180- if (next.isComment()) {
181- next.clear();
182- i++;
183- }
184- }
185- } else {
208+ break;
209+ case Store1:
210+ case Store2:
211+ case Store4:
212+ case Store8:
186213 if (!cur.args[1].isSlot()) continue;
187214
215+ // if non-removed alloc slot, replace with Temp
216+
188217 u16 slot = cast<u16>(cur.args[1].value);
218+
219+ // convert into Copy, convert Slot to Temp, store that as last write
220+ cur.instrBits.kind = InstrKind.Copy;
221+ cur.instrBits.has_result = 1;
222+ cur.args[1].kind = RefKind.None;
223+ cur.args[1].value = 0;
224+
225+
189226 WriteSlotAccess* wa = nil;
190227 // overwrite if already written (track last write)
191228 for (u32 j=bi.wr_start; j<c.write_idx; j++) {
@@ -201,10 +238,26 @@ fn void SlotCollector.checkBlock(SlotCollector* c, BlockId blk_id) {
201238 wa.slot = slot;
202239 c.num_writes++;
203240 }
204-
205- wa.ref = cur.args[0];
241+ wa.ref = { RefKind.Temp, idx };
206242 //printf("[%d] store %d (%s %d)\n", i, slot, wa.ref.getKindName(), wa.ref.value);
207-
243+ #if 0
244+ cur.clear();
245+ if (i + 1 < b.instr.count) {
246+ Instr* next = &ii[i+1];
247+ if (next.isComment()) {
248+ next.clear();
249+ i++;
250+ }
251+ }
252+ #endif
253+ break;
254+ case Alloc1:
255+ case Alloc2:
256+ case Alloc4:
257+ case Alloc8:
258+ // check if used outside load/store (eg in add for array index)
259+ // only remove if only used as load/store (eg. can be a register)
260+ // TODO could mark this in earlier pass
208261 cur.clear();
209262 if (i + 1 < b.instr.count) {
210263 Instr* next = &ii[i+1];
@@ -213,6 +266,22 @@ fn void SlotCollector.checkBlock(SlotCollector* c, BlockId blk_id) {
213266 i++;
214267 }
215268 }
269+ break;
270+ case Call:
271+ if (cur.args[1].isRefList()) {
272+ Ref* refs = c.info.refs.get(cur.args[1].value);
273+ u32 r = 0;
274+ while (refs[r].kind != RefKind.None) {
275+ c.checkRef(&refs[r], bi.wr_start);
276+ r++;
277+ }
278+ }
279+ break;
280+ // TODO can phi (from ternary operator) have Slot Refs?
281+ default:
282+ c.checkRef(&cur.args[0], bi.wr_start);
283+ c.checkRef(&cur.args[0], bi.wr_start);
284+ break;
216285 }
217286 }
218287
0 commit comments