Skip to content

Commit 69f3986

Browse files
committed
Air.Legalize: revert to loops for scalarizations
I had tried unrolling the loops to avoid requiring the `vector_store_elem` instruction, but it's arguably a problem to generate O(N) code for an operation on `@Vector(N, T)`. In addition, that lowering emitted a lot of `.aggregate_init` instructions, which is itself a quite difficult operation to codegen. This requires reintroducing runtime vector indexing internally. However, I've put it in a couple of instructions which are intended only for use by `Air.Legalize`, named `legalize_vec_elem_val` (like `array_elem_val`, but for indexing a vector with a runtime-known index) and `legalize_vec_store_elem` (like the old `vector_store_elem` instruction). These are explicitly documented as *not* being emitted by Sema, so need only be implemented by backends if they actually use an `Air.Legalize.Feature` which emits them (otherwise they can be marked as `unreachable`).
1 parent 99a7884 commit 69f3986

File tree

14 files changed

+1548
-301
lines changed

14 files changed

+1548
-301
lines changed

src/Air.zig

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -660,8 +660,8 @@ pub const Inst = struct {
660660
/// Given a pointer to a slice, return a pointer to the pointer of the slice.
661661
/// Uses the `ty_op` field.
662662
ptr_slice_ptr_ptr,
663-
/// Given an (array value or vector value) and element index,
664-
/// return the element value at that index.
663+
/// Given an (array value or vector value) and element index, return the element value at
664+
/// that index. If the lhs is a vector value, the index is guaranteed to be comptime-known.
665665
/// Result type is the element type of the array operand.
666666
/// Uses the `bin_op` field.
667667
array_elem_val,
@@ -915,6 +915,26 @@ pub const Inst = struct {
915915
/// Operand is unused and set to Ref.none
916916
work_group_id,
917917

918+
// The remaining instructions are not emitted by Sema. They are only emitted by `Legalize`,
919+
// depending on the enabled features. As such, backends can consider them `unreachable` if
920+
// they do not enable the relevant legalizations.
921+
922+
/// Given a pointer to a vector, a runtime-known index, and a scalar value, store the value
923+
/// into the vector at the given index. Zig does not support this operation, but `Legalize`
924+
/// may emit it when scalarizing vector operations.
925+
///
926+
/// Uses the `pl_op` field with payload `Bin`. `operand` is the vector pointer. `lhs` is the
927+
/// element index of type `usize`. `rhs` is the element value. Result is always void.
928+
legalize_vec_store_elem,
929+
/// Given a vector value and a runtime-known index, return the element value at that index.
930+
/// This instruction is similar to `array_elem_val`; the only difference is that the index
931+
/// here is runtime-known, which is usually not allowed for vectors. `Legalize` may emit
932+
/// this instruction when scalarizing vector operations.
933+
///
934+
/// Uses the `bin_op` field. `lhs` is the vector value. `rhs` is the element index. Result
935+
/// type is the vector element type.
936+
legalize_vec_elem_val,
937+
918938
pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag {
919939
switch (op) {
920940
.lt => return if (optimized) .cmp_lt_optimized else .cmp_lt,
@@ -1681,6 +1701,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
16811701
.prefetch,
16821702
.set_err_return_trace,
16831703
.c_va_end,
1704+
.legalize_vec_store_elem,
16841705
=> return .void,
16851706

16861707
.slice_len,
@@ -1699,7 +1720,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
16991720
return .fromInterned(ip.funcTypeReturnType(callee_ty.toIntern()));
17001721
},
17011722

1702-
.slice_elem_val, .ptr_elem_val, .array_elem_val => {
1723+
.slice_elem_val, .ptr_elem_val, .array_elem_val, .legalize_vec_elem_val => {
17031724
const ptr_ty = air.typeOf(datas[@intFromEnum(inst)].bin_op.lhs, ip);
17041725
return ptr_ty.childTypeIp(ip);
17051726
},
@@ -1857,6 +1878,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
18571878
.intcast_safe,
18581879
.int_from_float_safe,
18591880
.int_from_float_optimized_safe,
1881+
.legalize_vec_store_elem,
18601882
=> true,
18611883

18621884
.add,
@@ -2002,6 +2024,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
20022024
.work_item_id,
20032025
.work_group_size,
20042026
.work_group_id,
2027+
.legalize_vec_elem_val,
20052028
=> false,
20062029

20072030
.is_non_null_ptr, .is_null_ptr, .is_non_err_ptr, .is_err_ptr => air.typeOf(data.un_op, ip).isVolatilePtrIp(ip),

src/Air/Legalize.zig

Lines changed: 818 additions & 288 deletions
Large diffs are not rendered by default.

src/Air/Liveness.zig

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,7 @@ fn analyzeInst(
458458
.memset_safe,
459459
.memcpy,
460460
.memmove,
461+
.legalize_vec_elem_val,
461462
=> {
462463
const o = inst_datas[@intFromEnum(inst)].bin_op;
463464
return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none });
@@ -769,6 +770,12 @@ fn analyzeInst(
769770
const pl_op = inst_datas[@intFromEnum(inst)].pl_op;
770771
return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, .none, .none });
771772
},
773+
774+
.legalize_vec_store_elem => {
775+
const pl_op = inst_datas[@intFromEnum(inst)].pl_op;
776+
const bin = a.air.extraData(Air.Bin, pl_op.payload).data;
777+
return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, bin.lhs, bin.rhs });
778+
},
772779
}
773780
}
774781

src/Air/Liveness/Verify.zig

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
272272
.memset_safe,
273273
.memcpy,
274274
.memmove,
275+
.legalize_vec_elem_val,
275276
=> {
276277
const bin_op = data[@intFromEnum(inst)].bin_op;
277278
try self.verifyInstOperands(inst, .{ bin_op.lhs, bin_op.rhs, .none });
@@ -577,6 +578,11 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
577578

578579
try self.verifyInst(inst);
579580
},
581+
.legalize_vec_store_elem => {
582+
const pl_op = data[@intFromEnum(inst)].pl_op;
583+
const bin = self.air.extraData(Air.Bin, pl_op.payload).data;
584+
try self.verifyInstOperands(inst, .{ pl_op.operand, bin.lhs, bin.rhs });
585+
},
580586
}
581587
}
582588
}

src/Air/print.zig

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ const Writer = struct {
171171
.memmove,
172172
.memset,
173173
.memset_safe,
174+
.legalize_vec_elem_val,
174175
=> try w.writeBinOp(s, inst),
175176

176177
.is_null,
@@ -331,6 +332,7 @@ const Writer = struct {
331332
.reduce, .reduce_optimized => try w.writeReduce(s, inst),
332333
.cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst),
333334
.runtime_nav_ptr => try w.writeRuntimeNavPtr(s, inst),
335+
.legalize_vec_store_elem => try w.writeLegalizeVecStoreElem(s, inst),
334336

335337
.work_item_id,
336338
.work_group_size,
@@ -508,6 +510,18 @@ const Writer = struct {
508510
try w.writeOperand(s, inst, 2, pl_op.operand);
509511
}
510512

513+
fn writeLegalizeVecStoreElem(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
514+
const pl_op = w.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
515+
const bin = w.air.extraData(Air.Bin, pl_op.payload).data;
516+
517+
try w.writeOperand(s, inst, 0, pl_op.operand);
518+
try s.writeAll(", ");
519+
try w.writeOperand(s, inst, 1, bin.lhs);
520+
try s.writeAll(", ");
521+
try w.writeOperand(s, inst, 2, bin.rhs);
522+
try s.writeAll(", ");
523+
}
524+
511525
fn writeShuffleOne(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
512526
const unwrapped = w.air.unwrapShuffleOne(w.pt.zcu, inst);
513527
try w.writeType(s, unwrapped.result_ty);

src/Air/types_resolved.zig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
8888
.atomic_store_monotonic,
8989
.atomic_store_release,
9090
.atomic_store_seq_cst,
91+
.legalize_vec_elem_val,
9192
=> {
9293
if (!checkRef(data.bin_op.lhs, zcu)) return false;
9394
if (!checkRef(data.bin_op.rhs, zcu)) return false;
@@ -322,6 +323,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
322323

323324
.select,
324325
.mul_add,
326+
.legalize_vec_store_elem,
325327
=> {
326328
const bin = air.extraData(Air.Bin, data.pl_op.payload).data;
327329
if (!checkRef(data.pl_op.operand, zcu)) return false;

src/Sema.zig

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15930,16 +15930,21 @@ fn zirOverflowArithmetic(
1593015930
}
1593115931
}
1593215932
// If either of the arguments is one, the result is the other and no overflow occurred.
15933-
const scalar_one = try pt.intValue(dest_ty.scalarType(zcu), 1);
15934-
const vec_one = try sema.splat(dest_ty, scalar_one);
15935-
if (maybe_lhs_val) |lhs_val| {
15936-
if (!lhs_val.isUndef(zcu) and try sema.compareAll(lhs_val, .eq, vec_one, dest_ty)) {
15937-
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs };
15933+
const dest_scalar_ty = dest_ty.scalarType(zcu);
15934+
const dest_scalar_int = dest_scalar_ty.intInfo(zcu);
15935+
// We could still be working with i1, where '1' is not a legal value!
15936+
if (!(dest_scalar_int.bits == 1 and dest_scalar_int.signedness == .signed)) {
15937+
const scalar_one = try pt.intValue(dest_scalar_ty, 1);
15938+
const vec_one = try sema.splat(dest_ty, scalar_one);
15939+
if (maybe_lhs_val) |lhs_val| {
15940+
if (!lhs_val.isUndef(zcu) and try sema.compareAll(lhs_val, .eq, vec_one, dest_ty)) {
15941+
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs };
15942+
}
1593815943
}
15939-
}
15940-
if (maybe_rhs_val) |rhs_val| {
15941-
if (!rhs_val.isUndef(zcu) and try sema.compareAll(rhs_val, .eq, vec_one, dest_ty)) {
15942-
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs };
15944+
if (maybe_rhs_val) |rhs_val| {
15945+
if (!rhs_val.isUndef(zcu) and try sema.compareAll(rhs_val, .eq, vec_one, dest_ty)) {
15946+
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs };
15947+
}
1594315948
}
1594415949
}
1594515950

src/codegen/aarch64/Select.zig

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,10 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void {
134134
var air_inst_index = air_body[air_body_index];
135135
const initial_def_order_len = isel.def_order.count();
136136
air_tag: switch (air_tags[@intFromEnum(air_inst_index)]) {
137+
// No "scalarize" legalizations are enabled, so these instructions never appear.
138+
.legalize_vec_elem_val => unreachable,
139+
.legalize_vec_store_elem => unreachable,
140+
137141
.arg,
138142
.ret_addr,
139143
.frame_addr,
@@ -950,6 +954,11 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory,
950954
};
951955
air_tag: switch (air.next().?) {
952956
else => |air_tag| return isel.fail("unimplemented {t}", .{air_tag}),
957+
958+
// No "scalarize" legalizations are enabled, so these instructions never appear.
959+
.legalize_vec_elem_val => unreachable,
960+
.legalize_vec_store_elem => unreachable,
961+
953962
.arg => {
954963
const arg_vi = isel.live_values.fetchRemove(air.inst_index).?.value;
955964
defer arg_vi.deref(isel);

src/codegen/c.zig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3325,6 +3325,10 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) Error!void {
33253325
// zig fmt: off
33263326
.inferred_alloc, .inferred_alloc_comptime => unreachable,
33273327
3328+
// No "scalarize" legalizations are enabled, so these instructions never appear.
3329+
.legalize_vec_elem_val => unreachable,
3330+
.legalize_vec_store_elem => unreachable,
3331+
33283332
.arg => try airArg(f, inst),
33293333
33303334
.breakpoint => try airBreakpoint(f),

src/codegen/llvm.zig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4886,6 +4886,11 @@ pub const FuncGen = struct {
48864886

48874887
const val: Builder.Value = switch (air_tags[@intFromEnum(inst)]) {
48884888
// zig fmt: off
4889+
4890+
// No "scalarize" legalizations are enabled, so these instructions never appear.
4891+
.legalize_vec_elem_val => unreachable,
4892+
.legalize_vec_store_elem => unreachable,
4893+
48894894
.add => try self.airAdd(inst, .normal),
48904895
.add_optimized => try self.airAdd(inst, .fast),
48914896
.add_wrap => try self.airAddWrap(inst),

0 commit comments

Comments
 (0)