master
1pt: Zcu.PerThread,
2air_instructions: std.MultiArrayList(Air.Inst),
3air_extra: std.ArrayList(u32),
4features: if (switch (dev.env) {
5 .bootstrap => @import("../codegen/c.zig").legalizeFeatures(undefined),
6 else => null,
7}) |bootstrap_features| struct {
8 fn init(features: *const Features) @This() {
9 assert(features.eql(bootstrap_features.*));
10 return .{};
11 }
12 /// `inline` to propagate comptime-known result.
13 inline fn has(_: @This(), comptime feature: Feature) bool {
14 return comptime bootstrap_features.contains(feature);
15 }
16 /// `inline` to propagate comptime-known result.
17 inline fn hasAny(_: @This(), comptime features: []const Feature) bool {
18 return comptime !bootstrap_features.intersectWith(.initMany(features)).eql(.initEmpty());
19 }
20} else struct {
21 features: *const Features,
22 /// `inline` to propagate whether `dev.check` returns.
23 inline fn init(features: *const Features) @This() {
24 dev.check(.legalize);
25 return .{ .features = features };
26 }
27 fn has(rt: @This(), comptime feature: Feature) bool {
28 return rt.features.contains(feature);
29 }
30 fn hasAny(rt: @This(), comptime features: []const Feature) bool {
31 return !rt.features.intersectWith(comptime .initMany(features)).eql(comptime .initEmpty());
32 }
33},
34
35pub const Feature = enum {
36 scalarize_add,
37 scalarize_add_safe,
38 scalarize_add_optimized,
39 scalarize_add_wrap,
40 scalarize_add_sat,
41 scalarize_sub,
42 scalarize_sub_safe,
43 scalarize_sub_optimized,
44 scalarize_sub_wrap,
45 scalarize_sub_sat,
46 scalarize_mul,
47 scalarize_mul_safe,
48 scalarize_mul_optimized,
49 scalarize_mul_wrap,
50 scalarize_mul_sat,
51 scalarize_div_float,
52 scalarize_div_float_optimized,
53 scalarize_div_trunc,
54 scalarize_div_trunc_optimized,
55 scalarize_div_floor,
56 scalarize_div_floor_optimized,
57 scalarize_div_exact,
58 scalarize_div_exact_optimized,
59 scalarize_rem,
60 scalarize_rem_optimized,
61 scalarize_mod,
62 scalarize_mod_optimized,
63 scalarize_max,
64 scalarize_min,
65 scalarize_add_with_overflow,
66 scalarize_sub_with_overflow,
67 scalarize_mul_with_overflow,
68 scalarize_shl_with_overflow,
69 scalarize_bit_and,
70 scalarize_bit_or,
71 scalarize_shr,
72 scalarize_shr_exact,
73 scalarize_shl,
74 scalarize_shl_exact,
75 scalarize_shl_sat,
76 scalarize_xor,
77 scalarize_not,
78 /// Scalarize `bitcast` from or to an array or vector type to `bitcast`s of the elements.
79 /// This does not apply if `@bitSizeOf(Elem) == 8 * @sizeOf(Elem)`.
80 /// When this feature is enabled, all remaining `bitcast`s can be lowered using the old bitcast
81 /// semantics (reinterpret memory) instead of the new bitcast semantics (copy logical bits) and
82 /// the behavior will be equivalent. However, the behavior of `@bitSize` on arrays must be
83 /// changed in `Type.zig` before enabling this feature to conform to the new bitcast semantics.
84 scalarize_bitcast,
85 scalarize_clz,
86 scalarize_ctz,
87 scalarize_popcount,
88 scalarize_byte_swap,
89 scalarize_bit_reverse,
90 scalarize_sqrt,
91 scalarize_sin,
92 scalarize_cos,
93 scalarize_tan,
94 scalarize_exp,
95 scalarize_exp2,
96 scalarize_log,
97 scalarize_log2,
98 scalarize_log10,
99 scalarize_abs,
100 scalarize_floor,
101 scalarize_ceil,
102 scalarize_round,
103 scalarize_trunc_float,
104 scalarize_neg,
105 scalarize_neg_optimized,
106 scalarize_cmp_vector,
107 scalarize_cmp_vector_optimized,
108 scalarize_fptrunc,
109 scalarize_fpext,
110 scalarize_intcast,
111 scalarize_intcast_safe,
112 scalarize_trunc,
113 scalarize_int_from_float,
114 scalarize_int_from_float_optimized,
115 scalarize_int_from_float_safe,
116 scalarize_int_from_float_optimized_safe,
117 scalarize_float_from_int,
118 scalarize_reduce,
119 scalarize_reduce_optimized,
120 scalarize_shuffle_one,
121 scalarize_shuffle_two,
122 scalarize_select,
123 scalarize_mul_add,
124
125 /// Legalize (shift lhs, (splat rhs)) -> (shift lhs, rhs)
126 unsplat_shift_rhs,
127 /// Legalize reduce of a one element vector to a bitcast.
128 reduce_one_elem_to_bitcast,
129 /// Legalize splat to a one element vector to a bitcast.
130 splat_one_elem_to_bitcast,
131
132 /// Replace `intcast_safe` with an explicit safety check which `call`s the panic function on failure.
133 /// Not compatible with `scalarize_intcast_safe`.
134 expand_intcast_safe,
135 /// Replace `int_from_float_safe` with an explicit safety check which `call`s the panic function on failure.
136 /// Not compatible with `scalarize_int_from_float_safe`.
137 expand_int_from_float_safe,
138 /// Replace `int_from_float_optimized_safe` with an explicit safety check which `call`s the panic function on failure.
139 /// Not compatible with `scalarize_int_from_float_optimized_safe`.
140 expand_int_from_float_optimized_safe,
141 /// Replace `add_safe` with an explicit safety check which `call`s the panic function on failure.
142 /// Not compatible with `scalarize_add_safe`.
143 expand_add_safe,
144 /// Replace `sub_safe` with an explicit safety check which `call`s the panic function on failure.
145 /// Not compatible with `scalarize_sub_safe`.
146 expand_sub_safe,
147 /// Replace `mul_safe` with an explicit safety check which `call`s the panic function on failure.
148 /// Not compatible with `scalarize_mul_safe`.
149 expand_mul_safe,
150
151 /// Replace `load` from a packed pointer with a non-packed `load`, `shr`, `truncate`.
152 /// Currently assumes little endian and a specific integer layout where the lsb of every integer is the lsb of the
153 /// first byte of memory until bit pointers know their backing type.
154 expand_packed_load,
155 /// Replace `store` and `store_safe` to a packed pointer with a non-packed `load`/`store`, `bit_and`, `bit_or`, and `shl`.
156 /// Currently assumes little endian and a specific integer layout where the lsb of every integer is the lsb of the
157 /// first byte of memory until bit pointers know their backing type.
158 expand_packed_store,
159 /// Replace `struct_field_val` of a packed field with a `bitcast` to integer, `shr`, `trunc`, and `bitcast` to field type.
160 expand_packed_struct_field_val,
161 /// Replace `aggregate_init` of a packed struct with a sequence of `shl_exact`, `bitcast`, `intcast`, and `bit_or`.
162 expand_packed_aggregate_init,
163
164 /// Replace all arithmetic operations on 16-bit floating-point types with calls to soft-float
165 /// routines in compiler_rt, including `fptrunc`/`fpext`/`float_from_int`/`int_from_float`
166 /// where the operand or target type is a 16-bit floating-point type. This feature implies:
167 ///
168 /// * scalarization of 16-bit float vector operations
169 /// * expansion of safety-checked 16-bit float operations
170 ///
171 /// If this feature is enabled, the following AIR instruction tags may be emitted:
172 /// * `.legalize_vec_elem_val`
173 /// * `.legalize_vec_store_elem`
174 /// * `.legalize_compiler_rt_call`
175 soft_f16,
176 /// Like `soft_f16`, but for 32-bit floating-point types.
177 soft_f32,
178 /// Like `soft_f16`, but for 64-bit floating-point types.
179 soft_f64,
180 /// Like `soft_f16`, but for 80-bit floating-point types.
181 soft_f80,
182 /// Like `soft_f16`, but for 128-bit floating-point types.
183 soft_f128,
184
185 fn scalarize(tag: Air.Inst.Tag) Feature {
186 return switch (tag) {
187 else => unreachable,
188 .add => .scalarize_add,
189 .add_safe => .scalarize_add_safe,
190 .add_optimized => .scalarize_add_optimized,
191 .add_wrap => .scalarize_add_wrap,
192 .add_sat => .scalarize_add_sat,
193 .sub => .scalarize_sub,
194 .sub_safe => .scalarize_sub_safe,
195 .sub_optimized => .scalarize_sub_optimized,
196 .sub_wrap => .scalarize_sub_wrap,
197 .sub_sat => .scalarize_sub_sat,
198 .mul => .scalarize_mul,
199 .mul_safe => .scalarize_mul_safe,
200 .mul_optimized => .scalarize_mul_optimized,
201 .mul_wrap => .scalarize_mul_wrap,
202 .mul_sat => .scalarize_mul_sat,
203 .div_float => .scalarize_div_float,
204 .div_float_optimized => .scalarize_div_float_optimized,
205 .div_trunc => .scalarize_div_trunc,
206 .div_trunc_optimized => .scalarize_div_trunc_optimized,
207 .div_floor => .scalarize_div_floor,
208 .div_floor_optimized => .scalarize_div_floor_optimized,
209 .div_exact => .scalarize_div_exact,
210 .div_exact_optimized => .scalarize_div_exact_optimized,
211 .rem => .scalarize_rem,
212 .rem_optimized => .scalarize_rem_optimized,
213 .mod => .scalarize_mod,
214 .mod_optimized => .scalarize_mod_optimized,
215 .max => .scalarize_max,
216 .min => .scalarize_min,
217 .add_with_overflow => .scalarize_add_with_overflow,
218 .sub_with_overflow => .scalarize_sub_with_overflow,
219 .mul_with_overflow => .scalarize_mul_with_overflow,
220 .shl_with_overflow => .scalarize_shl_with_overflow,
221 .bit_and => .scalarize_bit_and,
222 .bit_or => .scalarize_bit_or,
223 .shr => .scalarize_shr,
224 .shr_exact => .scalarize_shr_exact,
225 .shl => .scalarize_shl,
226 .shl_exact => .scalarize_shl_exact,
227 .shl_sat => .scalarize_shl_sat,
228 .xor => .scalarize_xor,
229 .not => .scalarize_not,
230 .bitcast => .scalarize_bitcast,
231 .clz => .scalarize_clz,
232 .ctz => .scalarize_ctz,
233 .popcount => .scalarize_popcount,
234 .byte_swap => .scalarize_byte_swap,
235 .bit_reverse => .scalarize_bit_reverse,
236 .sqrt => .scalarize_sqrt,
237 .sin => .scalarize_sin,
238 .cos => .scalarize_cos,
239 .tan => .scalarize_tan,
240 .exp => .scalarize_exp,
241 .exp2 => .scalarize_exp2,
242 .log => .scalarize_log,
243 .log2 => .scalarize_log2,
244 .log10 => .scalarize_log10,
245 .abs => .scalarize_abs,
246 .floor => .scalarize_floor,
247 .ceil => .scalarize_ceil,
248 .round => .scalarize_round,
249 .trunc_float => .scalarize_trunc_float,
250 .neg => .scalarize_neg,
251 .neg_optimized => .scalarize_neg_optimized,
252 .cmp_vector => .scalarize_cmp_vector,
253 .cmp_vector_optimized => .scalarize_cmp_vector_optimized,
254 .fptrunc => .scalarize_fptrunc,
255 .fpext => .scalarize_fpext,
256 .intcast => .scalarize_intcast,
257 .intcast_safe => .scalarize_intcast_safe,
258 .trunc => .scalarize_trunc,
259 .int_from_float => .scalarize_int_from_float,
260 .int_from_float_optimized => .scalarize_int_from_float_optimized,
261 .int_from_float_safe => .scalarize_int_from_float_safe,
262 .int_from_float_optimized_safe => .scalarize_int_from_float_optimized_safe,
263 .float_from_int => .scalarize_float_from_int,
264 .reduce => .scalarize_reduce,
265 .reduce_optimized => .scalarize_reduce_optimized,
266 .shuffle_one => .scalarize_shuffle_one,
267 .shuffle_two => .scalarize_shuffle_two,
268 .select => .scalarize_select,
269 .mul_add => .scalarize_mul_add,
270 };
271 }
272};
273
274pub const Features = std.enums.EnumSet(Feature);
275
276pub const Error = std.mem.Allocator.Error;
277
278pub fn legalize(air: *Air, pt: Zcu.PerThread, features: *const Features) Error!void {
279 assert(!features.eql(comptime .initEmpty())); // backend asked to run legalize, but no features were enabled
280 var l: Legalize = .{
281 .pt = pt,
282 .air_instructions = air.instructions.toMultiArrayList(),
283 .air_extra = air.extra,
284 .features = .init(features),
285 };
286 defer air.* = l.getTmpAir();
287 const main_extra = l.extraData(Air.Block, l.air_extra.items[@intFromEnum(Air.ExtraIndex.main_block)]);
288 try l.legalizeBody(main_extra.end, main_extra.data.body_len);
289}
290
291fn getTmpAir(l: *const Legalize) Air {
292 return .{
293 .instructions = l.air_instructions.slice(),
294 .extra = l.air_extra,
295 };
296}
297
298fn typeOf(l: *const Legalize, ref: Air.Inst.Ref) Type {
299 return l.getTmpAir().typeOf(ref, &l.pt.zcu.intern_pool);
300}
301
302fn typeOfIndex(l: *const Legalize, inst: Air.Inst.Index) Type {
303 return l.getTmpAir().typeOfIndex(inst, &l.pt.zcu.intern_pool);
304}
305
306fn extraData(l: *const Legalize, comptime T: type, index: usize) @TypeOf(Air.extraData(undefined, T, undefined)) {
307 return l.getTmpAir().extraData(T, index);
308}
309
310fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
311 // In zig1, this function needs a lot of eval branch quota, because all of the inlined feature
312 // checks are comptime-evaluated (to ensure unused features are not included in the binary).
313 @setEvalBranchQuota(4000);
314
315 const zcu = l.pt.zcu;
316 const ip = &zcu.intern_pool;
317 for (0..body_len) |body_index| {
318 const inst: Air.Inst.Index = @enumFromInt(l.air_extra.items[body_start + body_index]);
319 inst: switch (l.air_instructions.items(.tag)[@intFromEnum(inst)]) {
320 .arg => {},
321 inline .add,
322 .add_optimized,
323 .sub,
324 .sub_optimized,
325 .mul,
326 .mul_optimized,
327 .div_float,
328 .div_float_optimized,
329 .div_exact,
330 .div_exact_optimized,
331 .rem,
332 .rem_optimized,
333 .min,
334 .max,
335 => |air_tag| {
336 const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
337 const ty = l.typeOf(bin_op.lhs);
338 switch (l.wantScalarizeOrSoftFloat(air_tag, ty)) {
339 .none => {},
340 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)),
341 .soft_float => continue :inst try l.compilerRtCall(
342 inst,
343 softFloatFunc(air_tag, ty, zcu),
344 &.{ bin_op.lhs, bin_op.rhs },
345 l.typeOf(bin_op.lhs),
346 ),
347 }
348 },
349 inline .div_trunc,
350 .div_trunc_optimized,
351 .div_floor,
352 .div_floor_optimized,
353 => |air_tag| {
354 const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
355 switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(bin_op.lhs))) {
356 .none => {},
357 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)),
358 .soft_float => continue :inst l.replaceInst(inst, .block, try l.softFloatDivTruncFloorBlockPayload(
359 inst,
360 bin_op.lhs,
361 bin_op.rhs,
362 air_tag,
363 )),
364 }
365 },
366 inline .mod, .mod_optimized => |air_tag| {
367 const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
368 switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(bin_op.lhs))) {
369 .none => {},
370 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)),
371 .soft_float => continue :inst l.replaceInst(inst, .block, try l.softFloatModBlockPayload(
372 inst,
373 bin_op.lhs,
374 bin_op.rhs,
375 )),
376 }
377 },
378 inline .add_wrap,
379 .add_sat,
380 .sub_wrap,
381 .sub_sat,
382 .mul_wrap,
383 .mul_sat,
384 .bit_and,
385 .bit_or,
386 .xor,
387 => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
388 const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
389 if (l.typeOf(bin_op.lhs).isVector(zcu)) {
390 continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
391 }
392 },
393 .add_safe => if (l.features.has(.expand_add_safe)) {
394 assert(!l.features.has(.scalarize_add_safe)); // it doesn't make sense to do both
395 continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .add_with_overflow));
396 } else if (l.features.has(.scalarize_add_safe)) {
397 const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
398 if (l.typeOf(bin_op.lhs).isVector(zcu)) {
399 continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
400 }
401 },
402 .sub_safe => if (l.features.has(.expand_sub_safe)) {
403 assert(!l.features.has(.scalarize_sub_safe)); // it doesn't make sense to do both
404 continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .sub_with_overflow));
405 } else if (l.features.has(.scalarize_sub_safe)) {
406 const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
407 if (l.typeOf(bin_op.lhs).isVector(zcu)) {
408 continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
409 }
410 },
411 .mul_safe => if (l.features.has(.expand_mul_safe)) {
412 assert(!l.features.has(.scalarize_mul_safe)); // it doesn't make sense to do both
413 continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .mul_with_overflow));
414 } else if (l.features.has(.scalarize_mul_safe)) {
415 const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
416 if (l.typeOf(bin_op.lhs).isVector(zcu)) {
417 continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
418 }
419 },
420 .ptr_add, .ptr_sub => {},
421 inline .add_with_overflow,
422 .sub_with_overflow,
423 .mul_with_overflow,
424 .shl_with_overflow,
425 => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
426 const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
427 if (ty_pl.ty.toType().fieldType(0, zcu).isVector(zcu)) {
428 continue :inst l.replaceInst(inst, .block, try l.scalarizeOverflowBlockPayload(inst));
429 }
430 },
431 .alloc => {},
432 .inferred_alloc, .inferred_alloc_comptime => unreachable,
433 .ret_ptr, .assembly => {},
434 inline .shr,
435 .shr_exact,
436 .shl,
437 .shl_exact,
438 .shl_sat,
439 => |air_tag| if (l.features.hasAny(&.{
440 .unsplat_shift_rhs,
441 .scalarize(air_tag),
442 })) {
443 const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
444 if (l.typeOf(bin_op.rhs).isVector(zcu)) {
445 if (l.features.has(.unsplat_shift_rhs)) {
446 if (bin_op.rhs.toInterned()) |rhs_ip_index| switch (ip.indexToKey(rhs_ip_index)) {
447 else => {},
448 .aggregate => |aggregate| switch (aggregate.storage) {
449 else => {},
450 .repeated_elem => |splat| continue :inst l.replaceInst(inst, air_tag, .{ .bin_op = .{
451 .lhs = bin_op.lhs,
452 .rhs = Air.internedToRef(splat),
453 } }),
454 },
455 } else {
456 const rhs_inst = bin_op.rhs.toIndex().?;
457 switch (l.air_instructions.items(.tag)[@intFromEnum(rhs_inst)]) {
458 else => {},
459 .splat => continue :inst l.replaceInst(inst, air_tag, .{ .bin_op = .{
460 .lhs = bin_op.lhs,
461 .rhs = l.air_instructions.items(.data)[@intFromEnum(rhs_inst)].ty_op.operand,
462 } }),
463 }
464 }
465 }
466 if (l.features.has(comptime .scalarize(air_tag))) {
467 continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
468 }
469 }
470 },
471 inline .not,
472 .clz,
473 .ctz,
474 .popcount,
475 .byte_swap,
476 .bit_reverse,
477 .intcast,
478 .trunc,
479 => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
480 const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
481 if (ty_op.ty.toType().isVector(zcu)) {
482 continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
483 }
484 },
485 .abs => {
486 const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
487 switch (l.wantScalarizeOrSoftFloat(.abs, ty_op.ty.toType())) {
488 .none => {},
489 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)),
490 .soft_float => continue :inst try l.compilerRtCall(
491 inst,
492 softFloatFunc(.abs, ty_op.ty.toType(), zcu),
493 &.{ty_op.operand},
494 ty_op.ty.toType(),
495 ),
496 }
497 },
498 .fptrunc => {
499 const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
500 const src_ty = l.typeOf(ty_op.operand);
501 const dest_ty = ty_op.ty.toType();
502 if (src_ty.zigTypeTag(zcu) == .vector) {
503 if (l.features.has(.scalarize_fptrunc) or
504 l.wantSoftFloatScalar(src_ty.childType(zcu)) or
505 l.wantSoftFloatScalar(dest_ty.childType(zcu)))
506 {
507 continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
508 }
509 } else if (l.wantSoftFloatScalar(src_ty) or l.wantSoftFloatScalar(dest_ty)) {
510 continue :inst try l.compilerRtCall(inst, l.softFptruncFunc(src_ty, dest_ty), &.{ty_op.operand}, dest_ty);
511 }
512 },
513 .fpext => {
514 const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
515 const src_ty = l.typeOf(ty_op.operand);
516 const dest_ty = ty_op.ty.toType();
517 if (src_ty.zigTypeTag(zcu) == .vector) {
518 if (l.features.has(.scalarize_fpext) or
519 l.wantSoftFloatScalar(src_ty.childType(zcu)) or
520 l.wantSoftFloatScalar(dest_ty.childType(zcu)))
521 {
522 continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
523 }
524 } else if (l.wantSoftFloatScalar(src_ty) or l.wantSoftFloatScalar(dest_ty)) {
525 continue :inst try l.compilerRtCall(inst, l.softFpextFunc(src_ty, dest_ty), &.{ty_op.operand}, dest_ty);
526 }
527 },
528 inline .int_from_float, .int_from_float_optimized => |air_tag| {
529 const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
530 switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(ty_op.operand))) {
531 .none => {},
532 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)),
533 .soft_float => switch (try l.softIntFromFloat(inst)) {
534 .call => |func| continue :inst try l.compilerRtCall(inst, func, &.{ty_op.operand}, ty_op.ty.toType()),
535 .block_payload => |data| continue :inst l.replaceInst(inst, .block, data),
536 },
537 }
538 },
539 .float_from_int => {
540 const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
541 const dest_ty = ty_op.ty.toType();
542 switch (l.wantScalarizeOrSoftFloat(.float_from_int, dest_ty)) {
543 .none => {},
544 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)),
545 .soft_float => switch (try l.softFloatFromInt(inst)) {
546 .call => |func| continue :inst try l.compilerRtCall(inst, func, &.{ty_op.operand}, dest_ty),
547 .block_payload => |data| continue :inst l.replaceInst(inst, .block, data),
548 },
549 }
550 },
551 .bitcast => if (l.features.has(.scalarize_bitcast)) {
552 if (try l.scalarizeBitcastBlockPayload(inst)) |payload| {
553 continue :inst l.replaceInst(inst, .block, payload);
554 }
555 },
556 .intcast_safe => if (l.features.has(.expand_intcast_safe)) {
557 assert(!l.features.has(.scalarize_intcast_safe)); // it doesn't make sense to do both
558 continue :inst l.replaceInst(inst, .block, try l.safeIntcastBlockPayload(inst));
559 } else if (l.features.has(.scalarize_intcast_safe)) {
560 const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
561 if (ty_op.ty.toType().isVector(zcu)) {
562 continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
563 }
564 },
565 inline .int_from_float_safe,
566 .int_from_float_optimized_safe,
567 => |air_tag| {
568 const optimized = air_tag == .int_from_float_optimized_safe;
569 const expand_feature = switch (air_tag) {
570 .int_from_float_safe => .expand_int_from_float_safe,
571 .int_from_float_optimized_safe => .expand_int_from_float_optimized_safe,
572 else => unreachable,
573 };
574 if (l.features.has(expand_feature)) {
575 assert(!l.features.has(.scalarize(air_tag)));
576 continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, optimized));
577 }
578 const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
579 switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(ty_op.operand))) {
580 .none => {},
581 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)),
582 // Expand the safety check so that soft-float can rewrite the unchecked operation.
583 .soft_float => continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, optimized)),
584 }
585 },
586 .block, .loop => {
587 const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
588 const extra = l.extraData(Air.Block, ty_pl.payload);
589 try l.legalizeBody(extra.end, extra.data.body_len);
590 },
591 .repeat,
592 .br,
593 .trap,
594 .breakpoint,
595 .ret_addr,
596 .frame_addr,
597 .call,
598 .call_always_tail,
599 .call_never_tail,
600 .call_never_inline,
601 => {},
602 inline .sqrt,
603 .sin,
604 .cos,
605 .tan,
606 .exp,
607 .exp2,
608 .log,
609 .log2,
610 .log10,
611 .floor,
612 .ceil,
613 .round,
614 .trunc_float,
615 => |air_tag| {
616 const operand = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op;
617 const ty = l.typeOf(operand);
618 switch (l.wantScalarizeOrSoftFloat(air_tag, ty)) {
619 .none => {},
620 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .un_op)),
621 .soft_float => continue :inst try l.compilerRtCall(
622 inst,
623 softFloatFunc(air_tag, ty, zcu),
624 &.{operand},
625 l.typeOf(operand),
626 ),
627 }
628 },
629 inline .neg, .neg_optimized => |air_tag| {
630 const operand = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op;
631 switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(operand))) {
632 .none => {},
633 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .un_op)),
634 .soft_float => continue :inst l.replaceInst(inst, .block, try l.softFloatNegBlockPayload(inst, operand)),
635 }
636 },
637 .cmp_lt,
638 .cmp_lt_optimized,
639 .cmp_lte,
640 .cmp_lte_optimized,
641 .cmp_eq,
642 .cmp_eq_optimized,
643 .cmp_gte,
644 .cmp_gte_optimized,
645 .cmp_gt,
646 .cmp_gt_optimized,
647 .cmp_neq,
648 .cmp_neq_optimized,
649 => |air_tag| {
650 const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
651 const ty = l.typeOf(bin_op.lhs);
652 if (l.wantSoftFloatScalar(ty)) {
653 continue :inst l.replaceInst(
654 inst,
655 .block,
656 try l.softFloatCmpBlockPayload(inst, ty, air_tag.toCmpOp().?, bin_op.lhs, bin_op.rhs),
657 );
658 }
659 },
660 inline .cmp_vector, .cmp_vector_optimized => |air_tag| {
661 const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
662 const payload = l.extraData(Air.VectorCmp, ty_pl.payload).data;
663 switch (l.wantScalarizeOrSoftFloat(air_tag, l.typeOf(payload.lhs))) {
664 .none => {},
665 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .cmp_vector)),
666 .soft_float => unreachable, // the operand is not a scalar
667 }
668 },
669 .cond_br => {
670 const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
671 const extra = l.extraData(Air.CondBr, pl_op.payload);
672 try l.legalizeBody(extra.end, extra.data.then_body_len);
673 try l.legalizeBody(extra.end + extra.data.then_body_len, extra.data.else_body_len);
674 },
675 .switch_br, .loop_switch_br => {
676 const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
677 const extra = l.extraData(Air.SwitchBr, pl_op.payload);
678 const hint_bag_count = std.math.divCeil(usize, extra.data.cases_len + 1, 10) catch unreachable;
679 var extra_index = extra.end + hint_bag_count;
680 for (0..extra.data.cases_len) |_| {
681 const case_extra = l.extraData(Air.SwitchBr.Case, extra_index);
682 const case_body_start = case_extra.end + case_extra.data.items_len + case_extra.data.ranges_len * 2;
683 try l.legalizeBody(case_body_start, case_extra.data.body_len);
684 extra_index = case_body_start + case_extra.data.body_len;
685 }
686 try l.legalizeBody(extra_index, extra.data.else_body_len);
687 },
688 .switch_dispatch => {},
689 .@"try", .try_cold => {
690 const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
691 const extra = l.extraData(Air.Try, pl_op.payload);
692 try l.legalizeBody(extra.end, extra.data.body_len);
693 },
694 .try_ptr, .try_ptr_cold => {
695 const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
696 const extra = l.extraData(Air.TryPtr, ty_pl.payload);
697 try l.legalizeBody(extra.end, extra.data.body_len);
698 },
699 .dbg_stmt, .dbg_empty_stmt => {},
700 .dbg_inline_block => {
701 const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
702 const extra = l.extraData(Air.DbgInlineBlock, ty_pl.payload);
703 try l.legalizeBody(extra.end, extra.data.body_len);
704 },
705 .dbg_var_ptr,
706 .dbg_var_val,
707 .dbg_arg_inline,
708 .is_null,
709 .is_non_null,
710 .is_null_ptr,
711 .is_non_null_ptr,
712 .is_err,
713 .is_non_err,
714 .is_err_ptr,
715 .is_non_err_ptr,
716 .bool_and,
717 .bool_or,
718 => {},
719 .load => if (l.features.has(.expand_packed_load)) {
720 const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
721 const ptr_info = l.typeOf(ty_op.operand).ptrInfo(zcu);
722 if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) {
723 continue :inst l.replaceInst(inst, .block, try l.packedLoadBlockPayload(inst));
724 }
725 },
726 .ret, .ret_safe, .ret_load => {},
727 .store, .store_safe => if (l.features.has(.expand_packed_store)) {
728 const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
729 const ptr_info = l.typeOf(bin_op.lhs).ptrInfo(zcu);
730 if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) {
731 continue :inst l.replaceInst(inst, .block, try l.packedStoreBlockPayload(inst));
732 }
733 },
734 .unreach,
735 .optional_payload,
736 .optional_payload_ptr,
737 .optional_payload_ptr_set,
738 .wrap_optional,
739 .unwrap_errunion_payload,
740 .unwrap_errunion_err,
741 .unwrap_errunion_payload_ptr,
742 .unwrap_errunion_err_ptr,
743 .errunion_payload_ptr_set,
744 .wrap_errunion_payload,
745 .wrap_errunion_err,
746 .struct_field_ptr,
747 .struct_field_ptr_index_0,
748 .struct_field_ptr_index_1,
749 .struct_field_ptr_index_2,
750 .struct_field_ptr_index_3,
751 => {},
752 .struct_field_val => if (l.features.has(.expand_packed_struct_field_val)) {
753 const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
754 const extra = l.extraData(Air.StructField, ty_pl.payload).data;
755 switch (l.typeOf(extra.struct_operand).containerLayout(zcu)) {
756 .auto, .@"extern" => {},
757 .@"packed" => continue :inst l.replaceInst(inst, .block, try l.packedStructFieldValBlockPayload(inst)),
758 }
759 },
760 .set_union_tag,
761 .get_union_tag,
762 .slice,
763 .slice_len,
764 .slice_ptr,
765 .ptr_slice_len_ptr,
766 .ptr_slice_ptr_ptr,
767 .array_elem_val,
768 .slice_elem_val,
769 .slice_elem_ptr,
770 .ptr_elem_val,
771 .ptr_elem_ptr,
772 .array_to_slice,
773 => {},
774 inline .reduce, .reduce_optimized => |air_tag| {
775 const reduce = l.air_instructions.items(.data)[@intFromEnum(inst)].reduce;
776 const vector_ty = l.typeOf(reduce.operand);
777 if (l.features.has(.reduce_one_elem_to_bitcast)) {
778 switch (vector_ty.vectorLen(zcu)) {
779 0 => unreachable,
780 1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{
781 .ty = .fromType(vector_ty.childType(zcu)),
782 .operand = reduce.operand,
783 } }),
784 else => {},
785 }
786 }
787 switch (l.wantScalarizeOrSoftFloat(air_tag, vector_ty)) {
788 .none => {},
789 .scalarize => continue :inst l.replaceInst(
790 inst,
791 .block,
792 try l.scalarizeReduceBlockPayload(inst, air_tag == .reduce_optimized),
793 ),
794 .soft_float => unreachable, // the operand is not a scalar
795 }
796 },
797 .splat => if (l.features.has(.splat_one_elem_to_bitcast)) {
798 const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
799 switch (ty_op.ty.toType().vectorLen(zcu)) {
800 0 => unreachable,
801 1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{
802 .ty = ty_op.ty,
803 .operand = ty_op.operand,
804 } }),
805 else => {},
806 }
807 },
808 .shuffle_one => {
809 const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
810 switch (l.wantScalarizeOrSoftFloat(.shuffle_one, ty_pl.ty.toType())) {
811 .none => {},
812 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleOneBlockPayload(inst)),
813 .soft_float => unreachable, // the operand is not a scalar
814 }
815 },
816 .shuffle_two => {
817 const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
818 switch (l.wantScalarizeOrSoftFloat(.shuffle_two, ty_pl.ty.toType())) {
819 .none => {},
820 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleTwoBlockPayload(inst)),
821 .soft_float => unreachable, // the operand is not a scalar
822 }
823 },
824 .select => {
825 const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
826 const bin = l.extraData(Air.Bin, pl_op.payload).data;
827 switch (l.wantScalarizeOrSoftFloat(.select, l.typeOf(bin.lhs))) {
828 .none => {},
829 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .select)),
830 .soft_float => unreachable, // the operand is not a scalar
831 }
832 },
833 .memset,
834 .memset_safe,
835 .memcpy,
836 .memmove,
837 .cmpxchg_weak,
838 .cmpxchg_strong,
839 .atomic_load,
840 .atomic_store_unordered,
841 .atomic_store_monotonic,
842 .atomic_store_release,
843 .atomic_store_seq_cst,
844 .atomic_rmw,
845 .is_named_enum_value,
846 .tag_name,
847 .error_name,
848 .error_set_has_value,
849 => {},
850 .aggregate_init => if (l.features.has(.expand_packed_aggregate_init)) {
851 const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
852 const agg_ty = ty_pl.ty.toType();
853 switch (agg_ty.zigTypeTag(zcu)) {
854 else => {},
855 .@"union" => unreachable,
856 .@"struct" => switch (agg_ty.containerLayout(zcu)) {
857 .auto, .@"extern" => {},
858 .@"packed" => switch (agg_ty.structFieldCount(zcu)) {
859 0 => unreachable,
860 // An `aggregate_init` of a packed struct with 1 field is just a fancy bitcast.
861 1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{
862 .ty = .fromType(agg_ty),
863 .operand = @enumFromInt(l.air_extra.items[ty_pl.payload]),
864 } }),
865 else => continue :inst l.replaceInst(inst, .block, try l.packedAggregateInitBlockPayload(inst)),
866 },
867 },
868 }
869 },
870 .union_init, .prefetch => {},
871 .mul_add => {
872 const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
873 const ty = l.typeOf(pl_op.operand);
874 switch (l.wantScalarizeOrSoftFloat(.mul_add, ty)) {
875 .none => {},
876 .scalarize => continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .pl_op_bin)),
877 .soft_float => {
878 const bin = l.extraData(Air.Bin, pl_op.payload).data;
879 const func = softFloatFunc(.mul_add, ty, zcu);
880 continue :inst try l.compilerRtCall(inst, func, &.{ bin.lhs, bin.rhs, pl_op.operand }, ty);
881 },
882 }
883 },
884 .field_parent_ptr,
885 .wasm_memory_size,
886 .wasm_memory_grow,
887 .cmp_lt_errors_len,
888 .err_return_trace,
889 .set_err_return_trace,
890 .addrspace_cast,
891 .save_err_return_trace_index,
892 .runtime_nav_ptr,
893 .c_va_arg,
894 .c_va_copy,
895 .c_va_end,
896 .c_va_start,
897 .work_item_id,
898 .work_group_size,
899 .work_group_id,
900 .legalize_vec_elem_val,
901 .legalize_vec_store_elem,
902 .legalize_compiler_rt_call,
903 => {},
904 }
905 }
906}
907
908const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, cmp_vector, select };
909fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, form: ScalarizeForm) Error!Air.Inst.Data {
910 const pt = l.pt;
911 const zcu = pt.zcu;
912
913 const orig = l.air_instructions.get(@intFromEnum(orig_inst));
914 const res_ty = l.typeOfIndex(orig_inst);
915 const result_is_array = switch (res_ty.zigTypeTag(zcu)) {
916 .vector => false,
917 .array => true,
918 else => unreachable,
919 };
920 const res_len = res_ty.arrayLen(zcu);
921 const res_elem_ty = res_ty.childType(zcu);
922
923 if (result_is_array) {
924 // This is only allowed when legalizing an elementwise bitcast.
925 assert(orig.tag == .bitcast);
926 assert(form == .ty_op);
927 }
928
929 // Our output will be a loop doing elementwise stores:
930 //
931 // %1 = block(@Vector(N, Scalar), {
932 // %2 = alloc(*usize)
933 // %3 = alloc(*@Vector(N, Scalar))
934 // %4 = store(%2, @zero_usize)
935 // %5 = loop({
936 // %6 = load(%2)
937 // %7 = <scalar result of operation at index %5>
938 // %8 = legalize_vec_store_elem(%3, %5, %6)
939 // %9 = cmp_eq(%6, <usize, N-1>)
940 // %10 = cond_br(%9, {
941 // %11 = load(%3)
942 // %12 = br(%1, %11)
943 // }, {
944 // %13 = add(%6, @one_usize)
945 // %14 = store(%2, %13)
946 // %15 = repeat(%5)
947 // })
948 // })
949 // })
950 //
951 // If scalarizing an elementwise bitcast, the result might be an array, in which case
952 // `legalize_vec_store_elem` becomes two instructions (`ptr_elem_ptr` and `store`).
953 // Therefore, there are 13 or 14 instructions in the block, plus however many are
954 // needed to compute each result element for `form`.
955 const inst_per_form: usize = switch (form) {
956 .un_op, .ty_op => 2,
957 .bin_op, .cmp_vector => 3,
958 .pl_op_bin => 4,
959 .select => 7,
960 };
961 const max_inst_per_form = 7; // maximum value in the above switch
962 var inst_buf: [14 + max_inst_per_form]Air.Inst.Index = undefined;
963
964 var main_block: Block = .init(&inst_buf);
965 try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
966
967 const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
968 const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(res_ty)).toRef();
969
970 _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
971
972 var loop: Loop = .init(l, &main_block);
973 loop.block = .init(main_block.stealRemainingCapacity());
974
975 const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
976 const elem_val: Air.Inst.Ref = switch (form) {
977 .un_op => elem: {
978 const orig_operand = orig.data.un_op;
979 const operand = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operand, index_val).toRef();
980 break :elem loop.block.addUnOp(l, orig.tag, operand).toRef();
981 },
982 .ty_op => elem: {
983 const orig_operand = orig.data.ty_op.operand;
984 const operand_is_array = switch (l.typeOf(orig_operand).zigTypeTag(zcu)) {
985 .vector => false,
986 .array => true,
987 else => unreachable,
988 };
989 const operand = loop.block.addBinOp(
990 l,
991 if (operand_is_array) .array_elem_val else .legalize_vec_elem_val,
992 orig_operand,
993 index_val,
994 ).toRef();
995 break :elem loop.block.addTyOp(l, orig.tag, res_elem_ty, operand).toRef();
996 },
997 .bin_op => elem: {
998 const orig_bin = orig.data.bin_op;
999 const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef();
1000 const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef();
1001 break :elem loop.block.addBinOp(l, orig.tag, lhs, rhs).toRef();
1002 },
1003 .pl_op_bin => elem: {
1004 const orig_operand = orig.data.pl_op.operand;
1005 const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
1006 const operand = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operand, index_val).toRef();
1007 const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef();
1008 const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef();
1009 break :elem loop.block.add(l, .{
1010 .tag = orig.tag,
1011 .data = .{ .pl_op = .{
1012 .operand = operand,
1013 .payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }),
1014 } },
1015 }).toRef();
1016 },
1017 .cmp_vector => elem: {
1018 const orig_payload = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data;
1019 const cmp_op = orig_payload.compareOperator();
1020 const optimized = switch (orig.tag) {
1021 .cmp_vector => false,
1022 .cmp_vector_optimized => true,
1023 else => unreachable,
1024 };
1025 const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_payload.lhs, index_val).toRef();
1026 const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_payload.rhs, index_val).toRef();
1027 break :elem loop.block.addCmpScalar(l, cmp_op, lhs, rhs, optimized).toRef();
1028 },
1029 .select => elem: {
1030 const orig_cond = orig.data.pl_op.operand;
1031 const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
1032
1033 const elem_block_inst = loop.block.add(l, .{
1034 .tag = .block,
1035 .data = .{ .ty_pl = .{
1036 .ty = .fromType(res_elem_ty),
1037 .payload = undefined,
1038 } },
1039 });
1040 var elem_block: Block = .init(loop.block.stealCapacity(2));
1041 const cond = elem_block.addBinOp(l, .legalize_vec_elem_val, orig_cond, index_val).toRef();
1042
1043 var condbr: CondBr = .init(l, cond, &elem_block, .{});
1044
1045 condbr.then_block = .init(loop.block.stealCapacity(2));
1046 const lhs = condbr.then_block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef();
1047 condbr.then_block.addBr(l, elem_block_inst, lhs);
1048
1049 condbr.else_block = .init(loop.block.stealCapacity(2));
1050 const rhs = condbr.else_block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef();
1051 condbr.else_block.addBr(l, elem_block_inst, rhs);
1052
1053 try condbr.finish(l);
1054
1055 const inst_data = l.air_instructions.items(.data);
1056 inst_data[@intFromEnum(elem_block_inst)].ty_pl.payload = try l.addBlockBody(elem_block.body());
1057
1058 break :elem elem_block_inst.toRef();
1059 },
1060 };
1061 _ = loop.block.stealCapacity(max_inst_per_form - inst_per_form);
1062 if (result_is_array) {
1063 const elem_ptr = loop.block.add(l, .{
1064 .tag = .ptr_elem_ptr,
1065 .data = .{ .ty_pl = .{
1066 .ty = .fromType(try pt.singleMutPtrType(res_elem_ty)),
1067 .payload = try l.addExtra(Air.Bin, .{
1068 .lhs = result_ptr,
1069 .rhs = index_val,
1070 }),
1071 } },
1072 }).toRef();
1073 _ = loop.block.addBinOp(l, .store, elem_ptr, elem_val);
1074 } else {
1075 _ = loop.block.add(l, .{
1076 .tag = .legalize_vec_store_elem,
1077 .data = .{ .pl_op = .{
1078 .operand = result_ptr,
1079 .payload = try l.addExtra(Air.Bin, .{
1080 .lhs = index_val,
1081 .rhs = elem_val,
1082 }),
1083 } },
1084 });
1085 _ = loop.block.stealCapacity(1);
1086 }
1087 const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, res_len - 1))).toRef();
1088
1089 var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
1090 condbr.then_block = .init(loop.block.stealRemainingCapacity());
1091 const result_val = condbr.then_block.addTyOp(l, .load, res_ty, result_ptr).toRef();
1092 condbr.then_block.addBr(l, orig_inst, result_val);
1093
1094 condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
1095 const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
1096 _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
1097 _ = condbr.else_block.add(l, .{
1098 .tag = .repeat,
1099 .data = .{ .repeat = .{ .loop_inst = loop.inst } },
1100 });
1101
1102 try condbr.finish(l);
1103
1104 try loop.finish(l);
1105
1106 return .{ .ty_pl = .{
1107 .ty = .fromType(res_ty),
1108 .payload = try l.addBlockBody(main_block.body()),
1109 } };
1110}
1111fn scalarizeShuffleOneBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
1112 const pt = l.pt;
1113 const zcu = pt.zcu;
1114 const gpa = zcu.gpa;
1115
1116 const shuffle = l.getTmpAir().unwrapShuffleOne(zcu, orig_inst);
1117
1118 // We're going to emit something like this:
1119 //
1120 // var x: @Vector(N, T) = all_comptime_known_elems;
1121 // for (out_idxs, in_idxs) |i, j| x[i] = operand[j];
1122 //
1123 // So we must first compute `out_idxs` and `in_idxs`.
1124
1125 var sfba_state = std.heap.stackFallback(512, gpa);
1126 const sfba = sfba_state.get();
1127
1128 const out_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
1129 defer sfba.free(out_idxs_buf);
1130
1131 const in_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
1132 defer sfba.free(in_idxs_buf);
1133
1134 var n: usize = 0;
1135 for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
1136 .value => {},
1137 .elem => |in_idx| {
1138 out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
1139 in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
1140 n += 1;
1141 },
1142 };
1143
1144 const init_val: Value = init: {
1145 const undef_val = try pt.undefValue(shuffle.result_ty.childType(zcu));
1146 const elems = try sfba.alloc(InternPool.Index, shuffle.mask.len);
1147 defer sfba.free(elems);
1148 for (shuffle.mask, elems) |mask, *elem| elem.* = switch (mask.unwrap()) {
1149 .value => |ip_index| ip_index,
1150 .elem => undef_val.toIntern(),
1151 };
1152 break :init try pt.aggregateValue(shuffle.result_ty, elems);
1153 };
1154
1155 // %1 = block(@Vector(N, T), {
1156 // %2 = alloc(*@Vector(N, T))
1157 // %3 = alloc(*usize)
1158 // %4 = store(%2, <init_val>)
1159 // %5 = [addScalarizedShuffle]
1160 // %6 = load(%2)
1161 // %7 = br(%1, %6)
1162 // })
1163
1164 var inst_buf: [6]Air.Inst.Index = undefined;
1165 var main_block: Block = .init(&inst_buf);
1166 try l.air_instructions.ensureUnusedCapacity(gpa, 19);
1167
1168 const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(shuffle.result_ty)).toRef();
1169 const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
1170
1171 _ = main_block.addBinOp(l, .store, result_ptr, .fromValue(init_val));
1172
1173 try l.addScalarizedShuffle(
1174 &main_block,
1175 shuffle.operand,
1176 result_ptr,
1177 index_ptr,
1178 out_idxs_buf[0..n],
1179 in_idxs_buf[0..n],
1180 );
1181
1182 const result_val = main_block.addTyOp(l, .load, shuffle.result_ty, result_ptr).toRef();
1183 main_block.addBr(l, orig_inst, result_val);
1184
1185 return .{ .ty_pl = .{
1186 .ty = .fromType(shuffle.result_ty),
1187 .payload = try l.addBlockBody(main_block.body()),
1188 } };
1189}
1190fn scalarizeShuffleTwoBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
1191 const pt = l.pt;
1192 const zcu = pt.zcu;
1193 const gpa = zcu.gpa;
1194
1195 const shuffle = l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst);
1196
1197 // We're going to emit something like this:
1198 //
1199 // var x: @Vector(N, T) = undefined;
1200 // for (out_idxs_a, in_idxs_a) |i, j| x[i] = operand_a[j];
1201 // for (out_idxs_b, in_idxs_b) |i, j| x[i] = operand_b[j];
1202 //
1203 // The AIR will look like this:
1204 //
1205 // %1 = block(@Vector(N, T), {
1206 // %2 = alloc(*@Vector(N, T))
1207 // %3 = alloc(*usize)
1208 // %4 = store(%2, <@Vector(N, T), undefined>)
1209 // %5 = [addScalarizedShuffle]
1210 // %6 = [addScalarizedShuffle]
1211 // %7 = load(%2)
1212 // %8 = br(%1, %7)
1213 // })
1214
1215 var sfba_state = std.heap.stackFallback(512, gpa);
1216 const sfba = sfba_state.get();
1217
1218 const out_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
1219 defer sfba.free(out_idxs_buf);
1220
1221 const in_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
1222 defer sfba.free(in_idxs_buf);
1223
1224 // Iterate `shuffle.mask` before doing anything, because modifying AIR invalidates it.
1225 const out_idxs_a, const in_idxs_a, const out_idxs_b, const in_idxs_b = idxs: {
1226 var n: usize = 0;
1227 for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
1228 .undef, .b_elem => {},
1229 .a_elem => |in_idx| {
1230 out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
1231 in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
1232 n += 1;
1233 },
1234 };
1235 const a_len = n;
1236 for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
1237 .undef, .a_elem => {},
1238 .b_elem => |in_idx| {
1239 out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
1240 in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
1241 n += 1;
1242 },
1243 };
1244 break :idxs .{
1245 out_idxs_buf[0..a_len],
1246 in_idxs_buf[0..a_len],
1247 out_idxs_buf[a_len..n],
1248 in_idxs_buf[a_len..n],
1249 };
1250 };
1251
1252 var inst_buf: [7]Air.Inst.Index = undefined;
1253 var main_block: Block = .init(&inst_buf);
1254 try l.air_instructions.ensureUnusedCapacity(gpa, 33);
1255
1256 const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(shuffle.result_ty)).toRef();
1257 const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
1258
1259 _ = main_block.addBinOp(l, .store, result_ptr, .fromValue(try pt.undefValue(shuffle.result_ty)));
1260
1261 if (out_idxs_a.len == 0) {
1262 _ = main_block.stealCapacity(1);
1263 } else {
1264 try l.addScalarizedShuffle(
1265 &main_block,
1266 shuffle.operand_a,
1267 result_ptr,
1268 index_ptr,
1269 out_idxs_a,
1270 in_idxs_a,
1271 );
1272 }
1273
1274 if (out_idxs_b.len == 0) {
1275 _ = main_block.stealCapacity(1);
1276 } else {
1277 try l.addScalarizedShuffle(
1278 &main_block,
1279 shuffle.operand_b,
1280 result_ptr,
1281 index_ptr,
1282 out_idxs_b,
1283 in_idxs_b,
1284 );
1285 }
1286
1287 const result_val = main_block.addTyOp(l, .load, shuffle.result_ty, result_ptr).toRef();
1288 main_block.addBr(l, orig_inst, result_val);
1289
1290 return .{ .ty_pl = .{
1291 .ty = .fromType(shuffle.result_ty),
1292 .payload = try l.addBlockBody(main_block.body()),
1293 } };
1294}
1295/// Adds code to `parent_block` which behaves like this loop:
1296///
1297/// for (out_idxs, in_idxs) |i, j| result_vec_ptr[i] = operand_vec[j];
1298///
1299/// The actual AIR adds exactly one instruction to `parent_block` itself and 14 instructions
1300/// overall, and is as follows:
1301///
1302/// %1 = block(void, {
1303/// %2 = store(index_ptr, @zero_usize)
1304/// %3 = loop({
1305/// %4 = load(index_ptr)
1306/// %5 = ptr_elem_val(out_idxs_ptr, %4)
1307/// %6 = ptr_elem_val(in_idxs_ptr, %4)
1308/// %7 = legalize_vec_elem_val(operand_vec, %6)
1309/// %8 = legalize_vec_store_elem(result_vec_ptr, %4, %7)
1310/// %9 = cmp_eq(%4, <usize, out_idxs.len-1>)
1311/// %10 = cond_br(%9, {
1312/// %11 = br(%1, @void_value)
1313/// }, {
1314/// %12 = add(%4, @one_usize)
1315/// %13 = store(index_ptr, %12)
1316/// %14 = repeat(%3)
1317/// })
1318/// })
1319/// })
1320///
1321/// The caller is responsible for reserving space in `l.air_instructions`.
1322fn addScalarizedShuffle(
1323 l: *Legalize,
1324 parent_block: *Block,
1325 operand_vec: Air.Inst.Ref,
1326 result_vec_ptr: Air.Inst.Ref,
1327 index_ptr: Air.Inst.Ref,
1328 out_idxs: []const InternPool.Index,
1329 in_idxs: []const InternPool.Index,
1330) Error!void {
1331 const pt = l.pt;
1332
1333 assert(out_idxs.len == in_idxs.len);
1334 const n = out_idxs.len;
1335
1336 const idxs_ty = try pt.arrayType(.{ .len = n, .child = .usize_type });
1337 const idxs_ptr_ty = try pt.singleConstPtrType(idxs_ty);
1338 const manyptr_usize_ty = try pt.manyConstPtrType(.usize);
1339
1340 const out_idxs_ptr = try pt.intern(.{ .ptr = .{
1341 .ty = manyptr_usize_ty.toIntern(),
1342 .base_addr = .{ .uav = .{
1343 .val = (try pt.aggregateValue(idxs_ty, out_idxs)).toIntern(),
1344 .orig_ty = idxs_ptr_ty.toIntern(),
1345 } },
1346 .byte_offset = 0,
1347 } });
1348 const in_idxs_ptr = try pt.intern(.{ .ptr = .{
1349 .ty = manyptr_usize_ty.toIntern(),
1350 .base_addr = .{ .uav = .{
1351 .val = (try pt.aggregateValue(idxs_ty, in_idxs)).toIntern(),
1352 .orig_ty = idxs_ptr_ty.toIntern(),
1353 } },
1354 .byte_offset = 0,
1355 } });
1356
1357 const main_block_inst = parent_block.add(l, .{
1358 .tag = .block,
1359 .data = .{ .ty_pl = .{
1360 .ty = .void_type,
1361 .payload = undefined,
1362 } },
1363 });
1364
1365 var inst_buf: [13]Air.Inst.Index = undefined;
1366 var main_block: Block = .init(&inst_buf);
1367
1368 _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
1369
1370 var loop: Loop = .init(l, &main_block);
1371 loop.block = .init(main_block.stealRemainingCapacity());
1372
1373 const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
1374 const in_idx_val = loop.block.addBinOp(l, .ptr_elem_val, .fromIntern(in_idxs_ptr), index_val).toRef();
1375 const out_idx_val = loop.block.addBinOp(l, .ptr_elem_val, .fromIntern(out_idxs_ptr), index_val).toRef();
1376
1377 const elem_val = loop.block.addBinOp(l, .legalize_vec_elem_val, operand_vec, in_idx_val).toRef();
1378 _ = loop.block.add(l, .{
1379 .tag = .legalize_vec_store_elem,
1380 .data = .{ .pl_op = .{
1381 .operand = result_vec_ptr,
1382 .payload = try l.addExtra(Air.Bin, .{
1383 .lhs = out_idx_val,
1384 .rhs = elem_val,
1385 }),
1386 } },
1387 });
1388
1389 const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, n - 1))).toRef();
1390 var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
1391 condbr.then_block = .init(loop.block.stealRemainingCapacity());
1392 condbr.then_block.addBr(l, main_block_inst, .void_value);
1393
1394 condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
1395 const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
1396 _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
1397 _ = condbr.else_block.add(l, .{
1398 .tag = .repeat,
1399 .data = .{ .repeat = .{ .loop_inst = loop.inst } },
1400 });
1401
1402 try condbr.finish(l);
1403 try loop.finish(l);
1404
1405 const inst_data = l.air_instructions.items(.data);
1406 inst_data[@intFromEnum(main_block_inst)].ty_pl.payload = try l.addBlockBody(main_block.body());
1407}
1408fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!?Air.Inst.Data {
1409 const pt = l.pt;
1410 const zcu = pt.zcu;
1411
1412 const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
1413
1414 const dest_ty = ty_op.ty.toType();
1415 const dest_legal = switch (dest_ty.zigTypeTag(zcu)) {
1416 else => true,
1417 .array, .vector => legal: {
1418 if (dest_ty.arrayLen(zcu) == 1) break :legal true;
1419 const dest_elem_ty = dest_ty.childType(zcu);
1420 break :legal dest_elem_ty.bitSize(zcu) == 8 * dest_elem_ty.abiSize(zcu);
1421 },
1422 };
1423
1424 const operand_ty = l.typeOf(ty_op.operand);
1425 const operand_legal = switch (operand_ty.zigTypeTag(zcu)) {
1426 else => true,
1427 .array, .vector => legal: {
1428 if (operand_ty.arrayLen(zcu) == 1) break :legal true;
1429 const operand_elem_ty = operand_ty.childType(zcu);
1430 break :legal operand_elem_ty.bitSize(zcu) == 8 * operand_elem_ty.abiSize(zcu);
1431 },
1432 };
1433
1434 if (dest_legal and operand_legal) return null;
1435
1436 if (!operand_legal and !dest_legal and operand_ty.arrayLen(zcu) == dest_ty.arrayLen(zcu)) {
1437 // from_ty and to_ty are both arrays or vectors of types with the same bit size,
1438 // so we can do an elementwise bitcast.
1439 return try l.scalarizeBlockPayload(orig_inst, .ty_op);
1440 }
1441
1442 // Fallback path. Our strategy is to use an unsigned integer type as an intermediate
1443 // "bag of bits" representation which can be manipulated by bitwise operations.
1444
1445 const num_bits: u16 = @intCast(dest_ty.bitSize(zcu));
1446 assert(operand_ty.bitSize(zcu) == num_bits);
1447 const uint_ty = try pt.intType(.unsigned, num_bits);
1448 const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, num_bits));
1449
1450 var inst_buf: [39]Air.Inst.Index = undefined;
1451 var main_block: Block = .init(&inst_buf);
1452 try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
1453
1454 // First, convert `operand_ty` to `uint_ty` (`uN`).
1455
1456 const uint_val: Air.Inst.Ref = uint_val: {
1457 if (operand_legal) {
1458 _ = main_block.stealCapacity(19);
1459 break :uint_val main_block.addBitCast(l, uint_ty, ty_op.operand);
1460 }
1461
1462 // %1 = block({
1463 // %2 = alloc(*usize)
1464 // %3 = alloc(*uN)
1465 // %4 = store(%2, <usize, operand_len>)
1466 // %5 = store(%3, <uN, 0>)
1467 // %6 = loop({
1468 // %7 = load(%2)
1469 // %8 = array_elem_val(orig_operand, %7)
1470 // %9 = bitcast(uE, %8)
1471 // %10 = intcast(uN, %9)
1472 // %11 = load(%3)
1473 // %12 = shl_exact(%11, <uS, E>)
1474 // %13 = bit_or(%12, %10)
1475 // %14 = cmp_eq(%4, @zero_usize)
1476 // %15 = cond_br(%14, {
1477 // %16 = br(%1, %13)
1478 // }, {
1479 // %17 = store(%3, %13)
1480 // %18 = sub(%7, @one_usize)
1481 // %19 = store(%2, %18)
1482 // %20 = repeat(%6)
1483 // })
1484 // })
1485 // })
1486
1487 const elem_bits = operand_ty.childType(zcu).bitSize(zcu);
1488 const elem_bits_val = try pt.intValue(shift_ty, elem_bits);
1489 const elem_uint_ty = try pt.intType(.unsigned, @intCast(elem_bits));
1490
1491 const uint_block_inst = main_block.add(l, .{
1492 .tag = .block,
1493 .data = .{ .ty_pl = .{
1494 .ty = .fromType(uint_ty),
1495 .payload = undefined,
1496 } },
1497 });
1498 var uint_block: Block = .init(main_block.stealCapacity(19));
1499
1500 const index_ptr = uint_block.addTy(l, .alloc, .ptr_usize).toRef();
1501 const result_ptr = uint_block.addTy(l, .alloc, try pt.singleMutPtrType(uint_ty)).toRef();
1502 _ = uint_block.addBinOp(
1503 l,
1504 .store,
1505 index_ptr,
1506 .fromValue(try pt.intValue(.usize, operand_ty.arrayLen(zcu))),
1507 );
1508 _ = uint_block.addBinOp(l, .store, result_ptr, .fromValue(try pt.intValue(uint_ty, 0)));
1509
1510 var loop: Loop = .init(l, &uint_block);
1511 loop.block = .init(uint_block.stealRemainingCapacity());
1512
1513 const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
1514 const raw_elem = loop.block.addBinOp(
1515 l,
1516 if (operand_ty.zigTypeTag(zcu) == .vector) .legalize_vec_elem_val else .array_elem_val,
1517 ty_op.operand,
1518 index_val,
1519 ).toRef();
1520 const elem_uint = loop.block.addBitCast(l, elem_uint_ty, raw_elem);
1521 const elem_extended = loop.block.addTyOp(l, .intcast, uint_ty, elem_uint).toRef();
1522 const old_result = loop.block.addTyOp(l, .load, uint_ty, result_ptr).toRef();
1523 const shifted_result = loop.block.addBinOp(l, .shl_exact, old_result, .fromValue(elem_bits_val)).toRef();
1524 const new_result = loop.block.addBinOp(l, .bit_or, shifted_result, elem_extended).toRef();
1525
1526 const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .zero_usize).toRef();
1527 var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
1528
1529 condbr.then_block = .init(loop.block.stealRemainingCapacity());
1530 condbr.then_block.addBr(l, uint_block_inst, new_result);
1531
1532 condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
1533 _ = condbr.else_block.addBinOp(l, .store, result_ptr, new_result);
1534 const new_index_val = condbr.else_block.addBinOp(l, .sub, index_val, .one_usize).toRef();
1535 _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
1536 _ = condbr.else_block.add(l, .{
1537 .tag = .repeat,
1538 .data = .{ .repeat = .{ .loop_inst = loop.inst } },
1539 });
1540
1541 try condbr.finish(l);
1542 try loop.finish(l);
1543
1544 const inst_data = l.air_instructions.items(.data);
1545 inst_data[@intFromEnum(uint_block_inst)].ty_pl.payload = try l.addBlockBody(uint_block.body());
1546
1547 break :uint_val uint_block_inst.toRef();
1548 };
1549
1550 // Now convert `uint_ty` (`uN`) to `dest_ty`.
1551
1552 if (dest_legal) {
1553 _ = main_block.stealCapacity(17);
1554 const result = main_block.addBitCast(l, dest_ty, uint_val);
1555 main_block.addBr(l, orig_inst, result);
1556 } else {
1557 // %1 = alloc(*usize)
1558 // %2 = alloc(*@Vector(N, Result))
1559 // %3 = store(%1, @zero_usize)
1560 // %4 = loop({
1561 // %5 = load(%1)
1562 // %6 = mul(%5, <usize, E>)
1563 // %7 = intcast(uS, %6)
1564 // %8 = shr(uint_val, %7)
1565 // %9 = trunc(uE, %8)
1566 // %10 = bitcast(Result, %9)
1567 // %11 = legalize_vec_store_elem(%2, %5, %10)
        //   %12 = cmp_eq(%5, <usize, vec_len - 1>)
1569 // %13 = cond_br(%12, {
1570 // %14 = load(%2)
1571 // %15 = br(%0, %14)
1572 // }, {
1573 // %16 = add(%5, @one_usize)
1574 // %17 = store(%1, %16)
1575 // %18 = repeat(%4)
1576 // })
1577 // })
1578 //
1579 // The result might be an array, in which case `legalize_vec_store_elem`
1580 // becomes `ptr_elem_ptr` followed by `store`.
1581
1582 const elem_ty = dest_ty.childType(zcu);
1583 const elem_bits = elem_ty.bitSize(zcu);
1584 const elem_uint_ty = try pt.intType(.unsigned, @intCast(elem_bits));
1585
1586 const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
1587 const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(dest_ty)).toRef();
1588 _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
1589
1590 var loop: Loop = .init(l, &main_block);
1591 loop.block = .init(main_block.stealRemainingCapacity());
1592
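        // Walk the elements from index 0 upwards, extracting `elem_bits` bits at a time starting
        // from the least significant end of `uint_val`.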
1593 const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
1594 const bit_offset = loop.block.addBinOp(l, .mul, index_val, .fromValue(try pt.intValue(.usize, elem_bits))).toRef();
1595 const casted_bit_offset = loop.block.addTyOp(l, .intcast, shift_ty, bit_offset).toRef();
        const shifted_uint = loop.block.addBinOp(l, .shr, uint_val, casted_bit_offset).toRef();
1597 const elem_uint = loop.block.addTyOp(l, .trunc, elem_uint_ty, shifted_uint).toRef();
1598 const elem_val = loop.block.addBitCast(l, elem_ty, elem_uint);
1599 switch (dest_ty.zigTypeTag(zcu)) {
1600 .array => {
1601 const elem_ptr = loop.block.add(l, .{
1602 .tag = .ptr_elem_ptr,
1603 .data = .{ .ty_pl = .{
1604 .ty = .fromType(try pt.singleMutPtrType(elem_ty)),
1605 .payload = try l.addExtra(Air.Bin, .{
1606 .lhs = result_ptr,
1607 .rhs = index_val,
1608 }),
1609 } },
1610 }).toRef();
1611 _ = loop.block.addBinOp(l, .store, elem_ptr, elem_val);
1612 },
1613 .vector => {
1614 _ = loop.block.add(l, .{
1615 .tag = .legalize_vec_store_elem,
1616 .data = .{ .pl_op = .{
1617 .operand = result_ptr,
1618 .payload = try l.addExtra(Air.Bin, .{
1619 .lhs = index_val,
1620 .rhs = elem_val,
1621 }),
1622 } },
1623 });
1624 _ = loop.block.stealCapacity(1);
1625 },
1626 else => unreachable,
1627 }
1628
1629 const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, dest_ty.arrayLen(zcu) - 1))).toRef();
1630
1631 var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
1632
1633 condbr.then_block = .init(loop.block.stealRemainingCapacity());
1634 const result_val = condbr.then_block.addTyOp(l, .load, dest_ty, result_ptr).toRef();
1635 condbr.then_block.addBr(l, orig_inst, result_val);
1636
1637 condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
1638 const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
1639 _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
1640 _ = condbr.else_block.add(l, .{
1641 .tag = .repeat,
1642 .data = .{ .repeat = .{ .loop_inst = loop.inst } },
1643 });
1644
1645 try condbr.finish(l);
1646 try loop.finish(l);
1647 }
1648
1649 return .{ .ty_pl = .{
1650 .ty = .fromType(dest_ty),
1651 .payload = try l.addBlockBody(main_block.body()),
1652 } };
1653}
1654fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
1655 const pt = l.pt;
1656 const zcu = pt.zcu;
1657
1658 const orig = l.air_instructions.get(@intFromEnum(orig_inst));
1659 const orig_operands = l.extraData(Air.Bin, orig.data.ty_pl.payload).data;
1660
1661 const vec_tuple_ty = l.typeOfIndex(orig_inst);
1662 const vec_int_ty = vec_tuple_ty.fieldType(0, zcu);
1663 const vec_overflow_ty = vec_tuple_ty.fieldType(1, zcu);
1664
1665 assert(l.typeOf(orig_operands.lhs).toIntern() == vec_int_ty.toIntern());
1666 if (orig.tag != .shl_with_overflow) {
1667 assert(l.typeOf(orig_operands.rhs).toIntern() == vec_int_ty.toIntern());
1668 }
1669
1670 const scalar_int_ty = vec_int_ty.childType(zcu);
1671 const scalar_tuple_ty = try pt.overflowArithmeticTupleType(scalar_int_ty);
1672
1673 // %1 = block(struct { @Vector(N, Int), @Vector(N, u1) }, {
1674 // %2 = alloc(*usize)
1675 // %3 = alloc(*struct { @Vector(N, Int), @Vector(N, u1) })
1676 // %4 = struct_field_ptr_index_0(*@Vector(N, Int), %3)
1677 // %5 = struct_field_ptr_index_1(*@Vector(N, u1), %3)
1678 // %6 = store(%2, @zero_usize)
1679 // %7 = loop({
1680 // %8 = load(%2)
1681 // %9 = legalize_vec_elem_val(orig_lhs, %8)
1682 // %10 = legalize_vec_elem_val(orig_rhs, %8)
1683 // %11 = ???_with_overflow(struct { Int, u1 }, %9, %10)
1684 // %12 = struct_field_val(%11, 0)
1685 // %13 = struct_field_val(%11, 1)
1686 // %14 = legalize_vec_store_elem(%4, %8, %12)
    //     %15 = legalize_vec_store_elem(%5, %8, %13)
1688 // %16 = cmp_eq(%8, <usize, N-1>)
1689 // %17 = cond_br(%16, {
1690 // %18 = load(%3)
1691 // %19 = br(%1, %18)
1692 // }, {
1693 // %20 = add(%8, @one_usize)
1694 // %21 = store(%2, %20)
1695 // %22 = repeat(%7)
1696 // })
1697 // })
1698 // })
1699
1700 const elems_len = vec_int_ty.vectorLen(zcu);
1701
1702 var inst_buf: [21]Air.Inst.Index = undefined;
1703 var main_block: Block = .init(&inst_buf);
1704 try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
1705
1706 const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
1707 const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(vec_tuple_ty)).toRef();
1708 const result_int_ptr = main_block.addTyOp(
1709 l,
1710 .struct_field_ptr_index_0,
1711 try pt.singleMutPtrType(vec_int_ty),
1712 result_ptr,
1713 ).toRef();
1714 const result_overflow_ptr = main_block.addTyOp(
1715 l,
1716 .struct_field_ptr_index_1,
1717 try pt.singleMutPtrType(vec_overflow_ty),
1718 result_ptr,
1719 ).toRef();
1720
1721 _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
1722
1723 var loop: Loop = .init(l, &main_block);
1724 loop.block = .init(main_block.stealRemainingCapacity());
1725
1726 const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
1727 const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operands.lhs, index_val).toRef();
1728 const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operands.rhs, index_val).toRef();
1729 const elem_result = loop.block.add(l, .{
1730 .tag = orig.tag,
1731 .data = .{ .ty_pl = .{
1732 .ty = .fromType(scalar_tuple_ty),
1733 .payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }),
1734 } },
1735 }).toRef();
1736 const int_elem = loop.block.add(l, .{
1737 .tag = .struct_field_val,
1738 .data = .{ .ty_pl = .{
1739 .ty = .fromType(scalar_int_ty),
1740 .payload = try l.addExtra(Air.StructField, .{
1741 .struct_operand = elem_result,
1742 .field_index = 0,
1743 }),
1744 } },
1745 }).toRef();
1746 const overflow_elem = loop.block.add(l, .{
1747 .tag = .struct_field_val,
1748 .data = .{ .ty_pl = .{
1749 .ty = .u1_type,
1750 .payload = try l.addExtra(Air.StructField, .{
1751 .struct_operand = elem_result,
1752 .field_index = 1,
1753 }),
1754 } },
1755 }).toRef();
1756 _ = loop.block.add(l, .{
1757 .tag = .legalize_vec_store_elem,
1758 .data = .{ .pl_op = .{
1759 .operand = result_int_ptr,
1760 .payload = try l.addExtra(Air.Bin, .{
1761 .lhs = index_val,
1762 .rhs = int_elem,
1763 }),
1764 } },
1765 });
1766 _ = loop.block.add(l, .{
1767 .tag = .legalize_vec_store_elem,
1768 .data = .{ .pl_op = .{
1769 .operand = result_overflow_ptr,
1770 .payload = try l.addExtra(Air.Bin, .{
1771 .lhs = index_val,
1772 .rhs = overflow_elem,
1773 }),
1774 } },
1775 });
1776
1777 const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, elems_len - 1))).toRef();
1778 var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
1779
1780 condbr.then_block = .init(loop.block.stealRemainingCapacity());
1781 const result_val = condbr.then_block.addTyOp(l, .load, vec_tuple_ty, result_ptr).toRef();
1782 condbr.then_block.addBr(l, orig_inst, result_val);
1783
1784 condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
1785 const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
1786 _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
1787 _ = condbr.else_block.add(l, .{
1788 .tag = .repeat,
1789 .data = .{ .repeat = .{ .loop_inst = loop.inst } },
1790 });
1791
1792 try condbr.finish(l);
1793 try loop.finish(l);
1794
1795 return .{ .ty_pl = .{
1796 .ty = .fromType(vec_tuple_ty),
1797 .payload = try l.addBlockBody(main_block.body()),
1798 } };
1799}
1800fn scalarizeReduceBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, optimized: bool) Error!Air.Inst.Data {
1801 const pt = l.pt;
1802 const zcu = pt.zcu;
1803
1804 const reduce = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].reduce;
1805
1806 const vector_ty = l.typeOf(reduce.operand);
1807 const scalar_ty = vector_ty.childType(zcu);
1808
1809 const ident_val: Value = switch (reduce.operation) {
1810 // identity for add is 0; identity for OR and XOR is all 0 bits
1811 .Or, .Xor, .Add => switch (scalar_ty.zigTypeTag(zcu)) {
1812 .int => try pt.intValue(scalar_ty, 0),
1813 .float => try pt.floatValue(scalar_ty, 0.0),
1814 else => unreachable,
1815 },
1816 // identity for multiplication is 1
1817 .Mul => switch (scalar_ty.zigTypeTag(zcu)) {
1818 .int => try pt.intValue(scalar_ty, 1),
1819 .float => try pt.floatValue(scalar_ty, 1.0),
1820 else => unreachable,
1821 },
1822 // identity for AND is all 1 bits
1823 .And => switch (scalar_ty.intInfo(zcu).signedness) {
1824 .unsigned => try scalar_ty.maxIntScalar(pt, scalar_ty),
1825 .signed => try pt.intValue(scalar_ty, -1),
1826 },
1827 // identity for @min is maximum value
1828 .Min => switch (scalar_ty.zigTypeTag(zcu)) {
1829 .int => try scalar_ty.maxIntScalar(pt, scalar_ty),
1830 .float => try pt.floatValue(scalar_ty, std.math.inf(f32)),
1831 else => unreachable,
1832 },
1833 // identity for @max is minimum value
1834 .Max => switch (scalar_ty.zigTypeTag(zcu)) {
1835 .int => try scalar_ty.minIntScalar(pt, scalar_ty),
1836 .float => try pt.floatValue(scalar_ty, -std.math.inf(f32)),
1837 else => unreachable,
1838 },
1839 };
1840
1841 const op_tag: Air.Inst.Tag = switch (reduce.operation) {
1842 .Or => .bit_or,
1843 .And => .bit_and,
1844 .Xor => .xor,
1845 .Min => .min,
1846 .Max => .max,
1847 .Add => switch (scalar_ty.zigTypeTag(zcu)) {
1848 .int => .add_wrap,
1849 .float => if (optimized) .add_optimized else .add,
1850 else => unreachable,
1851 },
1852 .Mul => switch (scalar_ty.zigTypeTag(zcu)) {
1853 .int => .mul_wrap,
1854 .float => if (optimized) .mul_optimized else .mul,
1855 else => unreachable,
1856 },
1857 };
1858
1859 // %1 = block(Scalar, {
1860 // %2 = alloc(*usize)
1861 // %3 = alloc(*Scalar)
1862 // %4 = store(%2, @zero_usize)
1863 // %5 = store(%3, <Scalar, 0>) // or whatever the identity is for this operator
1864 // %6 = loop({
1865 // %7 = load(%2)
1866 // %8 = legalize_vec_elem_val(orig_operand, %7)
1867 // %9 = load(%3)
1868 // %10 = add(%8, %9) // or whatever the operator is
1869 // %11 = cmp_eq(%7, <usize, N-1>)
1870 // %12 = cond_br(%11, {
1871 // %13 = br(%1, %10)
1872 // }, {
1873 // %14 = store(%3, %10)
1874 // %15 = add(%7, @one_usize)
1875 // %16 = store(%2, %15)
1876 // %17 = repeat(%6)
1877 // })
1878 // })
1879 // })
1880
1881 var inst_buf: [16]Air.Inst.Index = undefined;
1882 var main_block: Block = .init(&inst_buf);
1883 try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
1884
1885 const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
1886 const accum_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(scalar_ty)).toRef();
1887 _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
1888 _ = main_block.addBinOp(l, .store, accum_ptr, .fromValue(ident_val));
1889
1890 var loop: Loop = .init(l, &main_block);
1891 loop.block = .init(main_block.stealRemainingCapacity());
1892
1893 const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
1894 const elem_val = loop.block.addBinOp(l, .legalize_vec_elem_val, reduce.operand, index_val).toRef();
1895 const old_accum = loop.block.addTyOp(l, .load, scalar_ty, accum_ptr).toRef();
1896 const new_accum = loop.block.addBinOp(l, op_tag, old_accum, elem_val).toRef();
1897
1898 const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, vector_ty.vectorLen(zcu) - 1))).toRef();
1899
1900 var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
1901
1902 condbr.then_block = .init(loop.block.stealRemainingCapacity());
1903 condbr.then_block.addBr(l, orig_inst, new_accum);
1904
1905 condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
1906 _ = condbr.else_block.addBinOp(l, .store, accum_ptr, new_accum);
1907 const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
1908 _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
1909 _ = condbr.else_block.add(l, .{
1910 .tag = .repeat,
1911 .data = .{ .repeat = .{ .loop_inst = loop.inst } },
1912 });
1913
1914 try condbr.finish(l);
1915 try loop.finish(l);
1916
1917 return .{ .ty_pl = .{
1918 .ty = .fromType(scalar_ty),
1919 .payload = try l.addBlockBody(main_block.body()),
1920 } };
1921}
1922
1923fn safeIntcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
1924 const pt = l.pt;
1925 const zcu = pt.zcu;
1926 const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
1927
1928 const operand_ref = ty_op.operand;
1929 const operand_ty = l.typeOf(operand_ref);
1930 const dest_ty = ty_op.ty.toType();
1931
1932 const is_vector = operand_ty.zigTypeTag(zcu) == .vector;
1933 const operand_scalar_ty = operand_ty.scalarType(zcu);
1934 const dest_scalar_ty = dest_ty.scalarType(zcu);
1935
1936 assert(operand_scalar_ty.zigTypeTag(zcu) == .int);
1937 const dest_is_enum = switch (dest_scalar_ty.zigTypeTag(zcu)) {
1938 .int => false,
1939 .@"enum" => true,
1940 else => unreachable,
1941 };
1942
1943 const operand_info = operand_scalar_ty.intInfo(zcu);
1944 const dest_info = dest_scalar_ty.intInfo(zcu);
1945
1946 const have_min_check, const have_max_check = c: {
1947 const dest_pos_bits = dest_info.bits - @intFromBool(dest_info.signedness == .signed);
1948 const operand_pos_bits = operand_info.bits - @intFromBool(operand_info.signedness == .signed);
1949 const dest_allows_neg = dest_info.signedness == .signed and dest_info.bits > 0;
1950 const operand_allows_neg = operand_info.signedness == .signed and operand_info.bits > 0;
1951 break :c .{
1952 operand_allows_neg and (!dest_allows_neg or dest_info.bits < operand_info.bits),
1953 dest_pos_bits < operand_pos_bits,
1954 };
1955 };
1956
1957 // The worst-case scenario in terms of total instructions and total condbrs is the case where
1958 // the result type is an exhaustive enum whose tag type is smaller than the operand type:
1959 //
1960 // %x = block({
1961 // %1 = cmp_lt(%y, @min_allowed_int)
1962 // %2 = cmp_gt(%y, @max_allowed_int)
1963 // %3 = bool_or(%1, %2)
1964 // %4 = cond_br(%3, {
1965 // %5 = call(@panic.invalidEnumValue, [])
1966 // %6 = unreach()
1967 // }, {
1968 // %7 = intcast(@res_ty, %y)
1969 // %8 = is_named_enum_value(%7)
1970 // %9 = cond_br(%8, {
1971 // %10 = br(%x, %7)
1972 // }, {
1973 // %11 = call(@panic.invalidEnumValue, [])
1974 // %12 = unreach()
1975 // })
1976 // })
1977 // })
1978 //
1979 // Note that vectors of enums don't exist -- the worst case for vectors is this:
1980 //
1981 // %x = block({
1982 // %1 = cmp_lt(%y, @min_allowed_int)
1983 // %2 = cmp_gt(%y, @max_allowed_int)
1984 // %3 = bool_or(%1, %2)
1985 // %4 = reduce(%3, .@"or")
1986 // %5 = cond_br(%4, {
1987 // %6 = call(@panic.invalidEnumValue, [])
1988 // %7 = unreach()
1989 // }, {
1990 // %8 = intcast(@res_ty, %y)
1991 // %9 = br(%x, %8)
1992 // })
1993 // })
1994
1995 var inst_buf: [12]Air.Inst.Index = undefined;
1996 try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
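    // At most two `cond_br`s are emitted: one for the range check and one for the
    // exhaustive-enum tag check.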
1997 var condbr_buf: [2]CondBr = undefined;
1998 var condbr_idx: usize = 0;
1999
2000 var main_block: Block = .init(&inst_buf);
2001 var cur_block: *Block = &main_block;
2002
2003 const panic_id: Zcu.SimplePanicId = if (dest_is_enum) .invalid_enum_value else .integer_out_of_bounds;
2004
2005 if (have_min_check or have_max_check) {
2006 const dest_int_ty = if (dest_is_enum) dest_ty.intTagType(zcu) else dest_ty;
2007 const condbr = &condbr_buf[condbr_idx];
2008 condbr_idx += 1;
2009 const below_min_inst: Air.Inst.Index = if (have_min_check) inst: {
2010 const min_val_ref = Air.internedToRef((try dest_int_ty.minInt(pt, operand_ty)).toIntern());
2011 break :inst try cur_block.addCmp(l, .lt, operand_ref, min_val_ref, .{ .vector = is_vector });
2012 } else undefined;
2013 const above_max_inst: Air.Inst.Index = if (have_max_check) inst: {
2014 const max_val_ref = Air.internedToRef((try dest_int_ty.maxInt(pt, operand_ty)).toIntern());
2015 break :inst try cur_block.addCmp(l, .gt, operand_ref, max_val_ref, .{ .vector = is_vector });
2016 } else undefined;
2017 const out_of_range_inst: Air.Inst.Index = inst: {
2018 if (have_min_check and have_max_check) break :inst cur_block.add(l, .{
2019 .tag = .bool_or,
2020 .data = .{ .bin_op = .{
2021 .lhs = below_min_inst.toRef(),
2022 .rhs = above_max_inst.toRef(),
2023 } },
2024 });
2025 if (have_min_check) break :inst below_min_inst;
2026 if (have_max_check) break :inst above_max_inst;
2027 unreachable;
2028 };
2029 const scalar_out_of_range_inst: Air.Inst.Index = if (is_vector) cur_block.add(l, .{
2030 .tag = .reduce,
2031 .data = .{ .reduce = .{
2032 .operand = out_of_range_inst.toRef(),
2033 .operation = .Or,
2034 } },
2035 }) else out_of_range_inst;
2036 condbr.* = .init(l, scalar_out_of_range_inst.toRef(), cur_block, .{ .true = .cold });
2037 condbr.then_block = .init(cur_block.stealRemainingCapacity());
2038 try condbr.then_block.addPanic(l, panic_id);
2039 condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
2040 cur_block = &condbr.else_block;
2041 }
2042
    // Now that we know the value is in range, we can `intcast`:
2044 const cast_inst = cur_block.add(l, .{
2045 .tag = .intcast,
2046 .data = .{ .ty_op = .{
2047 .ty = Air.internedToRef(dest_ty.toIntern()),
2048 .operand = operand_ref,
2049 } },
2050 });
    // For ints we're already done, but for exhaustive enums we must also check that the result is a valid tag.
2052 if (dest_is_enum and !dest_ty.isNonexhaustiveEnum(zcu) and zcu.backendSupportsFeature(.is_named_enum_value)) {
2053 assert(!is_vector); // vectors of enums don't exist
2054 // We are building this:
2055 // %1 = is_named_enum_value(%cast_inst)
2056 // %2 = cond_br(%1, {
2057 // <new cursor>
2058 // }, {
2059 // <panic>
2060 // })
2061 const is_named_inst = cur_block.add(l, .{
2062 .tag = .is_named_enum_value,
2063 .data = .{ .un_op = cast_inst.toRef() },
2064 });
2065 const condbr = &condbr_buf[condbr_idx];
2066 condbr_idx += 1;
2067 condbr.* = .init(l, is_named_inst.toRef(), cur_block, .{ .false = .cold });
2068 condbr.else_block = .init(cur_block.stealRemainingCapacity());
2069 try condbr.else_block.addPanic(l, panic_id);
2070 condbr.then_block = .init(condbr.else_block.stealRemainingCapacity());
2071 cur_block = &condbr.then_block;
2072 }
2073 // Finally, just `br` to our outer `block`.
2074 _ = cur_block.add(l, .{
2075 .tag = .br,
2076 .data = .{ .br = .{
2077 .block_inst = orig_inst,
2078 .operand = cast_inst.toRef(),
2079 } },
2080 });
2081 // We might not have used all of the instructions; that's intentional.
2082 _ = cur_block.stealRemainingCapacity();
2083
2084 for (condbr_buf[0..condbr_idx]) |*condbr| try condbr.finish(l);
2085 return .{ .ty_pl = .{
2086 .ty = Air.internedToRef(dest_ty.toIntern()),
2087 .payload = try l.addBlockBody(main_block.body()),
2088 } };
2089}
2090fn safeIntFromFloatBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, optimized: bool) Error!Air.Inst.Data {
2091 const pt = l.pt;
2092 const zcu = pt.zcu;
2093 const gpa = zcu.gpa;
2094 const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
2095
2096 const operand_ref = ty_op.operand;
2097 const operand_ty = l.typeOf(operand_ref);
2098 const dest_ty = ty_op.ty.toType();
2099
2100 const is_vector = operand_ty.zigTypeTag(zcu) == .vector;
2101 const dest_scalar_ty = dest_ty.scalarType(zcu);
2102 const int_info = dest_scalar_ty.intInfo(zcu);
2103
2104 // We emit 9 instructions in the worst case.
2105 var inst_buf: [9]Air.Inst.Index = undefined;
2106 try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
2107 var main_block: Block = .init(&inst_buf);
2108
2109 // This check is a bit annoying because of floating-point rounding and the fact that this
2110 // builtin truncates. We'll use a bigint for our calculations, because we need to construct
2111 // integers exceeding the bounds of the result integer type, and we need to convert it to a
2112 // float with a specific rounding mode to avoid errors.
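    // For example, when casting to `i8`, the emitted checks are `x <= -129.0` (limit rounded
    // towards negative infinity) and `x >= 128.0` (limit rounded towards positive infinity).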
2113 // Our bigint may exceed the twos complement limit by one, so add an extra limb.
2114 const limbs = try gpa.alloc(
2115 std.math.big.Limb,
2116 std.math.big.int.calcTwosCompLimbCount(int_info.bits) + 1,
2117 );
2118 defer gpa.free(limbs);
2119 var big: std.math.big.int.Mutable = .init(limbs, 0);
2120
2121 // Check if the operand is lower than `min_int` when truncated to an integer.
2122 big.setTwosCompIntLimit(.min, int_info.signedness, int_info.bits);
2123 const below_min_inst: Air.Inst.Index = if (!big.positive or big.eqlZero()) bad: {
2124 // `min_int <= 0`, so check for `x <= min_int - 1`.
2125 big.addScalar(big.toConst(), -1);
2126 // For `<=`, we must round the RHS down, so that this value is the first `x` which returns `true`.
2127 const limit_val = try floatFromBigIntVal(pt, is_vector, operand_ty, big.toConst(), .floor);
2128 break :bad try main_block.addCmp(l, .lte, operand_ref, Air.internedToRef(limit_val.toIntern()), .{
2129 .vector = is_vector,
2130 .optimized = optimized,
2131 });
2132 } else {
2133 // `min_int > 0`, which is currently impossible. It would become possible under #3806, in
2134 // which case we must detect `x < min_int`.
2135 unreachable;
2136 };
2137
2138 // Check if the operand is greater than `max_int` when truncated to an integer.
2139 big.setTwosCompIntLimit(.max, int_info.signedness, int_info.bits);
2140 const above_max_inst: Air.Inst.Index = if (big.positive or big.eqlZero()) bad: {
2141 // `max_int >= 0`, so check for `x >= max_int + 1`.
2142 big.addScalar(big.toConst(), 1);
2143 // For `>=`, we must round the RHS up, so that this value is the first `x` which returns `true`.
2144 const limit_val = try floatFromBigIntVal(pt, is_vector, operand_ty, big.toConst(), .ceil);
2145 break :bad try main_block.addCmp(l, .gte, operand_ref, Air.internedToRef(limit_val.toIntern()), .{
2146 .vector = is_vector,
2147 .optimized = optimized,
2148 });
2149 } else {
2150 // `max_int < 0`, which is currently impossible. It would become possible under #3806, in
2151 // which case we must detect `x > max_int`.
2152 unreachable;
2153 };
2154
2155 // Combine the conditions.
2156 const out_of_bounds_inst: Air.Inst.Index = main_block.add(l, .{
2157 .tag = .bool_or,
2158 .data = .{ .bin_op = .{
2159 .lhs = below_min_inst.toRef(),
2160 .rhs = above_max_inst.toRef(),
2161 } },
2162 });
2163 const scalar_out_of_bounds_inst: Air.Inst.Index = if (is_vector) main_block.add(l, .{
2164 .tag = .reduce,
2165 .data = .{ .reduce = .{
2166 .operand = out_of_bounds_inst.toRef(),
2167 .operation = .Or,
2168 } },
2169 }) else out_of_bounds_inst;
2170
    // Now emit the actual condbr. The "true" branch is the safety panic; the "false" branch is
    // the happy path, which does the `int_from_float` and `br`s the result to `orig_inst`.
2173 var condbr: CondBr = .init(l, scalar_out_of_bounds_inst.toRef(), &main_block, .{ .true = .cold });
2174 condbr.then_block = .init(main_block.stealRemainingCapacity());
2175 try condbr.then_block.addPanic(l, .integer_part_out_of_bounds);
2176 condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
2177 const cast_inst = condbr.else_block.add(l, .{
2178 .tag = if (optimized) .int_from_float_optimized else .int_from_float,
2179 .data = .{ .ty_op = .{
2180 .ty = Air.internedToRef(dest_ty.toIntern()),
2181 .operand = operand_ref,
2182 } },
2183 });
2184 _ = condbr.else_block.add(l, .{
2185 .tag = .br,
2186 .data = .{ .br = .{
2187 .block_inst = orig_inst,
2188 .operand = cast_inst.toRef(),
2189 } },
2190 });
2191 _ = condbr.else_block.stealRemainingCapacity(); // we might not have used it all
2192 try condbr.finish(l);
2193
2194 return .{ .ty_pl = .{
2195 .ty = Air.internedToRef(dest_ty.toIntern()),
2196 .payload = try l.addBlockBody(main_block.body()),
2197 } };
2198}
2199fn safeArithmeticBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, overflow_op_tag: Air.Inst.Tag) Error!Air.Inst.Data {
2200 const pt = l.pt;
2201 const zcu = pt.zcu;
2202 const bin_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].bin_op;
2203
2204 const operand_ty = l.typeOf(bin_op.lhs);
2205 assert(l.typeOf(bin_op.rhs).toIntern() == operand_ty.toIntern());
2206 const is_vector = operand_ty.zigTypeTag(zcu) == .vector;
2207
2208 const overflow_tuple_ty = try pt.overflowArithmeticTupleType(operand_ty);
2209 const overflow_bits_ty = overflow_tuple_ty.fieldType(1, zcu);
2210
2211 // The worst-case scenario is a vector operand:
2212 //
2213 // %1 = add_with_overflow(%x, %y)
2214 // %2 = struct_field_val(%1, .@"1")
2215 // %3 = reduce(%2, .@"or")
    // %4 = cmp_eq(%3, <u1, 1>)
2217 // %5 = cond_br(%4, {
2218 // %6 = call(@panic.integerOverflow, [])
2219 // %7 = unreach()
2220 // }, {
2221 // %8 = struct_field_val(%1, .@"0")
2222 // %9 = br(%z, %8)
2223 // })
2224 var inst_buf: [9]Air.Inst.Index = undefined;
2225 try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
2226
2227 var main_block: Block = .init(&inst_buf);
2228
2229 const overflow_op_inst = main_block.add(l, .{
2230 .tag = overflow_op_tag,
2231 .data = .{ .ty_pl = .{
2232 .ty = Air.internedToRef(overflow_tuple_ty.toIntern()),
2233 .payload = try l.addExtra(Air.Bin, .{
2234 .lhs = bin_op.lhs,
2235 .rhs = bin_op.rhs,
2236 }),
2237 } },
2238 });
2239 const overflow_bits_inst = main_block.add(l, .{
2240 .tag = .struct_field_val,
2241 .data = .{ .ty_pl = .{
2242 .ty = Air.internedToRef(overflow_bits_ty.toIntern()),
2243 .payload = try l.addExtra(Air.StructField, .{
2244 .struct_operand = overflow_op_inst.toRef(),
2245 .field_index = 1,
2246 }),
2247 } },
2248 });
2249 const any_overflow_bit_inst = if (is_vector) main_block.add(l, .{
2250 .tag = .reduce,
2251 .data = .{ .reduce = .{
2252 .operand = overflow_bits_inst.toRef(),
2253 .operation = .Or,
2254 } },
2255 }) else overflow_bits_inst;
2256 const any_overflow_inst = try main_block.addCmp(l, .eq, any_overflow_bit_inst.toRef(), .one_u1, .{});
2257
2258 var condbr: CondBr = .init(l, any_overflow_inst.toRef(), &main_block, .{ .true = .cold });
2259 condbr.then_block = .init(main_block.stealRemainingCapacity());
2260 try condbr.then_block.addPanic(l, .integer_overflow);
2261 condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
2262
2263 const result_inst = condbr.else_block.add(l, .{
2264 .tag = .struct_field_val,
2265 .data = .{ .ty_pl = .{
2266 .ty = Air.internedToRef(operand_ty.toIntern()),
2267 .payload = try l.addExtra(Air.StructField, .{
2268 .struct_operand = overflow_op_inst.toRef(),
2269 .field_index = 0,
2270 }),
2271 } },
2272 });
2273 _ = condbr.else_block.add(l, .{
2274 .tag = .br,
2275 .data = .{ .br = .{
2276 .block_inst = orig_inst,
2277 .operand = result_inst.toRef(),
2278 } },
2279 });
2280 // We might not have used all of the instructions; that's intentional.
2281 _ = condbr.else_block.stealRemainingCapacity();
2282
2283 try condbr.finish(l);
2284 return .{ .ty_pl = .{
2285 .ty = Air.internedToRef(operand_ty.toIntern()),
2286 .payload = try l.addBlockBody(main_block.body()),
2287 } };
2288}
2289
2290fn packedLoadBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
2291 const pt = l.pt;
2292 const zcu = pt.zcu;
2293
2294 const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
2295 const res_ty = orig_ty_op.ty.toType();
2296 const res_int_ty = try pt.intType(.unsigned, @intCast(res_ty.bitSize(zcu)));
2297 const ptr_ty = l.typeOf(orig_ty_op.operand);
2298 const ptr_info = ptr_ty.ptrInfo(zcu);
2299 // This relies on a heap of possibly invalid assumptions to work around not knowing the actual backing type.
2300 const load_bits = 8 * ptr_info.packed_offset.host_size;
2301 const load_ty = try pt.intType(.unsigned, load_bits);
2302
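    // The emitted block is (bitcasts are omitted when they would be no-ops):
    //
    //   %1 = bitcast(*load_ty, orig_ptr)
    //   %2 = load(load_ty, %1)
    //   %3 = shr(%2, <uS, bit_offset>)
    //   %4 = trunc(res_int_ty, %3)
    //   %5 = bitcast(Result, %4)
    //   %6 = br(%0, %5)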
2303 var inst_buf: [6]Air.Inst.Index = undefined;
2304 try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
2305
2306 var res_block: Block = .init(&inst_buf);
2307 _ = res_block.add(l, .{
2308 .tag = .br,
2309 .data = .{ .br = .{
2310 .block_inst = orig_inst,
2311 .operand = res_block.addBitCast(l, res_ty, res_block.add(l, .{
2312 .tag = .trunc,
2313 .data = .{ .ty_op = .{
2314 .ty = Air.internedToRef(res_int_ty.toIntern()),
2315 .operand = res_block.add(l, .{
2316 .tag = .shr,
2317 .data = .{ .bin_op = .{
2318 .lhs = res_block.add(l, .{
2319 .tag = .load,
2320 .data = .{ .ty_op = .{
2321 .ty = Air.internedToRef(load_ty.toIntern()),
2322 .operand = res_block.addBitCast(l, load_ptr_ty: {
2323 var load_ptr_info = ptr_info;
2324 load_ptr_info.child = load_ty.toIntern();
2325 load_ptr_info.flags.vector_index = .none;
2326 load_ptr_info.packed_offset = .{ .host_size = 0, .bit_offset = 0 };
2327 break :load_ptr_ty try pt.ptrType(load_ptr_info);
2328 }, orig_ty_op.operand),
2329 } },
2330 }).toRef(),
2331 .rhs = try pt.intRef(
2332 try pt.intType(.unsigned, std.math.log2_int_ceil(u16, load_bits)),
2333 ptr_info.packed_offset.bit_offset,
2334 ),
2335 } },
2336 }).toRef(),
2337 } },
2338 }).toRef()),
2339 } },
2340 });
2341 return .{ .ty_pl = .{
2342 .ty = Air.internedToRef(res_ty.toIntern()),
2343 .payload = try l.addBlockBody(res_block.body()),
2344 } };
2345}
2346fn packedStoreBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
2347 const pt = l.pt;
2348 const zcu = pt.zcu;
2349
2350 const orig_bin_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].bin_op;
2351 const ptr_ty = l.typeOf(orig_bin_op.lhs);
2352 const ptr_info = ptr_ty.ptrInfo(zcu);
2353 const operand_ty = l.typeOf(orig_bin_op.rhs);
2354 const operand_bits: u16 = @intCast(operand_ty.bitSize(zcu));
2355 const operand_int_ty = try pt.intType(.unsigned, operand_bits);
2356 // This relies on a heap of possibly invalid assumptions to work around not knowing the actual backing type.
2357 const load_store_bits = 8 * ptr_info.packed_offset.host_size;
2358 const load_store_ty = try pt.intType(.unsigned, load_store_bits);
2359
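    // The emitted block read-modify-writes the backing integer (bitcasts are omitted when they
    // would be no-ops):
    //
    //   %1 = bitcast(*load_store_ty, orig_ptr)
    //   %2 = load(load_store_ty, %1)
    //   %3 = bit_and(%2, <load_store_ty, keep_mask>)
    //   %4 = bitcast(operand_int_ty, orig_operand)
    //   %5 = intcast(load_store_ty, %4)
    //   %6 = shl_exact(%5, <uS, bit_offset>)
    //   %7 = bit_or(%3, %6)
    //   %8 = store(%1, %7)
    //   %9 = br(%0, @void_value)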
2360 var inst_buf: [9]Air.Inst.Index = undefined;
2361 try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
2362
2363 var res_block: Block = .init(&inst_buf);
2364 {
2365 const backing_ptr_inst = res_block.add(l, .{
2366 .tag = .bitcast,
2367 .data = .{ .ty_op = .{
2368 .ty = Air.internedToRef((load_store_ptr_ty: {
2369 var load_ptr_info = ptr_info;
2370 load_ptr_info.child = load_store_ty.toIntern();
2371 load_ptr_info.flags.vector_index = .none;
2372 load_ptr_info.packed_offset = .{ .host_size = 0, .bit_offset = 0 };
2373 break :load_store_ptr_ty try pt.ptrType(load_ptr_info);
2374 }).toIntern()),
2375 .operand = orig_bin_op.lhs,
2376 } },
2377 });
2378 _ = res_block.add(l, .{
2379 .tag = .store,
2380 .data = .{ .bin_op = .{
2381 .lhs = backing_ptr_inst.toRef(),
2382 .rhs = res_block.add(l, .{
2383 .tag = .bit_or,
2384 .data = .{ .bin_op = .{
2385 .lhs = res_block.add(l, .{
2386 .tag = .bit_and,
2387 .data = .{ .bin_op = .{
2388 .lhs = res_block.add(l, .{
2389 .tag = .load,
2390 .data = .{ .ty_op = .{
2391 .ty = Air.internedToRef(load_store_ty.toIntern()),
2392 .operand = backing_ptr_inst.toRef(),
2393 } },
2394 }).toRef(),
2395 .rhs = Air.internedToRef((keep_mask: {
2396 const ExpectedContents = [std.math.big.int.calcTwosCompLimbCount(256)]std.math.big.Limb;
2397 var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
2398 std.heap.stackFallback(@sizeOf(ExpectedContents), zcu.gpa);
2399 const gpa = stack.get();
2400
2401 var mask_big_int: std.math.big.int.Mutable = .{
2402 .limbs = try gpa.alloc(
2403 std.math.big.Limb,
2404 std.math.big.int.calcTwosCompLimbCount(load_store_bits),
2405 ),
2406 .len = undefined,
2407 .positive = undefined,
2408 };
2409 defer gpa.free(mask_big_int.limbs);
2410 mask_big_int.setTwosCompIntLimit(.max, .unsigned, operand_bits);
2411 mask_big_int.shiftLeft(mask_big_int.toConst(), ptr_info.packed_offset.bit_offset);
2412 mask_big_int.bitNotWrap(mask_big_int.toConst(), .unsigned, load_store_bits);
2413 break :keep_mask try pt.intValue_big(load_store_ty, mask_big_int.toConst());
2414 }).toIntern()),
2415 } },
2416 }).toRef(),
2417 .rhs = res_block.add(l, .{
2418 .tag = .shl_exact,
2419 .data = .{ .bin_op = .{
2420 .lhs = res_block.add(l, .{
2421 .tag = .intcast,
2422 .data = .{ .ty_op = .{
2423 .ty = Air.internedToRef(load_store_ty.toIntern()),
2424 .operand = res_block.addBitCast(l, operand_int_ty, orig_bin_op.rhs),
2425 } },
2426 }).toRef(),
2427 .rhs = try pt.intRef(
2428 try pt.intType(.unsigned, std.math.log2_int_ceil(u16, load_store_bits)),
2429 ptr_info.packed_offset.bit_offset,
2430 ),
2431 } },
2432 }).toRef(),
2433 } },
2434 }).toRef(),
2435 } },
2436 });
2437 _ = res_block.add(l, .{
2438 .tag = .br,
2439 .data = .{ .br = .{
2440 .block_inst = orig_inst,
2441 .operand = .void_value,
2442 } },
2443 });
2444 }
2445 return .{ .ty_pl = .{
2446 .ty = .void_type,
2447 .payload = try l.addBlockBody(res_block.body()),
2448 } };
2449}
2450fn packedStructFieldValBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
2451 const pt = l.pt;
2452 const zcu = pt.zcu;
2453
2454 const orig_ty_pl = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_pl;
2455 const orig_extra = l.extraData(Air.StructField, orig_ty_pl.payload).data;
2456 const field_ty = orig_ty_pl.ty.toType();
2457 const agg_ty = l.typeOf(orig_extra.struct_operand);
2458
2459 const agg_bits: u16 = @intCast(agg_ty.bitSize(zcu));
2460 const bit_offset = zcu.structPackedFieldBitOffset(zcu.typeToStruct(agg_ty).?, orig_extra.field_index);
2461
2462 const agg_int_ty = try pt.intType(.unsigned, agg_bits);
2463 const field_int_ty = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu)));
2464
2465 const agg_shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, agg_bits));
2466 const bit_offset_ref: Air.Inst.Ref = .fromValue(try pt.intValue(agg_shift_ty, bit_offset));
2467
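    // The emitted block is (bitcasts are omitted when they would be no-ops):
    //
    //   %1 = bitcast(agg_int_ty, struct_operand)
    //   %2 = shr(%1, <agg_shift_ty, bit_offset>)
    //   %3 = trunc(field_int_ty, %2)
    //   %4 = bitcast(Field, %3)
    //   %5 = br(%0, %4)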
2468 var inst_buf: [5]Air.Inst.Index = undefined;
2469 var main_block: Block = .init(&inst_buf);
2470 try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
2471
2472 const agg_int = main_block.addBitCast(l, agg_int_ty, orig_extra.struct_operand);
2473 const shifted_agg_int = main_block.addBinOp(l, .shr, agg_int, bit_offset_ref).toRef();
2474 const field_int = main_block.addTyOp(l, .trunc, field_int_ty, shifted_agg_int).toRef();
2475 const field_val = main_block.addBitCast(l, field_ty, field_int);
2476 main_block.addBr(l, orig_inst, field_val);
2477
2478 return .{ .ty_pl = .{
2479 .ty = .fromType(field_ty),
2480 .payload = try l.addBlockBody(main_block.body()),
2481 } };
2482}
2483fn packedAggregateInitBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
2484 const pt = l.pt;
2485 const zcu = pt.zcu;
2486 const gpa = zcu.gpa;
2487
2488 const orig_ty_pl = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_pl;
2489 const agg_ty = orig_ty_pl.ty.toType();
2490 const agg_field_count = agg_ty.structFieldCount(zcu);
2491
2492 var sfba_state = std.heap.stackFallback(@sizeOf([4 * 32 + 2]Air.Inst.Index), gpa);
2493 const sfba = sfba_state.get();
2494
2495 const inst_buf = try sfba.alloc(Air.Inst.Index, 4 * agg_field_count + 2);
2496 defer sfba.free(inst_buf);
2497
2498 var main_block: Block = .init(inst_buf);
2499 try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
2500
2501 const num_bits: u16 = @intCast(agg_ty.bitSize(zcu));
2502 const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, num_bits));
2503 const uint_ty = try pt.intType(.unsigned, num_bits);
2504 var cur_uint: Air.Inst.Ref = .fromValue(try pt.intValue(uint_ty, 0));
2505
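    // Build the backing integer by visiting the fields from last to first: shift the accumulator
    // left by the field's bit size, then OR in the field's bits (bitcast to its integer form and
    // widened with `intcast`). Field 0 is visited last, so it ends up in the least significant
    // bits, matching the packed layout.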
2506 var field_idx = agg_field_count;
2507 while (field_idx > 0) {
2508 field_idx -= 1;
2509 const field_ty = agg_ty.fieldType(field_idx, zcu);
2510 const field_uint_ty = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu)));
2511 const field_bit_size_ref: Air.Inst.Ref = .fromValue(try pt.intValue(shift_ty, field_ty.bitSize(zcu)));
2512 const field_val: Air.Inst.Ref = @enumFromInt(l.air_extra.items[orig_ty_pl.payload + field_idx]);
2513
2514 const shifted = main_block.addBinOp(l, .shl_exact, cur_uint, field_bit_size_ref).toRef();
2515 const field_as_uint = main_block.addBitCast(l, field_uint_ty, field_val);
2516 const field_extended = main_block.addTyOp(l, .intcast, uint_ty, field_as_uint).toRef();
2517 cur_uint = main_block.addBinOp(l, .bit_or, shifted, field_extended).toRef();
2518 }
2519
2520 const result = main_block.addBitCast(l, agg_ty, cur_uint);
2521 main_block.addBr(l, orig_inst, result);
2522
2523 return .{ .ty_pl = .{
2524 .ty = .fromType(agg_ty),
2525 .payload = try l.addBlockBody(main_block.body()),
2526 } };
2527}
2528
2529/// Given a `std.math.big.int.Const`, converts it to a `Value` which is a float of type `float_ty`
2530/// representing the same numeric value. If the integer cannot be exactly represented, `round`
2531/// decides whether the value should be rounded up or down. If `is_vector`, then `float_ty` is
2532/// instead a vector of floats, and the result value is a vector containing the converted scalar
2533/// repeated N times.
2534fn floatFromBigIntVal(
2535 pt: Zcu.PerThread,
2536 is_vector: bool,
2537 float_ty: Type,
2538 x: std.math.big.int.Const,
2539 round: std.math.big.int.Round,
2540) Error!Value {
2541 const zcu = pt.zcu;
2542 const scalar_ty = switch (is_vector) {
2543 true => float_ty.childType(zcu),
2544 false => float_ty,
2545 };
2546 assert(scalar_ty.zigTypeTag(zcu) == .float);
2547 const scalar_val: Value = switch (scalar_ty.floatBits(zcu.getTarget())) {
2548 16 => try pt.floatValue(scalar_ty, x.toFloat(f16, round)[0]),
2549 32 => try pt.floatValue(scalar_ty, x.toFloat(f32, round)[0]),
2550 64 => try pt.floatValue(scalar_ty, x.toFloat(f64, round)[0]),
2551 80 => try pt.floatValue(scalar_ty, x.toFloat(f80, round)[0]),
2552 128 => try pt.floatValue(scalar_ty, x.toFloat(f128, round)[0]),
2553 else => unreachable,
2554 };
2555 if (is_vector) {
2556 return pt.aggregateSplatValue(float_ty, scalar_val);
2557 } else {
2558 return scalar_val;
2559 }
2560}
2561
2562const Block = struct {
2563 instructions: []Air.Inst.Index,
2564 len: usize,
2565
2566 /// There are two common usages of the API:
2567 /// * `buf.len` is exactly the number of instructions which will be in this block
2568 /// * `buf.len` is no smaller than necessary, and `b.stealRemainingCapacity` will be used
2569 fn init(buf: []Air.Inst.Index) Block {
2570 return .{
2571 .instructions = buf,
2572 .len = 0,
2573 };
2574 }
2575
2576 /// Like `Legalize.addInstAssumeCapacity`, but also appends the instruction to `b`.
2577 fn add(b: *Block, l: *Legalize, inst_data: Air.Inst) Air.Inst.Index {
2578 const inst = l.addInstAssumeCapacity(inst_data);
2579 b.instructions[b.len] = inst;
2580 b.len += 1;
2581 return inst;
2582 }
2583 fn addBr(b: *Block, l: *Legalize, target: Air.Inst.Index, operand: Air.Inst.Ref) void {
2584 _ = b.add(l, .{
2585 .tag = .br,
2586 .data = .{ .br = .{ .block_inst = target, .operand = operand } },
2587 });
2588 }
2589 fn addTy(b: *Block, l: *Legalize, tag: Air.Inst.Tag, ty: Type) Air.Inst.Index {
2590 return b.add(l, .{ .tag = tag, .data = .{ .ty = ty } });
2591 }
2592 fn addBinOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) Air.Inst.Index {
2593 return b.add(l, .{
2594 .tag = tag,
2595 .data = .{ .bin_op = .{ .lhs = lhs, .rhs = rhs } },
2596 });
2597 }
2598 fn addUnOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, operand: Air.Inst.Ref) Air.Inst.Index {
2599 return b.add(l, .{
2600 .tag = tag,
2601 .data = .{ .un_op = operand },
2602 });
2603 }
2604 fn addTyOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, ty: Type, operand: Air.Inst.Ref) Air.Inst.Index {
2605 return b.add(l, .{
2606 .tag = tag,
2607 .data = .{ .ty_op = .{
2608 .ty = .fromType(ty),
2609 .operand = operand,
2610 } },
2611 });
2612 }
2613
2614 fn addCompilerRtCall(b: *Block, l: *Legalize, func: Air.CompilerRtFunc, args: []const Air.Inst.Ref) Error!Air.Inst.Index {
2615 return b.add(l, .{
2616 .tag = .legalize_compiler_rt_call,
2617 .data = .{ .legalize_compiler_rt_call = .{
2618 .func = func,
2619 .payload = payload: {
2620 const extra_len = @typeInfo(Air.Call).@"struct".fields.len + args.len;
2621 try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, extra_len);
2622 const index = l.addExtra(Air.Call, .{ .args_len = @intCast(args.len) }) catch unreachable;
2623 l.air_extra.appendSliceAssumeCapacity(@ptrCast(args));
2624 break :payload index;
2625 },
2626 } },
2627 });
2628 }
2629
2630 /// Adds the code to call the panic handler `panic_id`. This is usually `.call` then `.unreach`,
2631 /// but if `Zcu.Feature.panic_fn` is unsupported, we lower to `.trap` instead.
2632 fn addPanic(b: *Block, l: *Legalize, panic_id: Zcu.SimplePanicId) Error!void {
2633 const zcu = l.pt.zcu;
2634 if (!zcu.backendSupportsFeature(.panic_fn)) {
2635 _ = b.add(l, .{
2636 .tag = .trap,
2637 .data = .{ .no_op = {} },
2638 });
2639 return;
2640 }
2641 const panic_fn_val = zcu.builtin_decl_values.get(panic_id.toBuiltin());
2642 _ = b.add(l, .{
2643 .tag = .call,
2644 .data = .{ .pl_op = .{
2645 .operand = Air.internedToRef(panic_fn_val),
2646 .payload = try l.addExtra(Air.Call, .{ .args_len = 0 }),
2647 } },
2648 });
2649 _ = b.add(l, .{
2650 .tag = .unreach,
2651 .data = .{ .no_op = {} },
2652 });
2653 }
2654
    /// Adds a `cmp_*` instruction (including maybe `cmp_vector`) to `b`. This is a fairly thin wrapper
    /// around `add`, although it does compute the result type (`@Vector(n, bool)`) when `opts.vector` is set.
2657 fn addCmp(
2658 b: *Block,
2659 l: *Legalize,
2660 op: std.math.CompareOperator,
2661 lhs: Air.Inst.Ref,
2662 rhs: Air.Inst.Ref,
2663 opts: struct { optimized: bool = false, vector: bool = false },
2664 ) Error!Air.Inst.Index {
2665 const pt = l.pt;
2666 if (opts.vector) {
2667 const bool_vec_ty = try pt.vectorType(.{
2668 .child = .bool_type,
2669 .len = l.typeOf(lhs).vectorLen(pt.zcu),
2670 });
2671 return b.add(l, .{
2672 .tag = if (opts.optimized) .cmp_vector_optimized else .cmp_vector,
2673 .data = .{ .ty_pl = .{
2674 .ty = Air.internedToRef(bool_vec_ty.toIntern()),
2675 .payload = try l.addExtra(Air.VectorCmp, .{
2676 .lhs = lhs,
2677 .rhs = rhs,
2678 .op = Air.VectorCmp.encodeOp(op),
2679 }),
2680 } },
2681 });
2682 }
2683 return addCmpScalar(b, l, op, lhs, rhs, opts.optimized);
2684 }
2685
2686 /// Similar to `addCmp`, but for scalars only. Unlike `addCmp`, this function is
2687 /// infallible, because it doesn't need to add entries to `extra`.
2688 fn addCmpScalar(
2689 b: *Block,
2690 l: *Legalize,
2691 op: std.math.CompareOperator,
2692 lhs: Air.Inst.Ref,
2693 rhs: Air.Inst.Ref,
2694 optimized: bool,
2695 ) Air.Inst.Index {
2696 return b.add(l, .{
2697 .tag = .fromCmpOp(op, optimized),
2698 .data = .{ .bin_op = .{
2699 .lhs = lhs,
2700 .rhs = rhs,
2701 } },
2702 });
2703 }
2704
2705 /// Adds a `bitcast` instruction to `b`. This is a thin wrapper that omits the instruction for
2706 /// no-op casts.
2707 fn addBitCast(
2708 b: *Block,
2709 l: *Legalize,
2710 ty: Type,
2711 operand: Air.Inst.Ref,
2712 ) Air.Inst.Ref {
2713 if (ty.toIntern() != l.typeOf(operand).toIntern()) return b.add(l, .{
2714 .tag = .bitcast,
2715 .data = .{ .ty_op = .{
2716 .ty = Air.internedToRef(ty.toIntern()),
2717 .operand = operand,
2718 } },
2719 }).toRef();
2720 _ = b.stealCapacity(1);
2721 return operand;
2722 }
2723
2724 /// This function emits *two* instructions.
2725 fn addSoftFloatCmp(
2726 b: *Block,
2727 l: *Legalize,
2728 float_ty: Type,
2729 op: std.math.CompareOperator,
2730 lhs: Air.Inst.Ref,
2731 rhs: Air.Inst.Ref,
2732 ) Error!Air.Inst.Ref {
2733 const pt = l.pt;
2734 const target = pt.zcu.getTarget();
2735 const use_aeabi = target.cpu.arch.isArm() and switch (target.abi) {
2736 .eabi,
2737 .eabihf,
2738 .musleabi,
2739 .musleabihf,
2740 .gnueabi,
2741 .gnueabihf,
2742 .android,
2743 .androideabi,
2744 => true,
2745 else => false,
2746 };
2747 const func: Air.CompilerRtFunc, const ret_cmp_op: std.math.CompareOperator = switch (float_ty.floatBits(target)) {
2748 // zig fmt: off
2749 16 => switch (op) {
2750 .eq => .{ .__eqhf2, .eq },
2751 .neq => .{ .__nehf2, .neq },
2752 .lt => .{ .__lthf2, .lt },
2753 .lte => .{ .__lehf2, .lte },
2754 .gt => .{ .__gthf2, .gt },
2755 .gte => .{ .__gehf2, .gte },
2756 },
2757 32 => switch (op) {
2758 .eq => if (use_aeabi) .{ .__aeabi_fcmpeq, .neq } else .{ .__eqsf2, .eq },
2759 .neq => if (use_aeabi) .{ .__aeabi_fcmpeq, .eq } else .{ .__nesf2, .neq },
2760 .lt => if (use_aeabi) .{ .__aeabi_fcmplt, .neq } else .{ .__ltsf2, .lt },
2761 .lte => if (use_aeabi) .{ .__aeabi_fcmple, .neq } else .{ .__lesf2, .lte },
2762 .gt => if (use_aeabi) .{ .__aeabi_fcmpgt, .neq } else .{ .__gtsf2, .gt },
2763 .gte => if (use_aeabi) .{ .__aeabi_fcmpge, .neq } else .{ .__gesf2, .gte },
2764 },
2765 64 => switch (op) {
2766 .eq => if (use_aeabi) .{ .__aeabi_dcmpeq, .neq } else .{ .__eqdf2, .eq },
2767 .neq => if (use_aeabi) .{ .__aeabi_dcmpeq, .eq } else .{ .__nedf2, .neq },
2768 .lt => if (use_aeabi) .{ .__aeabi_dcmplt, .neq } else .{ .__ltdf2, .lt },
2769 .lte => if (use_aeabi) .{ .__aeabi_dcmple, .neq } else .{ .__ledf2, .lte },
2770 .gt => if (use_aeabi) .{ .__aeabi_dcmpgt, .neq } else .{ .__gtdf2, .gt },
2771 .gte => if (use_aeabi) .{ .__aeabi_dcmpge, .neq } else .{ .__gedf2, .gte },
2772 },
2773 80 => switch (op) {
2774 .eq => .{ .__eqxf2, .eq },
2775 .neq => .{ .__nexf2, .neq },
2776 .lt => .{ .__ltxf2, .lt },
2777 .lte => .{ .__lexf2, .lte },
2778 .gt => .{ .__gtxf2, .gt },
2779 .gte => .{ .__gexf2, .gte },
2780 },
2781 128 => switch (op) {
2782 .eq => .{ .__eqtf2, .eq },
2783 .neq => .{ .__netf2, .neq },
2784 .lt => .{ .__lttf2, .lt },
2785 .lte => .{ .__letf2, .lte },
2786 .gt => .{ .__gttf2, .gt },
2787 .gte => .{ .__getf2, .gte },
2788 },
2789 else => unreachable,
2790 // zig fmt: on
2791 };
2792 const call_inst = try b.addCompilerRtCall(l, func, &.{ lhs, rhs });
2793 const raw_result = call_inst.toRef();
2794 assert(l.typeOf(raw_result).toIntern() == .i32_type);
2795 const zero_i32: Air.Inst.Ref = .fromValue(try pt.intValue(.i32, 0));
2796 const ret_cmp_tag: Air.Inst.Tag = .fromCmpOp(ret_cmp_op, false);
2797 return b.addBinOp(l, ret_cmp_tag, raw_result, zero_i32).toRef();
2798 }
2799
2800 /// Returns the unused capacity of `b.instructions`, and shrinks `b.instructions` down to `b.len`.
2801 /// This is useful when you've provided a buffer big enough for all your instructions, but you are
2802 /// now starting a new block and some of them need to live there instead.
2803 fn stealRemainingCapacity(b: *Block) []Air.Inst.Index {
2804 return b.stealFrom(b.len);
2805 }
2806
2807 /// Returns `len` elements taken from the unused capacity of `b.instructions`, and shrinks
2808 /// `b.instructions` down to not include them anymore.
2809 /// This is useful when you've provided a buffer big enough for all your instructions, but you are
2810 /// now starting a new block and some of them need to live there instead.
2811 fn stealCapacity(b: *Block, len: usize) []Air.Inst.Index {
2812 return b.stealFrom(b.instructions.len - len);
2813 }
2814
2815 fn stealFrom(b: *Block, start: usize) []Air.Inst.Index {
2816 assert(start >= b.len);
2817 defer b.instructions.len = start;
2818 return b.instructions[start..];
2819 }
2820
2821 fn body(b: *const Block) []const Air.Inst.Index {
2822 assert(b.len == b.instructions.len);
2823 return b.instructions;
2824 }
2825};
2826
2827const Loop = struct {
2828 inst: Air.Inst.Index,
2829 block: Block,
2830
    /// The return value has `block` initialized to `undefined`; it is the caller's responsibility
2832 /// to initialize it.
2833 fn init(l: *Legalize, parent_block: *Block) Loop {
2834 return .{
2835 .inst = parent_block.add(l, .{
2836 .tag = .loop,
2837 .data = .{ .ty_pl = .{
2838 .ty = .noreturn_type,
2839 .payload = undefined,
2840 } },
2841 }),
2842 .block = undefined,
2843 };
2844 }
2845
2846 fn finish(loop: Loop, l: *Legalize) Error!void {
2847 const data = &l.air_instructions.items(.data)[@intFromEnum(loop.inst)];
2848 data.ty_pl.payload = try l.addBlockBody(loop.block.body());
2849 }
2850};
2851
2852const CondBr = struct {
2853 inst: Air.Inst.Index,
2854 hints: Air.CondBr.BranchHints,
2855 then_block: Block,
2856 else_block: Block,
2857
2858 /// The return value has `then_block` and `else_block` initialized to `undefined`; it is the
    /// caller's responsibility to initialize them.
2860 fn init(l: *Legalize, operand: Air.Inst.Ref, parent_block: *Block, hints: Air.CondBr.BranchHints) CondBr {
2861 return .{
2862 .inst = parent_block.add(l, .{
2863 .tag = .cond_br,
2864 .data = .{ .pl_op = .{
2865 .operand = operand,
2866 .payload = undefined,
2867 } },
2868 }),
2869 .hints = hints,
2870 .then_block = undefined,
2871 .else_block = undefined,
2872 };
2873 }
2874
2875 fn finish(cond_br: CondBr, l: *Legalize) Error!void {
2876 const then_body = cond_br.then_block.body();
2877 const else_body = cond_br.else_block.body();
2878 try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, 3 + then_body.len + else_body.len);
2879
2880 const data = &l.air_instructions.items(.data)[@intFromEnum(cond_br.inst)];
2881 data.pl_op.payload = @intCast(l.air_extra.items.len);
2882 l.air_extra.appendSliceAssumeCapacity(&.{
2883 @intCast(then_body.len),
2884 @intCast(else_body.len),
2885 @bitCast(cond_br.hints),
2886 });
2887 l.air_extra.appendSliceAssumeCapacity(@ptrCast(then_body));
2888 l.air_extra.appendSliceAssumeCapacity(@ptrCast(else_body));
2889 }
2890};
2891
2892fn addInstAssumeCapacity(l: *Legalize, inst: Air.Inst) Air.Inst.Index {
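    // The index of the new instruction is the current length of `air_instructions`; the `defer`
    // appends the instruction only after that index has been captured as the return value.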
2893 defer l.air_instructions.appendAssumeCapacity(inst);
2894 return @enumFromInt(l.air_instructions.len);
2895}
2896
2897fn addExtra(l: *Legalize, comptime Extra: type, extra: Extra) Error!u32 {
2898 const extra_fields = @typeInfo(Extra).@"struct".fields;
2899 try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, extra_fields.len);
2900 defer inline for (extra_fields) |field| l.air_extra.appendAssumeCapacity(switch (field.type) {
2901 u32 => @field(extra, field.name),
2902 Air.Inst.Ref => @intFromEnum(@field(extra, field.name)),
2903 else => @compileError(@typeName(field.type)),
2904 });
2905 return @intCast(l.air_extra.items.len);
2906}
2907
2908fn addBlockBody(l: *Legalize, body: []const Air.Inst.Index) Error!u32 {
2909 try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, 1 + body.len);
2910 defer {
2911 l.air_extra.appendAssumeCapacity(@intCast(body.len));
2912 l.air_extra.appendSliceAssumeCapacity(@ptrCast(body));
2913 }
2914 return @intCast(l.air_extra.items.len);
2915}
2916
2917/// Returns `tag` to remind the caller to `continue :inst` the result.
2918/// `inline` to propagate the comptime-known `tag` result.
2919inline fn replaceInst(l: *Legalize, inst: Air.Inst.Index, comptime tag: Air.Inst.Tag, data: Air.Inst.Data) Air.Inst.Tag {
2920 const orig_ty = if (std.debug.runtime_safety) l.typeOfIndex(inst) else {};
2921 l.air_instructions.set(@intFromEnum(inst), .{ .tag = tag, .data = data });
2922 if (std.debug.runtime_safety) assert(l.typeOfIndex(inst).toIntern() == orig_ty.toIntern());
2923 return tag;
2924}
2925
2926fn compilerRtCall(
2927 l: *Legalize,
2928 orig_inst: Air.Inst.Index,
2929 func: Air.CompilerRtFunc,
2930 args: []const Air.Inst.Ref,
2931 result_ty: Type,
2932) Error!Air.Inst.Tag {
2933 const zcu = l.pt.zcu;
2934 const gpa = zcu.gpa;
2935
2936 const func_ret_ty = func.returnType();
2937
2938 if (func_ret_ty.toIntern() == result_ty.toIntern()) {
2939 try l.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.Call).@"struct".fields.len + args.len);
2940 const payload = l.addExtra(Air.Call, .{ .args_len = @intCast(args.len) }) catch unreachable;
2941 l.air_extra.appendSliceAssumeCapacity(@ptrCast(args));
2942 return l.replaceInst(orig_inst, .legalize_compiler_rt_call, .{ .legalize_compiler_rt_call = .{
2943 .func = func,
2944 .payload = payload,
2945 } });
2946 }
2947
2948 // We need to bitcast the result to an "alias" type (e.g. c_int/i32, c_longdouble/f128).
2949
2950 assert(func_ret_ty.bitSize(zcu) == result_ty.bitSize(zcu));
2951
2952 var inst_buf: [3]Air.Inst.Index = undefined;
2953 var main_block: Block = .init(&inst_buf);
2954 try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
2955
2956 const call_inst = try main_block.addCompilerRtCall(l, func, args);
2957 const casted_result = main_block.addBitCast(l, result_ty, call_inst.toRef());
2958 main_block.addBr(l, orig_inst, casted_result);
2959
2960 return l.replaceInst(orig_inst, .block, .{ .ty_pl = .{
2961 .ty = .fromType(result_ty),
2962 .payload = try l.addBlockBody(main_block.body()),
2963 } });
2964}
2965
2966fn softFptruncFunc(l: *const Legalize, src_ty: Type, dst_ty: Type) Air.CompilerRtFunc {
2967 const target = l.pt.zcu.getTarget();
2968 const src_bits = src_ty.floatBits(target);
2969 const dst_bits = dst_ty.floatBits(target);
2970 assert(dst_bits < src_bits);
2971 const to_f16_func: Air.CompilerRtFunc = switch (src_bits) {
2972 128 => .__trunctfhf2,
2973 80 => .__truncxfhf2,
2974 64 => .__truncdfhf2,
2975 32 => .__truncsfhf2,
2976 else => unreachable,
2977 };
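    // This relies on `Air.CompilerRtFunc` declaring the truncation helpers for each source width
    // consecutively, ordered by destination width (f16, f32, f64, f80), so the desired function
    // is reached by offsetting from the to-f16 variant.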
2978 const offset: u8 = switch (dst_bits) {
2979 16 => 0,
2980 32 => 1,
2981 64 => 2,
2982 80 => 3,
2983 else => unreachable,
2984 };
2985 return @enumFromInt(@intFromEnum(to_f16_func) + offset);
2986}
2987fn softFpextFunc(l: *const Legalize, src_ty: Type, dst_ty: Type) Air.CompilerRtFunc {
2988 const target = l.pt.zcu.getTarget();
2989 const src_bits = src_ty.floatBits(target);
2990 const dst_bits = dst_ty.floatBits(target);
2991 assert(dst_bits > src_bits);
2992 const to_f128_func: Air.CompilerRtFunc = switch (src_bits) {
2993 16 => .__extendhftf2,
2994 32 => .__extendsftf2,
2995 64 => .__extenddftf2,
2996 80 => .__extendxftf2,
2997 else => unreachable,
2998 };
2999 const offset: u8 = switch (dst_bits) {
3000 128 => 0,
3001 80 => 1,
3002 64 => 2,
3003 32 => 3,
3004 else => unreachable,
3005 };
3006 return @enumFromInt(@intFromEnum(to_f128_func) + offset);
3007}
fn softFloatFromInt(l: *Legalize, orig_inst: Air.Inst.Index) Error!union(enum) {
    call: Air.CompilerRtFunc,
    block_payload: Air.Inst.Data,
} {
    const pt = l.pt;
    const zcu = pt.zcu;
    const target = zcu.getTarget();

    const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
    const dest_ty = ty_op.ty.toType();
    const src_ty = l.typeOf(ty_op.operand);

    const src_info = src_ty.intInfo(zcu);
    const float_off: u32 = switch (dest_ty.floatBits(target)) {
        16 => 0,
        32 => 1,
        64 => 2,
        80 => 3,
        128 => 4,
        else => unreachable,
    };
    const base: Air.CompilerRtFunc = switch (src_info.signedness) {
        .signed => .__floatsihf,
        .unsigned => .__floatunsihf,
    };
    fixed: {
        const extended_int_bits: u16, const int_bits_off: u32 = switch (src_info.bits) {
            0...32 => .{ 32, 0 },
            33...64 => .{ 64, 5 },
            65...128 => .{ 128, 10 },
            else => break :fixed,
        };
        // x86_64-windows uses an odd callconv for 128-bit integers, so we use the
        // arbitrary-precision routine in that case for simplicity.
        if (target.cpu.arch == .x86_64 and target.os.tag == .windows and extended_int_bits == 128) {
            break :fixed;
        }

        const func: Air.CompilerRtFunc = @enumFromInt(@intFromEnum(base) + int_bits_off + float_off);
        if (extended_int_bits == src_info.bits) return .{ .call = func };

        // We need to emit a block which first sign/zero-extends to the right type and *then* calls
        // the required routine.
        const extended_ty = try l.pt.intType(src_info.signedness, extended_int_bits);

        var inst_buf: [4]Air.Inst.Index = undefined;
        var main_block: Block = .init(&inst_buf);
        try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);

        const extended_val = main_block.addTyOp(l, .intcast, extended_ty, ty_op.operand).toRef();
        const call_inst = try main_block.addCompilerRtCall(l, func, &.{extended_val});
        const casted_result = main_block.addBitCast(l, dest_ty, call_inst.toRef());
        main_block.addBr(l, orig_inst, casted_result);

        return .{ .block_payload = .{ .ty_pl = .{
            .ty = .fromType(dest_ty),
            .payload = try l.addBlockBody(main_block.body()),
        } } };
    }

    // We need to emit a block which puts the integer into an `alloc` (possibly sign/zero-extended)
    // and calls an arbitrary-width conversion routine.

    const func: Air.CompilerRtFunc = @enumFromInt(@intFromEnum(base) + 15 + float_off);

    // The extended integer routines expect the integer representation where the integer is
    // effectively zero- or sign-extended to its ABI size. We represent that by intcasting to
    // such an integer type and passing a pointer to *that*.
    const extended_ty = try pt.intType(src_info.signedness, @intCast(src_ty.abiSize(zcu) * 8));
    assert(extended_ty.abiSize(zcu) == src_ty.abiSize(zcu));

    var inst_buf: [6]Air.Inst.Index = undefined;
    var main_block: Block = .init(&inst_buf);
    try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);

    const extended_val: Air.Inst.Ref = if (extended_ty.toIntern() != src_ty.toIntern()) ext: {
        break :ext main_block.addTyOp(l, .intcast, extended_ty, ty_op.operand).toRef();
    } else ext: {
        _ = main_block.stealCapacity(1);
        break :ext ty_op.operand;
    };
    const extended_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(extended_ty)).toRef();
    _ = main_block.addBinOp(l, .store, extended_ptr, extended_val);
    const bits_val = try pt.intValue(.usize, src_info.bits);
    const call_inst = try main_block.addCompilerRtCall(l, func, &.{ extended_ptr, .fromValue(bits_val) });
    const casted_result = main_block.addBitCast(l, dest_ty, call_inst.toRef());
    main_block.addBr(l, orig_inst, casted_result);

    return .{ .block_payload = .{ .ty_pl = .{
        .ty = .fromType(dest_ty),
        .payload = try l.addBlockBody(main_block.body()),
    } } };
}
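/// Lowers an `int_from_float` whose operand is a soft float type. Returns either the compiler-rt
/// routine to call directly, or the payload of a `block` which calls the routine and then narrows
/// the result (via `intcast`, or by loading it back from an `alloc` for the arbitrary-width case).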
fn softIntFromFloat(l: *Legalize, orig_inst: Air.Inst.Index) Error!union(enum) {
    call: Air.CompilerRtFunc,
    block_payload: Air.Inst.Data,
} {
    const pt = l.pt;
    const zcu = pt.zcu;
    const target = zcu.getTarget();

    const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
    const src_ty = l.typeOf(ty_op.operand);
    const dest_ty = ty_op.ty.toType();

    const dest_info = dest_ty.intInfo(zcu);
    const float_off: u32 = switch (src_ty.floatBits(target)) {
        16 => 0,
        32 => 1,
        64 => 2,
        80 => 3,
        128 => 4,
        else => unreachable,
    };
    const base: Air.CompilerRtFunc = switch (dest_info.signedness) {
        .signed => .__fixhfsi,
        .unsigned => .__fixunshfsi,
    };
    fixed: {
        const extended_int_bits: u16, const int_bits_off: u32 = switch (dest_info.bits) {
            0...32 => .{ 32, 0 },
            33...64 => .{ 64, 5 },
            65...128 => .{ 128, 10 },
            else => break :fixed,
        };
        // x86_64-windows uses an odd callconv for 128-bit integers, so we use the
        // arbitrary-precision routine in that case for simplicity.
        if (target.cpu.arch == .x86_64 and target.os.tag == .windows and extended_int_bits == 128) {
            break :fixed;
        }

        const func: Air.CompilerRtFunc = @enumFromInt(@intFromEnum(base) + int_bits_off + float_off);
        if (extended_int_bits == dest_info.bits) return .{ .call = func };

        // We need to emit a block which calls the routine and then casts to the required type.

        var inst_buf: [3]Air.Inst.Index = undefined;
        var main_block: Block = .init(&inst_buf);
        try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);

        const call_inst = try main_block.addCompilerRtCall(l, func, &.{ty_op.operand});
        const casted_val = main_block.addTyOp(l, .intcast, dest_ty, call_inst.toRef()).toRef();
        main_block.addBr(l, orig_inst, casted_val);

        return .{ .block_payload = .{ .ty_pl = .{
            .ty = .fromType(dest_ty),
            .payload = try l.addBlockBody(main_block.body()),
        } } };
    }

    // We need to emit a block which calls an arbitrary-width conversion routine, then loads the
    // integer from an `alloc` and possibly truncates it.
    const func: Air.CompilerRtFunc = @enumFromInt(@intFromEnum(base) + 15 + float_off);

    const extended_ty = try pt.intType(dest_info.signedness, @intCast(dest_ty.abiSize(zcu) * 8));
    assert(extended_ty.abiSize(zcu) == dest_ty.abiSize(zcu));

    var inst_buf: [5]Air.Inst.Index = undefined;
    var main_block: Block = .init(&inst_buf);
    try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);

    const extended_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(extended_ty)).toRef();
    const bits_val = try pt.intValue(.usize, dest_info.bits);
    _ = try main_block.addCompilerRtCall(l, func, &.{ extended_ptr, .fromValue(bits_val), ty_op.operand });
    const extended_val = main_block.addTyOp(l, .load, extended_ty, extended_ptr).toRef();
    const result_val = main_block.addTyOp(l, .intcast, dest_ty, extended_val).toRef();
    main_block.addBr(l, orig_inst, result_val);

    return .{ .block_payload = .{ .ty_pl = .{
        .ty = .fromType(dest_ty),
        .payload = try l.addBlockBody(main_block.body()),
    } } };
}
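/// Returns the compiler-rt routine implementing the float operation `op` on `float_ty`. Relies on
/// `Air.CompilerRtFunc` listing the five variants of each routine contiguously, ordered `f16`,
/// `f32`, `f64`, `f80`, `f128`.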
fn softFloatFunc(op: Air.Inst.Tag, float_ty: Type, zcu: *const Zcu) Air.CompilerRtFunc {
    const f16_func: Air.CompilerRtFunc = switch (op) {
        .add, .add_optimized => .__addhf3,
        .sub, .sub_optimized => .__subhf3,
        .mul, .mul_optimized => .__mulhf3,

        .div_float,
        .div_float_optimized,
        .div_exact,
        .div_exact_optimized,
        => .__divhf3,

        .min => .__fminh,
        .max => .__fmaxh,

        .ceil => .__ceilh,
        .floor => .__floorh,
        .trunc_float => .__trunch,
        .round => .__roundh,

        .log => .__logh,
        .log2 => .__log2h,
        .log10 => .__log10h,

        .exp => .__exph,
        .exp2 => .__exp2h,

        .sin => .__sinh,
        .cos => .__cosh,
        .tan => .__tanh,

        .abs => .__fabsh,
        .sqrt => .__sqrth,
        .rem, .rem_optimized => .__fmodh,
        .mul_add => .__fmah,

        else => unreachable,
    };
    const offset: u8 = switch (float_ty.floatBits(zcu.getTarget())) {
        16 => 0,
        32 => 1,
        64 => 2,
        80 => 3,
        128 => 4,
        else => unreachable,
    };
    return @enumFromInt(@intFromEnum(f16_func) + offset);
}

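/// Returns the payload of a `block` which negates a soft float without a compiler-rt call by
/// bitcasting to an unsigned integer, flipping the sign bit with `xor`, and bitcasting back.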
fn softFloatNegBlockPayload(
    l: *Legalize,
    orig_inst: Air.Inst.Index,
    operand: Air.Inst.Ref,
) Error!Air.Inst.Data {
    const pt = l.pt;
    const zcu = pt.zcu;
    const gpa = zcu.gpa;

    const float_ty = l.typeOfIndex(orig_inst);

    const int_ty: Type, const sign_bit: Value = switch (float_ty.floatBits(zcu.getTarget())) {
        16 => .{ .u16, try pt.intValue(.u16, @as(u16, 1) << 15) },
        32 => .{ .u32, try pt.intValue(.u32, @as(u32, 1) << 31) },
        64 => .{ .u64, try pt.intValue(.u64, @as(u64, 1) << 63) },
        80 => .{ .u80, try pt.intValue(.u80, @as(u80, 1) << 79) },
        128 => .{ .u128, try pt.intValue(.u128, @as(u128, 1) << 127) },
        else => unreachable,
    };

    const sign_bit_ref: Air.Inst.Ref = .fromValue(sign_bit);

    var inst_buf: [4]Air.Inst.Index = undefined;
    var main_block: Block = .init(&inst_buf);
    try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);

    const operand_as_int = main_block.addBitCast(l, int_ty, operand);
    const result_as_int = main_block.addBinOp(l, .xor, operand_as_int, sign_bit_ref).toRef();
    const result = main_block.addBitCast(l, float_ty, result_as_int);
    main_block.addBr(l, orig_inst, result);

    return .{ .ty_pl = .{
        .ty = .fromType(float_ty),
        .payload = try l.addBlockBody(main_block.body()),
    } };
}

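/// Returns the payload of a `block` which implements `div_trunc`/`div_floor` on a soft float type
/// as a compiler-rt division followed by the corresponding truncation/floor routine.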
fn softFloatDivTruncFloorBlockPayload(
    l: *Legalize,
    orig_inst: Air.Inst.Index,
    lhs: Air.Inst.Ref,
    rhs: Air.Inst.Ref,
    air_tag: Air.Inst.Tag,
) Error!Air.Inst.Data {
    const zcu = l.pt.zcu;
    const gpa = zcu.gpa;

    const float_ty = l.typeOfIndex(orig_inst);

    const floor_tag: Air.Inst.Tag = switch (air_tag) {
        .div_trunc, .div_trunc_optimized => .trunc_float,
        .div_floor, .div_floor_optimized => .floor,
        else => unreachable,
    };

    var inst_buf: [4]Air.Inst.Index = undefined;
    var main_block: Block = .init(&inst_buf);
    try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);

    const div_inst = try main_block.addCompilerRtCall(l, softFloatFunc(.div_float, float_ty, zcu), &.{ lhs, rhs });
    const floor_inst = try main_block.addCompilerRtCall(l, softFloatFunc(floor_tag, float_ty, zcu), &.{div_inst.toRef()});
    const casted_result = main_block.addBitCast(l, float_ty, floor_inst.toRef());
    main_block.addBr(l, orig_inst, casted_result);

    return .{ .ty_pl = .{
        .ty = .fromType(float_ty),
        .payload = try l.addBlockBody(main_block.body()),
    } };
}
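/// Returns the payload of a `block` which implements `mod` on a soft float type via compiler-rt:
/// `rem = fmod(lhs, rhs)`; if `lhs < 0.0`, the result is `fmod(rem + rhs, rhs)`, otherwise `rem`.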
fn softFloatModBlockPayload(
    l: *Legalize,
    orig_inst: Air.Inst.Index,
    lhs: Air.Inst.Ref,
    rhs: Air.Inst.Ref,
) Error!Air.Inst.Data {
    const pt = l.pt;
    const zcu = pt.zcu;
    const gpa = zcu.gpa;

    const float_ty = l.typeOfIndex(orig_inst);

    var inst_buf: [10]Air.Inst.Index = undefined;
    var main_block: Block = .init(&inst_buf);
    try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);

    const rem = try main_block.addCompilerRtCall(l, softFloatFunc(.rem, float_ty, zcu), &.{ lhs, rhs });
    const lhs_lt_zero = try main_block.addSoftFloatCmp(l, float_ty, .lt, lhs, .fromValue(try pt.floatValue(float_ty, 0.0)));

    var condbr: CondBr = .init(l, lhs_lt_zero, &main_block, .{});
    condbr.then_block = .init(main_block.stealRemainingCapacity());
    {
        const add = try condbr.then_block.addCompilerRtCall(l, softFloatFunc(.add, float_ty, zcu), &.{ rem.toRef(), rhs });
        const inner_rem = try condbr.then_block.addCompilerRtCall(l, softFloatFunc(.rem, float_ty, zcu), &.{ add.toRef(), rhs });
        const casted_result = condbr.then_block.addBitCast(l, float_ty, inner_rem.toRef());
        condbr.then_block.addBr(l, orig_inst, casted_result);
    }
    condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
    {
        const casted_result = condbr.else_block.addBitCast(l, float_ty, rem.toRef());
        condbr.else_block.addBr(l, orig_inst, casted_result);
    }

    try condbr.finish(l);

    return .{ .ty_pl = .{
        .ty = .fromType(float_ty),
        .payload = try l.addBlockBody(main_block.body()),
    } };
}
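/// Returns the payload of a `block` which performs a soft float comparison and breaks with the
/// `bool` result.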
fn softFloatCmpBlockPayload(
    l: *Legalize,
    orig_inst: Air.Inst.Index,
    float_ty: Type,
    op: std.math.CompareOperator,
    lhs: Air.Inst.Ref,
    rhs: Air.Inst.Ref,
) Error!Air.Inst.Data {
    const pt = l.pt;
    const gpa = pt.zcu.gpa;

    var inst_buf: [3]Air.Inst.Index = undefined;
    var main_block: Block = .init(&inst_buf);
    try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);

    const result = try main_block.addSoftFloatCmp(l, float_ty, op, lhs, rhs);
    main_block.addBr(l, orig_inst, result);

    return .{ .ty_pl = .{
        .ty = .bool_type,
        .payload = try l.addBlockBody(main_block.body()),
    } };
}

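/// Determines whether `air_tag` operating on `ty` should be scalarized, lowered to soft float
/// calls, or left alone.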
/// `inline` to propagate potentially comptime-known return value.
inline fn wantScalarizeOrSoftFloat(
    l: *const Legalize,
    comptime air_tag: Air.Inst.Tag,
    ty: Type,
) enum {
    none,
    scalarize,
    soft_float,
} {
    const zcu = l.pt.zcu;
    const is_vec, const scalar_ty = switch (ty.zigTypeTag(zcu)) {
        .vector => .{ true, ty.childType(zcu) },
        else => .{ false, ty },
    };

    if (is_vec and l.features.has(.scalarize(air_tag))) return .scalarize;

    if (l.wantSoftFloatScalar(scalar_ty)) {
        return if (is_vec) .scalarize else .soft_float;
    }
    return .none;
}

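/// Whether operations on the scalar float type `ty` should be lowered to compiler-rt calls,
/// according to the enabled `soft_f*` features.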
/// `inline` to propagate potentially comptime-known return value.
inline fn wantSoftFloatScalar(l: *const Legalize, ty: Type) bool {
    const zcu = l.pt.zcu;
    return switch (ty.zigTypeTag(zcu)) {
        .vector => unreachable,
        .float => switch (ty.floatBits(zcu.getTarget())) {
            16 => l.features.has(.soft_f16),
            32 => l.features.has(.soft_f32),
            64 => l.features.has(.soft_f64),
            80 => l.features.has(.soft_f80),
            128 => l.features.has(.soft_f128),
            else => unreachable,
        },
        else => false,
    };
}

const Air = @import("../Air.zig");
const assert = std.debug.assert;
const dev = @import("../dev.zig");
const InternPool = @import("../InternPool.zig");
const Legalize = @This();
const std = @import("std");
const Type = @import("../Type.zig");
const Value = @import("../Value.zig");
const Zcu = @import("../Zcu.zig");