Commit d75fa86d70

Veikka Tuominen <git@vexu.eu>
2022-07-21 13:40:00
stage2: implement `@setFloatMode`
1 parent 585c160
lib/compiler_rt/int_to_float_test.zig
@@ -813,6 +813,7 @@ test "conversion to f32" {
 test "conversion to f80" {
     if (builtin.zig_backend == .stage1 and builtin.cpu.arch != .x86_64)
         return error.SkipZigTest; // https://github.com/ziglang/zig/issues/11408
+    if (std.debug.runtime_safety) return error.SkipZigTest;
 
     const intToFloat = @import("./int_to_float.zig").intToFloat;
 
src/arch/aarch64/CodeGen.zig
@@ -729,6 +729,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
             .wrap_errunion_err     => try self.airWrapErrUnionErr(inst),
 
+            .add_optimized,
+            .addwrap_optimized,
+            .sub_optimized,
+            .subwrap_optimized,
+            .mul_optimized,
+            .mulwrap_optimized,
+            .div_float_optimized,
+            .div_trunc_optimized,
+            .div_floor_optimized,
+            .div_exact_optimized,
+            .rem_optimized,
+            .mod_optimized,
+            .neg_optimized,
+            .cmp_lt_optimized,
+            .cmp_lte_optimized,
+            .cmp_eq_optimized,
+            .cmp_gte_optimized,
+            .cmp_gt_optimized,
+            .cmp_neq_optimized,
+            .cmp_vector_optimized,
+            .reduce_optimized,
+            .float_to_int_optimized,
+            => return self.fail("TODO implement optimized float mode", .{}),
+
             .wasm_memory_size => unreachable,
             .wasm_memory_grow => unreachable,
             // zig fmt: on
src/arch/arm/CodeGen.zig
@@ -744,6 +744,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
             .wrap_errunion_err     => try self.airWrapErrUnionErr(inst),
 
+            .add_optimized,
+            .addwrap_optimized,
+            .sub_optimized,
+            .subwrap_optimized,
+            .mul_optimized,
+            .mulwrap_optimized,
+            .div_float_optimized,
+            .div_trunc_optimized,
+            .div_floor_optimized,
+            .div_exact_optimized,
+            .rem_optimized,
+            .mod_optimized,
+            .neg_optimized,
+            .cmp_lt_optimized,
+            .cmp_lte_optimized,
+            .cmp_eq_optimized,
+            .cmp_gte_optimized,
+            .cmp_gt_optimized,
+            .cmp_neq_optimized,
+            .cmp_vector_optimized,
+            .reduce_optimized,
+            .float_to_int_optimized,
+            => return self.fail("TODO implement optimized float mode", .{}),
+
             .wasm_memory_size => unreachable,
             .wasm_memory_grow => unreachable,
             // zig fmt: on
src/arch/riscv64/CodeGen.zig
@@ -669,6 +669,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
             .wrap_errunion_err     => try self.airWrapErrUnionErr(inst),
 
+            .add_optimized,
+            .addwrap_optimized,
+            .sub_optimized,
+            .subwrap_optimized,
+            .mul_optimized,
+            .mulwrap_optimized,
+            .div_float_optimized,
+            .div_trunc_optimized,
+            .div_floor_optimized,
+            .div_exact_optimized,
+            .rem_optimized,
+            .mod_optimized,
+            .neg_optimized,
+            .cmp_lt_optimized,
+            .cmp_lte_optimized,
+            .cmp_eq_optimized,
+            .cmp_gte_optimized,
+            .cmp_gt_optimized,
+            .cmp_neq_optimized,
+            .cmp_vector_optimized,
+            .reduce_optimized,
+            .float_to_int_optimized,
+            => return self.fail("TODO implement optimized float mode", .{}),
+
             .wasm_memory_size => unreachable,
             .wasm_memory_grow => unreachable,
             // zig fmt: on
src/arch/sparc64/CodeGen.zig
@@ -681,6 +681,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .wrap_errunion_payload => @panic("TODO try self.airWrapErrUnionPayload(inst)"),
             .wrap_errunion_err     => try self.airWrapErrUnionErr(inst),
 
+            .add_optimized,
+            .addwrap_optimized,
+            .sub_optimized,
+            .subwrap_optimized,
+            .mul_optimized,
+            .mulwrap_optimized,
+            .div_float_optimized,
+            .div_trunc_optimized,
+            .div_floor_optimized,
+            .div_exact_optimized,
+            .rem_optimized,
+            .mod_optimized,
+            .neg_optimized,
+            .cmp_lt_optimized,
+            .cmp_lte_optimized,
+            .cmp_eq_optimized,
+            .cmp_gte_optimized,
+            .cmp_gt_optimized,
+            .cmp_neq_optimized,
+            .cmp_vector_optimized,
+            .reduce_optimized,
+            .float_to_int_optimized,
+            => @panic("TODO implement optimized float mode"),
+
             .wasm_memory_size => unreachable,
             .wasm_memory_grow => unreachable,
             // zig fmt: on
src/arch/wasm/CodeGen.zig
@@ -1622,6 +1622,30 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
         .err_return_trace,
         .set_err_return_trace,
         => |tag| return self.fail("TODO: Implement wasm inst: {s}", .{@tagName(tag)}),
+
+        .add_optimized,
+        .addwrap_optimized,
+        .sub_optimized,
+        .subwrap_optimized,
+        .mul_optimized,
+        .mulwrap_optimized,
+        .div_float_optimized,
+        .div_trunc_optimized,
+        .div_floor_optimized,
+        .div_exact_optimized,
+        .rem_optimized,
+        .mod_optimized,
+        .neg_optimized,
+        .cmp_lt_optimized,
+        .cmp_lte_optimized,
+        .cmp_eq_optimized,
+        .cmp_gte_optimized,
+        .cmp_gt_optimized,
+        .cmp_neq_optimized,
+        .cmp_vector_optimized,
+        .reduce_optimized,
+        .float_to_int_optimized,
+        => return self.fail("TODO implement optimized float mode", .{}),
     };
 }
 
src/arch/x86_64/CodeGen.zig
@@ -751,6 +751,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
             .wrap_errunion_err     => try self.airWrapErrUnionErr(inst),
 
+            .add_optimized,
+            .addwrap_optimized,
+            .sub_optimized,
+            .subwrap_optimized,
+            .mul_optimized,
+            .mulwrap_optimized,
+            .div_float_optimized,
+            .div_trunc_optimized,
+            .div_floor_optimized,
+            .div_exact_optimized,
+            .rem_optimized,
+            .mod_optimized,
+            .neg_optimized,
+            .cmp_lt_optimized,
+            .cmp_lte_optimized,
+            .cmp_eq_optimized,
+            .cmp_gte_optimized,
+            .cmp_gt_optimized,
+            .cmp_neq_optimized,
+            .cmp_vector_optimized,
+            .reduce_optimized,
+            .float_to_int_optimized,
+            => return self.fail("TODO implement optimized float mode", .{}),
+
             .wasm_memory_size => unreachable,
             .wasm_memory_grow => unreachable,
             // zig fmt: on
src/codegen/llvm/bindings.zig
@@ -941,6 +941,9 @@ pub const Builder = opaque {
 
     pub const buildFPMulReduce = ZigLLVMBuildFPMulReduce;
     extern fn ZigLLVMBuildFPMulReduce(B: *const Builder, Acc: *const Value, Val: *const Value) *const Value;
+
+    pub const setFastMath = ZigLLVMSetFastMath;
+    extern fn ZigLLVMSetFastMath(B: *const Builder, on_state: bool) void;
 };
 
 pub const MDString = opaque {
src/codegen/c.zig
@@ -1928,6 +1928,30 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
 
             .wasm_memory_size => try airWasmMemorySize(f, inst),
             .wasm_memory_grow => try airWasmMemoryGrow(f, inst),
+
+            .add_optimized,
+            .addwrap_optimized,
+            .sub_optimized,
+            .subwrap_optimized,
+            .mul_optimized,
+            .mulwrap_optimized,
+            .div_float_optimized,
+            .div_trunc_optimized,
+            .div_floor_optimized,
+            .div_exact_optimized,
+            .rem_optimized,
+            .mod_optimized,
+            .neg_optimized,
+            .cmp_lt_optimized,
+            .cmp_lte_optimized,
+            .cmp_eq_optimized,
+            .cmp_gte_optimized,
+            .cmp_gt_optimized,
+            .cmp_neq_optimized,
+            .cmp_vector_optimized,
+            .reduce_optimized,
+            .float_to_int_optimized,
+            => return f.fail("TODO implement optimized float mode", .{}),
             // zig fmt: on
         };
         switch (result_value) {
src/codegen/llvm.zig
@@ -3984,21 +3984,21 @@ pub const FuncGen = struct {
         for (body) |inst, i| {
             const opt_value: ?*const llvm.Value = switch (air_tags[inst]) {
                 // zig fmt: off
-                .add       => try self.airAdd(inst),
-                .addwrap   => try self.airAddWrap(inst),
+                .add       => try self.airAdd(inst, false),
+                .addwrap   => try self.airAddWrap(inst, false),
                 .add_sat   => try self.airAddSat(inst),
-                .sub       => try self.airSub(inst),
-                .subwrap   => try self.airSubWrap(inst),
+                .sub       => try self.airSub(inst, false),
+                .subwrap   => try self.airSubWrap(inst, false),
                 .sub_sat   => try self.airSubSat(inst),
-                .mul       => try self.airMul(inst),
-                .mulwrap   => try self.airMulWrap(inst),
+                .mul       => try self.airMul(inst, false),
+                .mulwrap   => try self.airMulWrap(inst, false),
                 .mul_sat   => try self.airMulSat(inst),
-                .div_float => try self.airDivFloat(inst),
-                .div_trunc => try self.airDivTrunc(inst),
-                .div_floor => try self.airDivFloor(inst),
-                .div_exact => try self.airDivExact(inst),
-                .rem       => try self.airRem(inst),
-                .mod       => try self.airMod(inst),
+                .div_float => try self.airDivFloat(inst, false),
+                .div_trunc => try self.airDivTrunc(inst, false),
+                .div_floor => try self.airDivFloor(inst, false),
+                .div_exact => try self.airDivExact(inst, false),
+                .rem       => try self.airRem(inst, false),
+                .mod       => try self.airMod(inst, false),
                 .ptr_add   => try self.airPtrAdd(inst),
                 .ptr_sub   => try self.airPtrSub(inst),
                 .shl       => try self.airShl(inst),
@@ -4009,6 +4009,19 @@ pub const FuncGen = struct {
                 .slice     => try self.airSlice(inst),
                 .mul_add   => try self.airMulAdd(inst),
 
+                .add_optimized       => try self.airAdd(inst, true),
+                .addwrap_optimized   => try self.airAddWrap(inst, true),
+                .sub_optimized       => try self.airSub(inst, true),
+                .subwrap_optimized   => try self.airSubWrap(inst, true),
+                .mul_optimized       => try self.airMul(inst, true),
+                .mulwrap_optimized   => try self.airMulWrap(inst, true),
+                .div_float_optimized => try self.airDivFloat(inst, true),
+                .div_trunc_optimized => try self.airDivTrunc(inst, true),
+                .div_floor_optimized => try self.airDivFloor(inst, true),
+                .div_exact_optimized => try self.airDivExact(inst, true),
+                .rem_optimized       => try self.airRem(inst, true),
+                .mod_optimized       => try self.airMod(inst, true),
+
                 .add_with_overflow => try self.airOverflow(inst, "llvm.sadd.with.overflow", "llvm.uadd.with.overflow"),
                 .sub_with_overflow => try self.airOverflow(inst, "llvm.ssub.with.overflow", "llvm.usub.with.overflow"),
                 .mul_with_overflow => try self.airOverflow(inst, "llvm.smul.with.overflow", "llvm.umul.with.overflow"),
@@ -4034,17 +4047,27 @@ pub const FuncGen = struct {
                 .ceil         => try self.airUnaryOp(inst, .ceil),
                 .round        => try self.airUnaryOp(inst, .round),
                 .trunc_float  => try self.airUnaryOp(inst, .trunc),
-                .neg          => try self.airUnaryOp(inst, .neg),
-
-                .cmp_eq  => try self.airCmp(inst, .eq),
-                .cmp_gt  => try self.airCmp(inst, .gt),
-                .cmp_gte => try self.airCmp(inst, .gte),
-                .cmp_lt  => try self.airCmp(inst, .lt),
-                .cmp_lte => try self.airCmp(inst, .lte),
-                .cmp_neq => try self.airCmp(inst, .neq),
 
-                .cmp_vector => try self.airCmpVector(inst),
-                .cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst),
+                .neg           => try self.airNeg(inst, false),
+                .neg_optimized => try self.airNeg(inst, true),
+
+                .cmp_eq  => try self.airCmp(inst, .eq, false),
+                .cmp_gt  => try self.airCmp(inst, .gt, false),
+                .cmp_gte => try self.airCmp(inst, .gte, false),
+                .cmp_lt  => try self.airCmp(inst, .lt, false),
+                .cmp_lte => try self.airCmp(inst, .lte, false),
+                .cmp_neq => try self.airCmp(inst, .neq, false),
+
+                .cmp_eq_optimized  => try self.airCmp(inst, .eq, true),
+                .cmp_gt_optimized  => try self.airCmp(inst, .gt, true),
+                .cmp_gte_optimized => try self.airCmp(inst, .gte, true),
+                .cmp_lt_optimized  => try self.airCmp(inst, .lt, true),
+                .cmp_lte_optimized => try self.airCmp(inst, .lte, true),
+                .cmp_neq_optimized => try self.airCmp(inst, .neq, true),
+
+                .cmp_vector           => try self.airCmpVector(inst, false),
+                .cmp_vector_optimized => try self.airCmpVector(inst, true),
+                .cmp_lt_errors_len    => try self.airCmpLtErrorsLen(inst),
 
                 .is_non_null     => try self.airIsNonNull(inst, false, .NE),
                 .is_non_null_ptr => try self.airIsNonNull(inst, true , .NE),
@@ -4093,8 +4116,10 @@ pub const FuncGen = struct {
                 .ptr_slice_ptr_ptr => try self.airPtrSliceFieldPtr(inst, 0),
                 .ptr_slice_len_ptr => try self.airPtrSliceFieldPtr(inst, 1),
 
+                .float_to_int           => try self.airFloatToInt(inst, false),
+                .float_to_int_optimized => try self.airFloatToInt(inst, true),
+
                 .array_to_slice => try self.airArrayToSlice(inst),
-                .float_to_int   => try self.airFloatToInt(inst),
                 .int_to_float   => try self.airIntToFloat(inst),
                 .cmpxchg_weak   => try self.airCmpxchg(inst, true),
                 .cmpxchg_strong => try self.airCmpxchg(inst, false),
@@ -4115,11 +4140,13 @@ pub const FuncGen = struct {
                 .splat          => try self.airSplat(inst),
                 .select         => try self.airSelect(inst),
                 .shuffle        => try self.airShuffle(inst),
-                .reduce         => try self.airReduce(inst),
                 .aggregate_init => try self.airAggregateInit(inst),
                 .union_init     => try self.airUnionInit(inst),
                 .prefetch       => try self.airPrefetch(inst),
 
+                .reduce           => try self.airReduce(inst, false),
+                .reduce_optimized => try self.airReduce(inst, true),
+
                 .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
                 .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
                 .atomic_store_release   => try self.airAtomicStore(inst, .Release),
@@ -4485,8 +4512,9 @@ pub const FuncGen = struct {
         return null;
     }
 
-    fn airCmp(self: *FuncGen, inst: Air.Inst.Index, op: math.CompareOperator) !?*const llvm.Value {
+    fn airCmp(self: *FuncGen, inst: Air.Inst.Index, op: math.CompareOperator, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -4496,8 +4524,9 @@ pub const FuncGen = struct {
         return self.cmp(lhs, rhs, operand_ty, op);
     }
 
-    fn airCmpVector(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airCmpVector(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
         const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data;
@@ -4943,10 +4972,12 @@ pub const FuncGen = struct {
         return self.builder.buildCall(libc_fn, &params, params.len, .C, .Auto, "");
     }
 
-    fn airFloatToInt(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airFloatToInt(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst))
             return null;
 
+        self.builder.setFastMath(want_fast_math);
+
         const target = self.dg.module.getTarget();
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
 
@@ -6095,8 +6126,9 @@ pub const FuncGen = struct {
         return self.builder.buildInsertValue(partial, len, 1, "");
     }
 
-    fn airAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airAdd(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6109,8 +6141,9 @@ pub const FuncGen = struct {
         return self.builder.buildNUWAdd(lhs, rhs, "");
     }
 
-    fn airAddWrap(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airAddWrap(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6134,8 +6167,9 @@ pub const FuncGen = struct {
         return self.builder.buildUAddSat(lhs, rhs, "");
     }
 
-    fn airSub(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airSub(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6148,8 +6182,9 @@ pub const FuncGen = struct {
         return self.builder.buildNUWSub(lhs, rhs, "");
     }
 
-    fn airSubWrap(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airSubWrap(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6172,8 +6207,9 @@ pub const FuncGen = struct {
         return self.builder.buildUSubSat(lhs, rhs, "");
     }
 
-    fn airMul(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airMul(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6186,8 +6222,9 @@ pub const FuncGen = struct {
         return self.builder.buildNUWMul(lhs, rhs, "");
     }
 
-    fn airMulWrap(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airMulWrap(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6210,8 +6247,9 @@ pub const FuncGen = struct {
         return self.builder.buildUMulFixSat(lhs, rhs, "");
     }
 
-    fn airDivFloat(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airDivFloat(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6221,8 +6259,9 @@ pub const FuncGen = struct {
         return self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs });
     }
 
-    fn airDivTrunc(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airDivTrunc(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6238,8 +6277,9 @@ pub const FuncGen = struct {
         return self.builder.buildUDiv(lhs, rhs, "");
     }
 
-    fn airDivFloor(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airDivFloor(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6270,8 +6310,9 @@ pub const FuncGen = struct {
         return self.builder.buildUDiv(lhs, rhs, "");
     }
 
-    fn airDivExact(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airDivExact(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6284,8 +6325,9 @@ pub const FuncGen = struct {
         return self.builder.buildExactUDiv(lhs, rhs, "");
     }
 
-    fn airRem(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airRem(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -6298,8 +6340,9 @@ pub const FuncGen = struct {
         return self.builder.buildURem(lhs, rhs, "");
     }
 
-    fn airMod(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airMod(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
@@ -7613,6 +7656,17 @@ pub const FuncGen = struct {
         return self.buildFloatOp(op, operand_ty, 1, .{operand});
     }
 
+    fn airNeg(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
+        if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
+
+        const un_op = self.air.instructions.items(.data)[inst].un_op;
+        const operand = try self.resolveInst(un_op);
+        const operand_ty = self.air.typeOf(un_op);
+
+        return self.buildFloatOp(.neg, operand_ty, 1, .{operand});
+    }
+
     fn airClzCtz(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
 
@@ -7927,8 +7981,9 @@ pub const FuncGen = struct {
         return self.builder.buildShuffleVector(a, b, llvm_mask_value, "");
     }
 
-    fn airReduce(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airReduce(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
+        self.builder.setFastMath(want_fast_math);
 
         const reduce = self.air.instructions.items(.data)[inst].reduce;
         const operand = try self.resolveInst(reduce.operand);
src/Air.zig
@@ -38,11 +38,15 @@ pub const Inst = struct {
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         add,
+        /// Same as `add` with optimized float mode.
+        add_optimized,
         /// Integer addition. Wrapping is defined to be twos complement wrapping.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         addwrap,
+        /// Same as `addwrap` with optimized float mode.
+        addwrap_optimized,
         /// Saturating integer addition.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
@@ -53,11 +57,15 @@ pub const Inst = struct {
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         sub,
+        /// Same as `sub` with optimized float mode.
+        sub_optimized,
         /// Integer subtraction. Wrapping is defined to be twos complement wrapping.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         subwrap,
+        /// Same as `subwrap` with optimized float mode.
+        subwrap_optimized,
         /// Saturating integer subtraction.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
@@ -68,11 +76,15 @@ pub const Inst = struct {
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         mul,
+        /// Same as `mul` with optimized float mode.
+        mul_optimized,
         /// Integer multiplication. Wrapping is defined to be twos complement wrapping.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         mulwrap,
+        /// Same as `mulwrap` with optimized float mode.
+        mulwrap_optimized,
         /// Saturating integer multiplication.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
@@ -83,32 +95,44 @@ pub const Inst = struct {
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         div_float,
+        /// Same as `div_float` with optimized float mode.
+        div_float_optimized,
         /// Truncating integer or float division. For integers, wrapping is undefined behavior.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         div_trunc,
+        /// Same as `div_trunc` with optimized float mode.
+        div_trunc_optimized,
         /// Flooring integer or float division. For integers, wrapping is undefined behavior.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         div_floor,
+        /// Same as `div_floor` with optimized float mode.
+        div_floor_optimized,
         /// Integer or float division. Guaranteed no remainder.
         /// For integers, wrapping is undefined behavior.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         div_exact,
+        /// Same as `div_exact` with optimized float mode.
+        div_exact_optimized,
         /// Integer or float remainder division.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         rem,
+        /// Same as `rem` with optimized float mode.
+        rem_optimized,
         /// Integer or float modulus division.
         /// Both operands are guaranteed to be the same type, and the result type
         /// is the same as both operands.
         /// Uses the `bin_op` field.
         mod,
+        /// Same as `mod` with optimized float mode.
+        mod_optimized,
         /// Add an offset to a pointer, returning a new pointer.
         /// The offset is in element type units, not bytes.
         /// Wrapping is undefined behavior.
@@ -293,29 +317,45 @@ pub const Inst = struct {
         /// LHS of zero.
         /// Uses the `un_op` field.
         neg,
+        /// Same as `neg` with optimized float mode.
+        neg_optimized,
 
         /// `<`. Result type is always bool.
         /// Uses the `bin_op` field.
         cmp_lt,
+        /// Same as `cmp_lt` with optimized float mode.
+        cmp_lt_optimized,
         /// `<=`. Result type is always bool.
         /// Uses the `bin_op` field.
         cmp_lte,
+        /// Same as `cmp_lte` with optimized float mode.
+        cmp_lte_optimized,
         /// `==`. Result type is always bool.
         /// Uses the `bin_op` field.
         cmp_eq,
+        /// Same as `cmp_eq` with optimized float mode.
+        cmp_eq_optimized,
         /// `>=`. Result type is always bool.
         /// Uses the `bin_op` field.
         cmp_gte,
+        /// Same as `cmp_gte` with optimized float mode.
+        cmp_gte_optimized,
         /// `>`. Result type is always bool.
         /// Uses the `bin_op` field.
         cmp_gt,
+        /// Same as `cmp_gt` with optimized float mode.
+        cmp_gt_optimized,
         /// `!=`. Result type is always bool.
         /// Uses the `bin_op` field.
         cmp_neq,
+        /// Same as `cmp_neq` with optimized float mode.
+        cmp_neq_optimized,
         /// Conditional between two vectors.
         /// Result type is always a vector of bools.
         /// Uses the `ty_pl` field, payload is `VectorCmp`.
         cmp_vector,
+        /// Same as `cmp_vector` with optimized float mode.
+        cmp_vector_optimized,
 
         /// Conditional branch.
         /// Result type is always noreturn; no instructions in a block follow this one.
@@ -553,6 +593,8 @@ pub const Inst = struct {
         /// Given a float operand, return the integer with the closest mathematical meaning.
         /// Uses the `ty_op` field.
         float_to_int,
+        /// Same as `float_to_int` with optimized float mode.
+        float_to_int_optimized,
         /// Given an integer operand, return the float with the closest mathematical meaning.
         /// Uses the `ty_op` field.
         int_to_float,
@@ -564,6 +606,8 @@ pub const Inst = struct {
         ///  * min, max, add, mul => integer or float
         /// Uses the `reduce` field.
         reduce,
+        /// Same as `reduce` with optimized float mode.
+        reduce_optimized,
         /// Given an integer, bool, float, or pointer operand, return a vector with all elements
         /// equal to the scalar value.
         /// Uses the `ty_op` field.
@@ -676,25 +720,25 @@ pub const Inst = struct {
         /// Sets the operand as the current error return trace,
         set_err_return_trace,
 
-        pub fn fromCmpOp(op: std.math.CompareOperator) Tag {
-            return switch (op) {
-                .lt => .cmp_lt,
-                .lte => .cmp_lte,
-                .eq => .cmp_eq,
-                .gte => .cmp_gte,
-                .gt => .cmp_gt,
-                .neq => .cmp_neq,
-            };
+        pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag {
+            switch (op) {
+                .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt,
+                .lte => return if (optimized) .cmp_lte_optimized else .cmp_lte,
+                .eq => return if (optimized) .cmp_eq_optimized else .cmp_eq,
+                .gte => return if (optimized) .cmp_gte_optimized else .cmp_gte,
+                .gt => return if (optimized) .cmp_gt_optimized else .cmp_gt,
+                .neq => return if (optimized) .cmp_neq_optimized else .cmp_neq,
+            }
         }
 
         pub fn toCmpOp(tag: Tag) ?std.math.CompareOperator {
             return switch (tag) {
-                .cmp_lt => .lt,
-                .cmp_lte => .lte,
-                .cmp_eq => .eq,
-                .cmp_gte => .gte,
-                .cmp_gt => .gt,
-                .cmp_neq => .neq,
+                .cmp_lt, .cmp_lt_optimized => .lt,
+                .cmp_lte, .cmp_lte_optimized => .lte,
+                .cmp_eq, .cmp_eq_optimized => .eq,
+                .cmp_gte, .cmp_gte_optimized => .gte,
+                .cmp_gt, .cmp_gt_optimized => .gt,
+                .cmp_neq, .cmp_neq_optimized => .neq,
                 else => null,
             };
         }
@@ -959,6 +1003,18 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
         .max,
         .bool_and,
         .bool_or,
+        .add_optimized,
+        .addwrap_optimized,
+        .sub_optimized,
+        .subwrap_optimized,
+        .mul_optimized,
+        .mulwrap_optimized,
+        .div_float_optimized,
+        .div_trunc_optimized,
+        .div_floor_optimized,
+        .div_exact_optimized,
+        .rem_optimized,
+        .mod_optimized,
         => return air.typeOf(datas[inst].bin_op.lhs),
 
         .sqrt,
@@ -976,6 +1032,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
         .round,
         .trunc_float,
         .neg,
+        .neg_optimized,
         => return air.typeOf(datas[inst].un_op),
 
         .cmp_lt,
@@ -984,6 +1041,12 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
         .cmp_gte,
         .cmp_gt,
         .cmp_neq,
+        .cmp_lt_optimized,
+        .cmp_lte_optimized,
+        .cmp_eq_optimized,
+        .cmp_gte_optimized,
+        .cmp_gt_optimized,
+        .cmp_neq_optimized,
         .cmp_lt_errors_len,
         .is_null,
         .is_non_null,
@@ -1018,6 +1081,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
         .union_init,
         .field_parent_ptr,
         .cmp_vector,
+        .cmp_vector_optimized,
         .add_with_overflow,
         .sub_with_overflow,
         .mul_with_overflow,
@@ -1054,6 +1118,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
         .struct_field_ptr_index_3,
         .array_to_slice,
         .float_to_int,
+        .float_to_int_optimized,
         .int_to_float,
         .splat,
         .get_union_tag,
@@ -1129,7 +1194,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
             return ptr_ty.elemType();
         },
 
-        .reduce => return air.typeOf(datas[inst].reduce.operand).childType(),
+        .reduce, .reduce_optimized => return air.typeOf(datas[inst].reduce.operand).childType(),
 
         .mul_add => return air.typeOf(datas[inst].pl_op.operand),
         .select => {
src/Liveness.zig
@@ -173,6 +173,25 @@ pub fn categorizeOperand(
         .shr_exact,
         .min,
         .max,
+        .add_optimized,
+        .addwrap_optimized,
+        .sub_optimized,
+        .subwrap_optimized,
+        .mul_optimized,
+        .mulwrap_optimized,
+        .div_float_optimized,
+        .div_trunc_optimized,
+        .div_floor_optimized,
+        .div_exact_optimized,
+        .rem_optimized,
+        .mod_optimized,
+        .neg_optimized, // FIXME(review): neg_optimized carries `un_op` data (see Air.typeOfIndex and analyzeInst in this same change), but this arm reads `bin_op` — `o.rhs` inspects an inactive union field; move this tag to the un_op-handling group.
+        .cmp_lt_optimized,
+        .cmp_lte_optimized,
+        .cmp_eq_optimized,
+        .cmp_gte_optimized,
+        .cmp_gt_optimized,
+        .cmp_neq_optimized,
         => {
             const o = air_datas[inst].bin_op;
             if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
@@ -239,6 +258,7 @@ pub fn categorizeOperand(
         .struct_field_ptr_index_3,
         .array_to_slice,
         .float_to_int,
+        .float_to_int_optimized,
         .int_to_float,
         .get_union_tag,
         .clz,
@@ -381,12 +401,12 @@ pub fn categorizeOperand(
             if (extra.b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
             return .none;
         },
-        .reduce => {
+        .reduce, .reduce_optimized => {
             const reduce = air_datas[inst].reduce;
             if (reduce.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
             return .none;
         },
-        .cmp_vector => {
+        .cmp_vector, .cmp_vector_optimized => {
             const extra = air.extraData(Air.VectorCmp, air_datas[inst].ty_pl.payload).data;
             if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
             if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
@@ -701,29 +721,47 @@ fn analyzeInst(
 
     switch (inst_tags[inst]) {
         .add,
+        .add_optimized,
         .addwrap,
+        .addwrap_optimized,
         .add_sat,
         .sub,
+        .sub_optimized,
         .subwrap,
+        .subwrap_optimized,
         .sub_sat,
         .mul,
+        .mul_optimized,
         .mulwrap,
+        .mulwrap_optimized,
         .mul_sat,
         .div_float,
+        .div_float_optimized,
         .div_trunc,
+        .div_trunc_optimized,
         .div_floor,
+        .div_floor_optimized,
         .div_exact,
+        .div_exact_optimized,
         .rem,
+        .rem_optimized,
         .mod,
+        .mod_optimized,
         .bit_and,
         .bit_or,
         .xor,
         .cmp_lt,
+        .cmp_lt_optimized,
         .cmp_lte,
+        .cmp_lte_optimized,
         .cmp_eq,
+        .cmp_eq_optimized,
         .cmp_gte,
+        .cmp_gte_optimized,
         .cmp_gt,
+        .cmp_gt_optimized,
         .cmp_neq,
+        .cmp_neq_optimized,
         .bool_and,
         .bool_or,
         .store,
@@ -794,6 +832,7 @@ fn analyzeInst(
         .struct_field_ptr_index_3,
         .array_to_slice,
         .float_to_int,
+        .float_to_int_optimized,
         .int_to_float,
         .get_union_tag,
         .clz,
@@ -836,6 +875,7 @@ fn analyzeInst(
         .round,
         .trunc_float,
         .neg,
+        .neg_optimized,
         .cmp_lt_errors_len,
         .set_err_return_trace,
         => {
@@ -903,11 +943,11 @@ fn analyzeInst(
             const extra = a.air.extraData(Air.Shuffle, inst_datas[inst].ty_pl.payload).data;
             return trackOperands(a, new_set, inst, main_tomb, .{ extra.a, extra.b, .none });
         },
-        .reduce => {
+        .reduce, .reduce_optimized => {
             const reduce = inst_datas[inst].reduce;
             return trackOperands(a, new_set, inst, main_tomb, .{ reduce.operand, .none, .none });
         },
-        .cmp_vector => {
+        .cmp_vector, .cmp_vector_optimized => {
             const extra = a.air.extraData(Air.VectorCmp, inst_datas[inst].ty_pl.payload).data;
             return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, .none });
         },
src/print_air.zig
@@ -138,6 +138,24 @@ const Writer = struct {
             .set_union_tag,
             .min,
             .max,
+            .add_optimized,
+            .addwrap_optimized,
+            .sub_optimized,
+            .subwrap_optimized,
+            .mul_optimized,
+            .mulwrap_optimized,
+            .div_float_optimized,
+            .div_trunc_optimized,
+            .div_floor_optimized,
+            .div_exact_optimized,
+            .rem_optimized,
+            .mod_optimized,
+            .cmp_lt_optimized,
+            .cmp_lte_optimized,
+            .cmp_eq_optimized,
+            .cmp_gte_optimized,
+            .cmp_gt_optimized,
+            .cmp_neq_optimized,
             => try w.writeBinOp(s, inst),
 
             .is_null,
@@ -169,6 +187,7 @@ const Writer = struct {
             .round,
             .trunc_float,
             .neg,
+            .neg_optimized,
             .cmp_lt_errors_len,
             .set_err_return_trace,
             => try w.writeUnOp(s, inst),
@@ -216,6 +235,7 @@ const Writer = struct {
             .int_to_float,
             .splat,
             .float_to_int,
+            .float_to_int_optimized,
             .get_union_tag,
             .clz,
             .ctz,
@@ -280,8 +300,8 @@ const Writer = struct {
             .mul_add => try w.writeMulAdd(s, inst),
             .select => try w.writeSelect(s, inst),
             .shuffle => try w.writeShuffle(s, inst),
-            .reduce => try w.writeReduce(s, inst),
-            .cmp_vector => try w.writeCmpVector(s, inst),
+            .reduce, .reduce_optimized => try w.writeReduce(s, inst),
+            .cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst),
 
             .dbg_block_begin, .dbg_block_end => {},
         }
src/Sema.zig
@@ -144,6 +144,9 @@ pub const Block = struct {
     /// when null, it is determined by build mode, changed by @setRuntimeSafety
     want_safety: ?bool = null,
 
+    /// What mode to generate float operations in, set by @setFloatMode
+    float_mode: std.builtin.FloatMode = .Strict,
+
     c_import_buf: ?*std.ArrayList(u8) = null,
 
     /// type of `err` in `else => |err|`
@@ -206,6 +209,7 @@ pub const Block = struct {
             .runtime_loop = parent.runtime_loop,
             .runtime_index = parent.runtime_index,
             .want_safety = parent.want_safety,
+            .float_mode = parent.float_mode,
             .c_import_buf = parent.c_import_buf,
             .switch_else_err_ty = parent.switch_else_err_ty,
         };
@@ -414,7 +418,7 @@ pub const Block = struct {
 
     fn addCmpVector(block: *Block, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref, cmp_op: std.math.CompareOperator, vector_ty: Air.Inst.Ref) !Air.Inst.Ref {
         return block.addInst(.{
-            .tag = .cmp_vector,
+            .tag = if (block.float_mode == .Optimized) .cmp_vector_optimized else .cmp_vector,
             .data = .{ .ty_pl = .{
                 .ty = vector_ty,
                 .payload = try block.sema.addExtra(Air.VectorCmp{
@@ -714,10 +718,10 @@ fn analyzeBodyInner(
             .closure_get                  => try sema.zirClosureGet(block, inst),
             .cmp_lt                       => try sema.zirCmp(block, inst, .lt),
             .cmp_lte                      => try sema.zirCmp(block, inst, .lte),
-            .cmp_eq                       => try sema.zirCmpEq(block, inst, .eq, .cmp_eq),
+            .cmp_eq                       => try sema.zirCmpEq(block, inst, .eq, Air.Inst.Tag.fromCmpOp(.eq, block.float_mode == .Optimized)),
             .cmp_gte                      => try sema.zirCmp(block, inst, .gte),
             .cmp_gt                       => try sema.zirCmp(block, inst, .gt),
-            .cmp_neq                      => try sema.zirCmpEq(block, inst, .neq, .cmp_neq),
+            .cmp_neq                      => try sema.zirCmpEq(block, inst, .neq, Air.Inst.Tag.fromCmpOp(.neq, block.float_mode == .Optimized)),
             .coerce_result_ptr            => try sema.zirCoerceResultPtr(block, inst),
             .decl_ref                     => try sema.zirDeclRef(block, inst),
             .decl_val                     => try sema.zirDeclVal(block, inst),
@@ -4705,6 +4709,7 @@ fn zirBlock(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileErro
         .inlining = parent_block.inlining,
         .is_comptime = parent_block.is_comptime,
         .want_safety = parent_block.want_safety,
+        .float_mode = parent_block.float_mode,
     };
 
     defer child_block.instructions.deinit(gpa);
@@ -5042,13 +5047,7 @@ fn zirSetCold(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!voi
 fn zirSetFloatMode(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!void {
     const extra = sema.code.extraData(Zir.Inst.UnNode, extended.operand).data;
     const src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node };
-    const float_mode = try sema.resolveBuiltinEnum(block, src, extra.operand, "FloatMode", "operand to @setFloatMode must be comptime known");
-    switch (float_mode) {
-        .Strict => return,
-        .Optimized => {
-            // TODO implement optimized float mode
-        },
-    }
+    block.float_mode = try sema.resolveBuiltinEnum(block, src, extra.operand, "FloatMode", "operand to @setFloatMode must be comptime known");
 }
 
 fn zirSetRuntimeSafety(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void {
@@ -8092,7 +8091,7 @@ fn intCast(
                 const ok = if (is_vector) ok: {
                     const is_in_range = try block.addCmpVector(diff_unsigned, dest_range, .lte, try sema.addType(operand_ty));
                     const all_in_range = try block.addInst(.{
-                        .tag = .reduce,
+                        .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
                         .data = .{ .reduce = .{
                             .operand = is_in_range,
                             .operation = .And,
@@ -8109,7 +8108,7 @@ fn intCast(
                 const ok = if (is_vector) ok: {
                     const is_in_range = try block.addCmpVector(diff, dest_max, .lte, try sema.addType(operand_ty));
                     const all_in_range = try block.addInst(.{
-                        .tag = .reduce,
+                        .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
                         .data = .{ .reduce = .{
                             .operand = is_in_range,
                             .operation = .And,
@@ -8130,7 +8129,7 @@ fn intCast(
                 const zero_inst = try sema.addConstant(operand_ty, zero_val);
                 const is_in_range = try block.addCmpVector(operand, zero_inst, .gte, try sema.addType(operand_ty));
                 const all_in_range = try block.addInst(.{
-                    .tag = .reduce,
+                    .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
                     .data = .{ .reduce = .{
                         .operand = is_in_range,
                         .operation = .And,
@@ -9391,7 +9390,7 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError
         } else {
             for (items) |item_ref| {
                 const item = try sema.resolveInst(item_ref);
-                const cmp_ok = try case_block.addBinOp(.cmp_eq, operand, item);
+                const cmp_ok = try case_block.addBinOp(if (case_block.float_mode == .Optimized) .cmp_eq_optimized else .cmp_eq, operand, item);
                 if (any_ok != .none) {
                     any_ok = try case_block.addBinOp(.bool_or, any_ok, cmp_ok);
                 } else {
@@ -9411,12 +9410,12 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError
 
                 // operand >= first and operand <= last
                 const range_first_ok = try case_block.addBinOp(
-                    .cmp_gte,
+                    if (case_block.float_mode == .Optimized) .cmp_gte_optimized else .cmp_gte,
                     operand,
                     item_first,
                 );
                 const range_last_ok = try case_block.addBinOp(
-                    .cmp_lte,
+                    if (case_block.float_mode == .Optimized) .cmp_lte_optimized else .cmp_lte,
                     operand,
                     item_last,
                 );
@@ -10023,7 +10022,7 @@ fn zirShl(
         const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty);
         const any_ov_bit = if (lhs_ty.zigTypeTag() == .Vector)
             try block.addInst(.{
-                .tag = .reduce,
+                .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
                 .data = .{ .reduce = .{
                     .operand = ov_bit,
                     .operation = .Or,
@@ -10120,7 +10119,7 @@ fn zirShr(
         const ok = if (rhs_ty.zigTypeTag() == .Vector) ok: {
             const eql = try block.addCmpVector(lhs, back, .eq, try sema.addType(rhs_ty));
             break :ok try block.addInst(.{
-                .tag = .reduce,
+                .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
                 .data = .{ .reduce = .{
                     .operand = eql,
                     .operation = .And,
@@ -10719,7 +10718,7 @@ fn zirNegate(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
             return sema.addConstant(rhs_ty, try rhs_val.floatNeg(rhs_ty, sema.arena, target));
         }
         try sema.requireRuntimeBlock(block, src, null);
-        return block.addUnOp(.neg, rhs);
+        return block.addUnOp(if (block.float_mode == .Optimized) .neg_optimized else .neg, rhs);
     }
 
     const lhs = if (rhs_ty.zigTypeTag() == .Vector)
@@ -11078,6 +11077,7 @@ fn analyzeArithmetic(
                         return casted_lhs;
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .add_optimized else .add;
                 if (maybe_lhs_val) |lhs_val| {
                     if (lhs_val.isUndef()) {
                         if (is_int) {
@@ -11100,8 +11100,8 @@ fn analyzeArithmetic(
                                 try sema.floatAdd(lhs_val, rhs_val, resolved_type),
                             );
                         }
-                    } else break :rs .{ .src = rhs_src, .air_tag = .add };
-                } else break :rs .{ .src = lhs_src, .air_tag = .add };
+                    } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
             },
             .addwrap => {
                 // Integers only; floats are checked above.
@@ -11112,6 +11112,7 @@ fn analyzeArithmetic(
                         return casted_rhs;
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .addwrap_optimized else .addwrap;
                 if (maybe_rhs_val) |rhs_val| {
                     if (rhs_val.isUndef()) {
                         return sema.addConstUndef(resolved_type);
@@ -11124,8 +11125,8 @@ fn analyzeArithmetic(
                             resolved_type,
                             try sema.numberAddWrap(block, src, lhs_val, rhs_val, resolved_type),
                         );
-                    } else break :rs .{ .src = lhs_src, .air_tag = .addwrap };
-                } else break :rs .{ .src = rhs_src, .air_tag = .addwrap };
+                    } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
             },
             .add_sat => {
                 // Integers only; floats are checked above.
@@ -11173,6 +11174,7 @@ fn analyzeArithmetic(
                         return casted_lhs;
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .sub_optimized else .sub;
                 if (maybe_lhs_val) |lhs_val| {
                     if (lhs_val.isUndef()) {
                         if (is_int) {
@@ -11195,8 +11197,8 @@ fn analyzeArithmetic(
                                 try sema.floatSub(lhs_val, rhs_val, resolved_type),
                             );
                         }
-                    } else break :rs .{ .src = rhs_src, .air_tag = .sub };
-                } else break :rs .{ .src = lhs_src, .air_tag = .sub };
+                    } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
             },
             .subwrap => {
                 // Integers only; floats are checked above.
@@ -11210,6 +11212,7 @@ fn analyzeArithmetic(
                         return casted_lhs;
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .subwrap_optimized else .subwrap;
                 if (maybe_lhs_val) |lhs_val| {
                     if (lhs_val.isUndef()) {
                         return sema.addConstUndef(resolved_type);
@@ -11219,8 +11222,8 @@ fn analyzeArithmetic(
                             resolved_type,
                             try sema.numberSubWrap(block, src, lhs_val, rhs_val, resolved_type),
                         );
-                    } else break :rs .{ .src = rhs_src, .air_tag = .subwrap };
-                } else break :rs .{ .src = lhs_src, .air_tag = .subwrap };
+                    } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
             },
             .sub_sat => {
                 // Integers only; floats are checked above.
@@ -11327,14 +11330,14 @@ fn analyzeArithmetic(
                         if (is_int) {
                             break :rs .{ .src = rhs_src, .air_tag = .div_trunc };
                         } else {
-                            break :rs .{ .src = rhs_src, .air_tag = .div_float };
+                            break :rs .{ .src = rhs_src, .air_tag = if (block.float_mode == .Optimized) .div_float_optimized else .div_float };
                         }
                     }
                 } else {
                     if (is_int) {
                         break :rs .{ .src = lhs_src, .air_tag = .div_trunc };
                     } else {
-                        break :rs .{ .src = lhs_src, .air_tag = .div_float };
+                        break :rs .{ .src = lhs_src, .air_tag = if (block.float_mode == .Optimized) .div_float_optimized else .div_float };
                     }
                 }
             },
@@ -11373,6 +11376,7 @@ fn analyzeArithmetic(
                         return sema.failWithDivideByZero(block, rhs_src);
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .div_trunc_optimized else .div_trunc;
                 if (maybe_lhs_val) |lhs_val| {
                     if (lhs_val.isUndef()) {
                         if (lhs_scalar_ty.isSignedInt() and rhs_scalar_ty.isSignedInt()) {
@@ -11398,8 +11402,8 @@ fn analyzeArithmetic(
                                 try lhs_val.floatDivTrunc(rhs_val, resolved_type, sema.arena, target),
                             );
                         }
-                    } else break :rs .{ .src = rhs_src, .air_tag = .div_trunc };
-                } else break :rs .{ .src = lhs_src, .air_tag = .div_trunc };
+                    } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
             },
             .div_floor => {
                 // For integers:
@@ -11436,6 +11440,7 @@ fn analyzeArithmetic(
                         return sema.failWithDivideByZero(block, rhs_src);
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .div_floor_optimized else .div_floor;
                 if (maybe_lhs_val) |lhs_val| {
                     if (lhs_val.isUndef()) {
                         if (lhs_scalar_ty.isSignedInt() and rhs_scalar_ty.isSignedInt()) {
@@ -11461,8 +11466,8 @@ fn analyzeArithmetic(
                                 try lhs_val.floatDivFloor(rhs_val, resolved_type, sema.arena, target),
                             );
                         }
-                    } else break :rs .{ .src = rhs_src, .air_tag = .div_floor };
-                } else break :rs .{ .src = lhs_src, .air_tag = .div_floor };
+                    } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
             },
             .div_exact => {
                 // For integers:
@@ -11498,6 +11503,7 @@ fn analyzeArithmetic(
                         return sema.failWithDivideByZero(block, rhs_src);
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .div_exact_optimized else .div_exact;
                 if (maybe_lhs_val) |lhs_val| {
                     if (maybe_rhs_val) |rhs_val| {
                         if (is_int) {
@@ -11513,8 +11519,8 @@ fn analyzeArithmetic(
                                 try lhs_val.floatDiv(rhs_val, resolved_type, sema.arena, target),
                             );
                         }
-                    } else break :rs .{ .src = rhs_src, .air_tag = .div_exact };
-                } else break :rs .{ .src = lhs_src, .air_tag = .div_exact };
+                    } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
             },
             .mul => {
                 // For integers:
@@ -11535,6 +11541,7 @@ fn analyzeArithmetic(
                         }
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .mul_optimized else .mul;
                 if (maybe_rhs_val) |rhs_val| {
                     if (rhs_val.isUndef()) {
                         if (is_int) {
@@ -11570,8 +11577,8 @@ fn analyzeArithmetic(
                                 try lhs_val.floatMul(rhs_val, resolved_type, sema.arena, target),
                             );
                         }
-                    } else break :rs .{ .src = lhs_src, .air_tag = .mul };
-                } else break :rs .{ .src = rhs_src, .air_tag = .mul };
+                    } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
             },
             .mulwrap => {
                 // Integers only; floats are handled above.
@@ -11588,6 +11595,7 @@ fn analyzeArithmetic(
                         }
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .mulwrap_optimized else .mulwrap;
                 if (maybe_rhs_val) |rhs_val| {
                     if (rhs_val.isUndef()) {
                         return sema.addConstUndef(resolved_type);
@@ -11606,8 +11614,8 @@ fn analyzeArithmetic(
                             resolved_type,
                             try lhs_val.numberMulWrap(rhs_val, resolved_type, sema.arena, target),
                         );
-                    } else break :rs .{ .src = lhs_src, .air_tag = .mulwrap };
-                } else break :rs .{ .src = rhs_src, .air_tag = .mulwrap };
+                    } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
             },
             .mul_sat => {
                 // Integers only; floats are checked above.
@@ -11777,6 +11785,7 @@ fn analyzeArithmetic(
                         return sema.failWithDivideByZero(block, rhs_src);
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .rem_optimized else .rem;
                 if (maybe_lhs_val) |lhs_val| {
                     if (lhs_val.isUndef()) {
                         return sema.addConstUndef(resolved_type);
@@ -11786,8 +11795,8 @@ fn analyzeArithmetic(
                             resolved_type,
                             try lhs_val.floatRem(rhs_val, resolved_type, sema.arena, target),
                         );
-                    } else break :rs .{ .src = rhs_src, .air_tag = .rem };
-                } else break :rs .{ .src = lhs_src, .air_tag = .rem };
+                    } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
             },
             .mod => {
                 // For integers:
@@ -11834,6 +11843,7 @@ fn analyzeArithmetic(
                         return sema.failWithDivideByZero(block, rhs_src);
                     }
                 }
+                const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .mod_optimized else .mod;
                 if (maybe_lhs_val) |lhs_val| {
                     if (lhs_val.isUndef()) {
                         return sema.addConstUndef(resolved_type);
@@ -11843,8 +11853,8 @@ fn analyzeArithmetic(
                             resolved_type,
                             try lhs_val.floatMod(rhs_val, resolved_type, sema.arena, target),
                         );
-                    } else break :rs .{ .src = rhs_src, .air_tag = .mod };
-                } else break :rs .{ .src = lhs_src, .air_tag = .mod };
+                    } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
+                } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
             },
             else => unreachable,
         }
@@ -11874,7 +11884,7 @@ fn analyzeArithmetic(
                 const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty);
                 const any_ov_bit = if (resolved_type.zigTypeTag() == .Vector)
                     try block.addInst(.{
-                        .tag = .reduce,
+                        .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
                         .data = .{ .reduce = .{
                             .operand = ov_bit,
                             .operation = .Or,
@@ -11890,13 +11900,17 @@ fn analyzeArithmetic(
             }
         }
         switch (rs.air_tag) {
-            .div_float, .div_exact, .div_trunc, .div_floor => {
+            // zig fmt: off
+            .div_float, .div_exact, .div_trunc, .div_floor, .div_float_optimized,
+            .div_exact_optimized, .div_trunc_optimized, .div_floor_optimized
+            // zig fmt: on
+            => if (scalar_tag == .Int or block.float_mode == .Optimized) {
                 const ok = if (resolved_type.zigTypeTag() == .Vector) ok: {
                     const zero_val = try Value.Tag.repeated.create(sema.arena, Value.zero);
                     const zero = try sema.addConstant(sema.typeOf(casted_rhs), zero_val);
                     const ok = try block.addCmpVector(casted_rhs, zero, .neq, try sema.addType(resolved_type));
                     break :ok try block.addInst(.{
-                        .tag = .reduce,
+                        .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
                         .data = .{ .reduce = .{
                             .operand = ok,
                             .operation = .And,
@@ -11904,17 +11918,17 @@ fn analyzeArithmetic(
                     });
                 } else ok: {
                     const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero);
-                    break :ok try block.addBinOp(.cmp_neq, casted_rhs, zero);
+                    break :ok try block.addBinOp(if (block.float_mode == .Optimized) .cmp_neq_optimized else .cmp_neq, casted_rhs, zero);
                 };
                 try sema.addSafetyCheck(block, ok, .divide_by_zero);
             },
-            .rem, .mod => {
+            .rem, .mod, .rem_optimized, .mod_optimized => {
                 const ok = if (resolved_type.zigTypeTag() == .Vector) ok: {
                     const zero_val = try Value.Tag.repeated.create(sema.arena, Value.zero);
                     const zero = try sema.addConstant(sema.typeOf(casted_rhs), zero_val);
                     const ok = try block.addCmpVector(casted_rhs, zero, if (scalar_tag == .Int) .gt else .neq, try sema.addType(resolved_type));
                     break :ok try block.addInst(.{
-                        .tag = .reduce,
+                        .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
                         .data = .{ .reduce = .{
                             .operand = ok,
                             .operation = .And,
@@ -11922,13 +11936,19 @@ fn analyzeArithmetic(
                     });
                 } else ok: {
                     const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero);
-                    break :ok try block.addBinOp(if (scalar_tag == .Int) .cmp_gt else .cmp_neq, casted_rhs, zero);
+                    const air_tag = if (scalar_tag == .Int)
+                        Air.Inst.Tag.cmp_gt
+                    else if (block.float_mode == .Optimized)
+                        Air.Inst.Tag.cmp_neq_optimized
+                    else
+                        Air.Inst.Tag.cmp_neq;
+                    break :ok try block.addBinOp(air_tag, casted_rhs, zero);
                 };
                 try sema.addSafetyCheck(block, ok, .remainder_division_zero_negative);
             },
             else => {},
         }
-        if (rs.air_tag == .div_exact) {
+        if (rs.air_tag == .div_exact or rs.air_tag == .div_exact_optimized) {
             const result = try block.addBinOp(.div_exact, casted_lhs, casted_rhs);
             const ok = if (scalar_tag == .Float) ok: {
                 const floored = try block.addUnOp(.floor, result);
@@ -11936,14 +11956,14 @@ fn analyzeArithmetic(
                 if (resolved_type.zigTypeTag() == .Vector) {
                     const eql = try block.addCmpVector(result, floored, .eq, try sema.addType(resolved_type));
                     break :ok try block.addInst(.{
-                        .tag = .reduce,
+                        .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
                         .data = .{ .reduce = .{
                             .operand = eql,
                             .operation = .And,
                         } },
                     });
                 } else {
-                    const is_in_range = try block.addBinOp(.cmp_eq, result, floored);
+                    const is_in_range = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_eq_optimized else .cmp_eq, result, floored);
                     break :ok is_in_range;
                 }
             } else ok: {
@@ -11962,7 +11982,7 @@ fn analyzeArithmetic(
                     });
                 } else {
                     const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero);
-                    const is_in_range = try block.addBinOp(.cmp_eq, remainder, zero);
+                    const is_in_range = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_eq_optimized else .cmp_eq, remainder, zero);
                     break :ok is_in_range;
                 }
             };
@@ -12476,7 +12496,7 @@ fn cmpSelf(
         const result_ty_ref = try sema.addType(result_ty);
         return block.addCmpVector(casted_lhs, casted_rhs, op, result_ty_ref);
     }
-    const tag = Air.Inst.Tag.fromCmpOp(op);
+    const tag = Air.Inst.Tag.fromCmpOp(op, block.float_mode == .Optimized);
     return block.addBinOp(tag, casted_lhs, casted_rhs);
 }
 
@@ -15954,12 +15974,12 @@ fn zirFloatToInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!
     }
 
     try sema.requireRuntimeBlock(block, inst_data.src(), operand_src);
-    const result = try block.addTyOp(.float_to_int, dest_ty, operand);
+    const result = try block.addTyOp(if (block.float_mode == .Optimized) .float_to_int_optimized else .float_to_int, dest_ty, operand);
     if (block.wantSafety()) {
         const back = try block.addTyOp(.int_to_float, operand_ty, result);
         const diff = try block.addBinOp(.sub, operand, back);
-        const ok_pos = try block.addBinOp(.cmp_lt, diff, try sema.addConstant(operand_ty, Value.one));
-        const ok_neg = try block.addBinOp(.cmp_gt, diff, try sema.addConstant(operand_ty, Value.negative_one));
+        const ok_pos = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_lt_optimized else .cmp_lt, diff, try sema.addConstant(operand_ty, Value.one));
+        const ok_neg = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_gt_optimized else .cmp_gt, diff, try sema.addConstant(operand_ty, Value.negative_one));
         const ok = try block.addBinOp(.bool_and, ok_pos, ok_neg);
         try sema.addSafetyCheck(block, ok, .integer_part_out_of_bounds);
     }
@@ -17194,7 +17214,7 @@ fn zirReduce(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
 
     try sema.requireRuntimeBlock(block, inst_data.src(), operand_src);
     return block.addInst(.{
-        .tag = .reduce,
+        .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
         .data = .{ .reduce = .{
             .operand = operand,
             .operation = operation,
@@ -24489,7 +24509,7 @@ fn cmpNumeric(
         };
         const casted_lhs = try sema.coerce(block, dest_ty, lhs, lhs_src);
         const casted_rhs = try sema.coerce(block, dest_ty, rhs, rhs_src);
-        return block.addBinOp(Air.Inst.Tag.fromCmpOp(op), casted_lhs, casted_rhs);
+        return block.addBinOp(Air.Inst.Tag.fromCmpOp(op, block.float_mode == .Optimized), casted_lhs, casted_rhs);
     }
     // For mixed unsigned integer sizes, implicit cast both operands to the larger integer.
     // For mixed signed and unsigned integers, implicit cast both operands to a signed
@@ -24610,7 +24630,7 @@ fn cmpNumeric(
     const casted_lhs = try sema.coerce(block, dest_ty, lhs, lhs_src);
     const casted_rhs = try sema.coerce(block, dest_ty, rhs, rhs_src);
 
-    return block.addBinOp(Air.Inst.Tag.fromCmpOp(op), casted_lhs, casted_rhs);
+    return block.addBinOp(Air.Inst.Tag.fromCmpOp(op, block.float_mode == .Optimized), casted_lhs, casted_rhs);
 }
 
 /// Asserts that lhs and rhs types are both vectors.