Commit 03ed0f0d28

Andrew Kelley <andrew@ziglang.org>
2022-05-13 01:44:44
C backend: implement overflow arithmetic
Most of the work here was additions to zig.h. The lowering code is mainly responsible for calling the correct function name depending on the operand type. Some of the compiler-rt calls here are not implemented yet and are non-standard symbols due to the C programming language not needing them. After this commit, the behavior tests with -ofmt=c are passing again.
1 parent a5ea22d
Changed files (3)
lib
src
codegen
link
lib/std/builtin.zig
@@ -767,8 +767,7 @@ pub fn default_panic(msg: []const u8, error_return_trace: ?*StackTrace) noreturn
 
     // Until self-hosted catches up with stage1 language features, we have a simpler
     // default panic function:
-    if ((builtin.zig_backend == .stage2_llvm and builtin.link_libc) or
-        builtin.zig_backend == .stage2_c or
+    if (builtin.zig_backend == .stage2_c or
         builtin.zig_backend == .stage2_wasm or
         builtin.zig_backend == .stage2_arm or
         builtin.zig_backend == .stage2_aarch64 or
src/codegen/c.zig
@@ -1766,10 +1766,10 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
 
             .mul_add => try airMulAdd(f, inst),
 
-            .add_with_overflow => try airAddWithOverflow(f, inst),
-            .sub_with_overflow => try airSubWithOverflow(f, inst),
-            .mul_with_overflow => try airMulWithOverflow(f, inst),
-            .shl_with_overflow => try airShlWithOverflow(f, inst),
+            .add_with_overflow => try airOverflow(f, inst, "addo_"),
+            .sub_with_overflow => try airOverflow(f, inst, "subo_"),
+            .mul_with_overflow => try airOverflow(f, inst, "mulo_"),
+            .shl_with_overflow => try airOverflow(f, inst, "shlo_"),
 
             .min => try airMinMax(f, inst, "<"),
             .max => try airMinMax(f, inst, ">"),
@@ -2295,7 +2295,8 @@ fn airWrapOp(
 
     const bin_op = f.air.instructions.items(.data)[inst].bin_op;
     const inst_ty = f.air.typeOfIndex(inst);
-    const int_info = inst_ty.intInfo(f.object.dg.module.getTarget());
+    const target = f.object.dg.module.getTarget();
+    const int_info = inst_ty.intInfo(target);
     const bits = int_info.bits;
 
     // if it's an unsigned int with non-arbitrary bit size then we can just add
@@ -2313,47 +2314,8 @@ fn airWrapOp(
         return f.fail("TODO: C backend: airWrapOp for large integers", .{});
     }
 
-    var min_buf: [80]u8 = undefined;
-    const min = switch (int_info.signedness) {
-        .unsigned => "0",
-        else => switch (inst_ty.tag()) {
-            .c_short => "SHRT_MIN",
-            .c_int => "INT_MIN",
-            .c_long => "LONG_MIN",
-            .c_longlong => "LLONG_MIN",
-            .isize => "INTPTR_MIN",
-            else => blk: {
-                const val = -1 * std.math.pow(i64, 2, @intCast(i64, bits - 1));
-                break :blk std.fmt.bufPrint(&min_buf, "{d}", .{val}) catch |err| switch (err) {
-                    error.NoSpaceLeft => unreachable,
-                };
-            },
-        },
-    };
-
     var max_buf: [80]u8 = undefined;
-    const max = switch (inst_ty.tag()) {
-        .c_short => "SHRT_MAX",
-        .c_ushort => "USHRT_MAX",
-        .c_int => "INT_MAX",
-        .c_uint => "UINT_MAX",
-        .c_long => "LONG_MAX",
-        .c_ulong => "ULONG_MAX",
-        .c_longlong => "LLONG_MAX",
-        .c_ulonglong => "ULLONG_MAX",
-        .isize => "INTPTR_MAX",
-        .usize => "UINTPTR_MAX",
-        else => blk: {
-            const pow_bits = switch (int_info.signedness) {
-                .signed => bits - 1,
-                .unsigned => bits,
-            };
-            const val = std.math.pow(u64, 2, pow_bits) - 1;
-            break :blk std.fmt.bufPrint(&max_buf, "{}", .{val}) catch |err| switch (err) {
-                error.NoSpaceLeft => unreachable,
-            };
-        },
-    };
+    const max = intMax(inst_ty, target, &max_buf);
 
     const lhs = try f.resolveInst(bin_op.lhs);
     const rhs = try f.resolveInst(bin_op.rhs);
@@ -2369,10 +2331,7 @@ fn airWrapOp(
         .c_long => try w.writeAll("long"),
         .c_longlong => try w.writeAll("longlong"),
         else => {
-            const prefix_byte: u8 = switch (int_info.signedness) {
-                .signed => 'i',
-                .unsigned => 'u',
-            };
+            const prefix_byte: u8 = signAbbrev(int_info.signedness);
             for ([_]u8{ 8, 16, 32, 64 }) |nbits| {
                 if (bits <= nbits) {
                     try w.print("{c}{d}", .{ prefix_byte, nbits });
@@ -2390,6 +2349,9 @@ fn airWrapOp(
     try f.writeCValue(w, rhs);
 
     if (int_info.signedness == .signed) {
+        var min_buf: [80]u8 = undefined;
+        const min = intMin(inst_ty, target, &min_buf);
+
         try w.print(", {s}", .{min});
     }
 
@@ -2475,10 +2437,7 @@ fn airSatOp(f: *Function, inst: Air.Inst.Index, fn_op: [*:0]const u8) !CValue {
         .c_long => try w.writeAll("long"),
         .c_longlong => try w.writeAll("longlong"),
         else => {
-            const prefix_byte: u8 = switch (int_info.signedness) {
-                .signed => 'i',
-                .unsigned => 'u',
-            };
+            const prefix_byte: u8 = signAbbrev(int_info.signedness);
             for ([_]u8{ 8, 16, 32, 64 }) |nbits| {
                 if (bits <= nbits) {
                     try w.print("{c}{d}", .{ prefix_byte, nbits });
@@ -2505,28 +2464,63 @@ fn airSatOp(f: *Function, inst: Air.Inst.Index, fn_op: [*:0]const u8) !CValue {
     return ret;
 }
 
-fn airAddWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue {
-    _ = f;
-    _ = inst;
-    return f.fail("TODO add with overflow", .{});
-}
+fn airOverflow(f: *Function, inst: Air.Inst.Index, op_abbrev: [*:0]const u8) !CValue {
+    if (f.liveness.isUnused(inst))
+        return CValue.none;
 
-fn airSubWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue {
-    _ = f;
-    _ = inst;
-    return f.fail("TODO sub with overflow", .{});
-}
+    const ty_pl = f.air.instructions.items(.data)[inst].ty_pl;
+    const bin_op = f.air.extraData(Air.Bin, ty_pl.payload).data;
 
-fn airMulWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue {
-    _ = f;
-    _ = inst;
-    return f.fail("TODO mul with overflow", .{});
-}
+    const lhs = try f.resolveInst(bin_op.lhs);
+    const rhs = try f.resolveInst(bin_op.rhs);
 
-fn airShlWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue {
-    _ = f;
-    _ = inst;
-    return f.fail("TODO shl with overflow", .{});
+    const inst_ty = f.air.typeOfIndex(inst);
+    const scalar_ty = f.air.typeOf(bin_op.lhs).scalarType();
+    const target = f.object.dg.module.getTarget();
+    const int_info = scalar_ty.intInfo(target);
+    const w = f.object.writer();
+    const c_bits = toCIntBits(int_info.bits) orelse
+        return f.fail("TODO: C backend: implement integer arithmetic larger than 128 bits", .{});
+
+    var max_buf: [80]u8 = undefined;
+    const max = intMax(scalar_ty, target, &max_buf);
+
+    const ret = try f.allocLocal(inst_ty, .Mut);
+    try w.writeAll(";");
+    try f.object.indent_writer.insertNewline();
+    try f.writeCValue(w, ret);
+
+    switch (int_info.signedness) {
+        .unsigned => {
+            try w.print(".field_1 = zig_{s}u{d}(", .{
+                op_abbrev, c_bits,
+            });
+            try f.writeCValue(w, lhs);
+            try w.writeAll(", ");
+            try f.writeCValue(w, rhs);
+            try w.writeAll(", &");
+            try f.writeCValue(w, ret);
+            try w.print(".field_0, {s}", .{max});
+        },
+        .signed => {
+            var min_buf: [80]u8 = undefined;
+            const min = intMin(scalar_ty, target, &min_buf);
+
+            try w.print(".field_1 = zig_{s}i{d}(", .{
+                op_abbrev, c_bits,
+            });
+            try f.writeCValue(w, lhs);
+            try w.writeAll(", ");
+            try f.writeCValue(w, rhs);
+            try w.writeAll(", &");
+            try f.writeCValue(w, ret);
+            try w.print(".field_0, {s}, {s}", .{ min, max });
+        },
+    }
+
+    try w.writeAll(");");
+    try f.object.indent_writer.insertNewline();
+    return ret;
 }
 
 fn airNot(f: *Function, inst: Air.Inst.Index) !CValue {
@@ -3571,11 +3565,7 @@ fn airBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u8) !C
         return f.fail("TODO: C backend: implement integer types larger than 128 bits", .{});
 
     try writer.print(" = zig_{s}_", .{fn_name});
-    const prefix_byte: u8 = switch (int_info.signedness) {
-        .signed => 'i',
-        .unsigned => 'u',
-    };
-    try writer.print("{c}{d}(", .{ prefix_byte, c_bits });
+    try writer.print("{c}{d}(", .{ signAbbrev(int_info.signedness), c_bits });
     try f.writeCValue(writer, try f.resolveInst(operand));
     try writer.print(", {d});\n", .{int_info.bits});
     return local;
@@ -3596,11 +3586,7 @@ fn airBinOpBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u
         const int_info = lhs_ty.intInfo(target);
         const c_bits = toCIntBits(int_info.bits) orelse
             return f.fail("TODO: C backend: implement integer types larger than 128 bits", .{});
-        const prefix_byte: u8 = switch (int_info.signedness) {
-            .signed => 'i',
-            .unsigned => 'u',
-        };
-        try writer.print(" = zig_{s}_{c}{d}", .{ fn_name, prefix_byte, c_bits });
+        try writer.print(" = zig_{s}_{c}{d}", .{ fn_name, signAbbrev(int_info.signedness), c_bits });
     } else if (lhs_ty.isRuntimeFloat()) {
         const c_bits = lhs_ty.floatBits(target);
         try writer.print(" = zig_{s}_f{d}", .{ fn_name, c_bits });
@@ -4085,3 +4071,53 @@ fn toCIntBits(zig_bits: u32) ?u32 {
     }
     return null;
 }
+
+fn signAbbrev(signedness: std.builtin.Signedness) u8 {
+    return switch (signedness) {
+        .signed => 'i',
+        .unsigned => 'u',
+    };
+}
+
+fn intMax(ty: Type, target: std.Target, buf: []u8) []const u8 {
+    switch (ty.tag()) {
+        .c_short => return "SHRT_MAX",
+        .c_ushort => return "USHRT_MAX",
+        .c_int => return "INT_MAX",
+        .c_uint => return "UINT_MAX",
+        .c_long => return "LONG_MAX",
+        .c_ulong => return "ULONG_MAX",
+        .c_longlong => return "LLONG_MAX",
+        .c_ulonglong => return "ULLONG_MAX",
+        else => {
+            const int_info = ty.intInfo(target);
+            const rhs = @intCast(u7, int_info.bits - @boolToInt(int_info.signedness == .signed));
+            const val = (@as(u128, 1) << rhs) - 1;
+            // TODO make this integer literal have a suffix if necessary (such as "ull")
+            return std.fmt.bufPrint(buf, "{}", .{val}) catch |err| switch (err) {
+                error.NoSpaceLeft => unreachable,
+            };
+        },
+    }
+}
+
+fn intMin(ty: Type, target: std.Target, buf: []u8) []const u8 {
+    switch (ty.tag()) {
+        .c_short => return "SHRT_MIN",
+        .c_int => return "INT_MIN",
+        .c_long => return "LONG_MIN",
+        .c_longlong => return "LLONG_MIN",
+        else => {
+            const int_info = ty.intInfo(target);
+            assert(int_info.signedness == .signed);
+            const val = v: {
+                if (int_info.bits == 0) break :v 0;
+                const rhs = @intCast(u7, (int_info.bits - 1));
+                break :v -(@as(i128, 1) << rhs);
+            };
+            return std.fmt.bufPrint(buf, "{d}", .{val}) catch |err| switch (err) {
+                error.NoSpaceLeft => unreachable,
+            };
+        },
+    }
+}
src/link/C/zig.h
@@ -165,8 +165,24 @@
 
 #define int128_t __int128
 #define uint128_t unsigned __int128
+#define UINT128_MAX ((uint128_t)(0xffffffffffffffffull) | 0xffffffffffffffffull)
 ZIG_EXTERN_C void *memcpy (void *ZIG_RESTRICT, const void *ZIG_RESTRICT, size_t);
 ZIG_EXTERN_C void *memset (void *, int, size_t);
+ZIG_EXTERN_C int64_t    __addodi4(int64_t   lhs, int64_t   rhs, int *overflow);
+ZIG_EXTERN_C int128_t   __addoti4(int128_t  lhs, int128_t  rhs, int *overflow);
+ZIG_EXTERN_C uint64_t  __uaddodi4(uint64_t  lhs, uint64_t  rhs, int *overflow);
+ZIG_EXTERN_C uint128_t __uaddoti4(uint128_t lhs, uint128_t rhs, int *overflow);
+ZIG_EXTERN_C int32_t    __subosi4(int32_t   lhs, int32_t   rhs, int *overflow);
+ZIG_EXTERN_C int64_t    __subodi4(int64_t   lhs, int64_t   rhs, int *overflow);
+ZIG_EXTERN_C int128_t   __suboti4(int128_t  lhs, int128_t  rhs, int *overflow);
+ZIG_EXTERN_C uint32_t  __usubosi4(uint32_t  lhs, uint32_t  rhs, int *overflow);
+ZIG_EXTERN_C uint64_t  __usubodi4(uint64_t  lhs, uint64_t  rhs, int *overflow);
+ZIG_EXTERN_C uint128_t __usuboti4(uint128_t lhs, uint128_t rhs, int *overflow);
+ZIG_EXTERN_C int64_t    __mulodi4(int64_t   lhs, int64_t   rhs, int *overflow);
+ZIG_EXTERN_C int128_t   __muloti4(int128_t  lhs, int128_t  rhs, int *overflow);
+ZIG_EXTERN_C uint64_t  __umulodi4(uint64_t  lhs, uint64_t  rhs, int *overflow);
+ZIG_EXTERN_C uint128_t __umuloti4(uint128_t lhs, uint128_t rhs, int *overflow);
+
 
 static inline uint8_t zig_addw_u8(uint8_t lhs, uint8_t rhs, uint8_t max) {
     uint8_t thresh = max - rhs;
@@ -396,6 +412,811 @@ static inline long long zig_subw_longlong(long long lhs, long long rhs, long lon
     return (long long)(((unsigned long long)lhs) - ((unsigned long long)rhs));
 }
 
+static inline bool zig_addo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) {
+#if defined(__GNUC__) && INT8_MAX == INT_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_sadd_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT8_MAX == LONG_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_saddl_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_saddll_overflow(lhs, rhs, res);
+    }
+#endif
+    int16_t big_result = (int16_t)lhs + (int16_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int16_t)max - (int16_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int16_t)max - (int16_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_addo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) {
+#if defined(__GNUC__) && INT16_MAX == INT_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_sadd_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT16_MAX == LONG_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_saddl_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_saddll_overflow(lhs, rhs, res);
+    }
+#endif
+    int32_t big_result = (int32_t)lhs + (int32_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int32_t)max - (int32_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int32_t)max - (int32_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_addo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) {
+#if defined(__GNUC__) && INT32_MAX == INT_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_sadd_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT32_MAX == LONG_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_saddl_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_saddll_overflow(lhs, rhs, res);
+    }
+#endif
+    int64_t big_result = (int64_t)lhs + (int64_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int64_t)max - (int64_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int64_t)max - (int64_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_addo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) {
+    bool overflow;
+#if defined(__GNUC__) && INT64_MAX == INT_MAX
+    overflow = __builtin_sadd_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT64_MAX == LONG_MAX
+    overflow = __builtin_saddl_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX
+    overflow = __builtin_saddll_overflow(lhs, rhs, res);
+#else
+    int int_overflow;
+    *res = __addodi4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (!overflow) {
+        if (*res > max) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        } else if (*res < min) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        }
+    }
+    return overflow;
+}
+
+static inline bool zig_addo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) {
+    bool overflow;
+#if defined(__GNUC__) && INT128_MAX == INT_MAX
+    overflow = __builtin_sadd_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT128_MAX == LONG_MAX
+    overflow = __builtin_saddl_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX
+    overflow = __builtin_saddll_overflow(lhs, rhs, res);
+#else
+    int int_overflow;
+    *res = __addoti4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (!overflow) {
+        if (*res > max) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        } else if (*res < min) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        }
+    }
+    return overflow;
+}
+
+static inline bool zig_addo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) {
+#if defined(__GNUC__) && UINT8_MAX == UINT_MAX
+    if (max == UINT8_MAX) {
+        return __builtin_uadd_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX
+    if (max == UINT8_MAX) {
+        return __builtin_uaddl_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX
+    if (max == UINT8_MAX) {
+        return __builtin_uaddll_overflow(lhs, rhs, res);
+    }
+#endif
+    uint16_t big_result = (uint16_t)lhs + (uint16_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline uint16_t zig_addo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) {
+#if defined(__GNUC__) && UINT16_MAX == UINT_MAX
+    if (max == UINT16_MAX) {
+        return __builtin_uadd_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX
+    if (max == UINT16_MAX) {
+        return __builtin_uaddl_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX
+    if (max == UINT16_MAX) {
+        return __builtin_uaddll_overflow(lhs, rhs, res);
+    }
+#endif
+    uint32_t big_result = (uint32_t)lhs + (uint32_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline uint32_t zig_addo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) {
+#if defined(__GNUC__) && UINT32_MAX == UINT_MAX
+    if (max == UINT32_MAX) {
+        return __builtin_uadd_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX
+    if (max == UINT32_MAX) {
+        return __builtin_uaddl_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX
+    if (max == UINT32_MAX) {
+        return __builtin_uaddll_overflow(lhs, rhs, res);
+    }
+#endif
+    uint64_t big_result = (uint64_t)lhs + (uint64_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline uint64_t zig_addo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) {
+    bool overflow;
+#if defined(__GNUC__) && UINT64_MAX == UINT_MAX
+    overflow = __builtin_uadd_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX
+    overflow = __builtin_uaddl_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX
+    overflow = __builtin_uaddll_overflow(lhs, rhs, res);
+#else
+    int int_overflow;
+    *res = __uaddodi4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (*res > max && !overflow) {
+        *res -= max - 1;
+        return true;
+    }
+    return overflow;
+}
+
+static inline uint128_t zig_addo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) {
+    bool overflow;
+    *res = __uaddoti4(lhs, rhs, &overflow);
+    if (*res > max && !overflow) {
+        *res -= max - 1;
+        return true;
+    }
+    return overflow;
+}
+
+static inline bool zig_subo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) {
+#if defined(__GNUC__) && INT8_MAX == INT_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_ssub_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT8_MAX == LONG_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_ssubl_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_ssubll_overflow(lhs, rhs, res);
+    }
+#endif
+    int16_t big_result = (int16_t)lhs - (int16_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int16_t)max - (int16_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int16_t)max - (int16_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_subo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) {
+#if defined(__GNUC__) && INT16_MAX == INT_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_ssub_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT16_MAX == LONG_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_ssubl_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_ssubll_overflow(lhs, rhs, res);
+    }
+#endif
+    int32_t big_result = (int32_t)lhs - (int32_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int32_t)max - (int32_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int32_t)max - (int32_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_subo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) {
+#if defined(__GNUC__) && INT32_MAX == INT_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_ssub_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT32_MAX == LONG_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_ssubl_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_ssubll_overflow(lhs, rhs, res);
+    }
+#endif
+    int64_t big_result = (int64_t)lhs - (int64_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int64_t)max - (int64_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int64_t)max - (int64_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_subo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) {
+    bool overflow;
+#if defined(__GNUC__) && INT64_MAX == INT_MAX
+    overflow = __builtin_ssub_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT64_MAX == LONG_MAX
+    overflow = __builtin_ssubl_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX
+    overflow = __builtin_ssubll_overflow(lhs, rhs, res);
+#else
+    int int_overflow;
+    *res = __subodi4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (!overflow) {
+        if (*res > max) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        } else if (*res < min) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        }
+    }
+    return overflow;
+}
+
+static inline bool zig_subo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) {
+    bool overflow;
+#if defined(__GNUC__) && INT128_MAX == INT_MAX
+    overflow = __builtin_ssub_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT128_MAX == LONG_MAX
+    overflow = __builtin_ssubl_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX
+    overflow = __builtin_ssubll_overflow(lhs, rhs, res);
+#else
+    int int_overflow;
+    *res = __suboti4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (!overflow) {
+        if (*res > max) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        } else if (*res < min) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        }
+    }
+    return overflow;
+}
+
+static inline bool zig_subo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) {
+#if defined(__GNUC__) && UINT8_MAX == UINT_MAX
+    return __builtin_usub_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX
+    return __builtin_usubl_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX
+    return __builtin_usubll_overflow(lhs, rhs, res);
+#endif
+    if (rhs > lhs) {
+        *res = max - (rhs - lhs - 1);
+        return true;
+    }
+    *res = lhs - rhs;
+    return false;
+}
+
+static inline uint16_t zig_subo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) {
+#if defined(__GNUC__) && UINT16_MAX == UINT_MAX
+    return __builtin_usub_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX
+    return __builtin_usubl_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX
+    return __builtin_usubll_overflow(lhs, rhs, res);
+#endif
+    if (rhs > lhs) {
+        *res = max - (rhs - lhs - 1);
+        return true;
+    }
+    *res = lhs - rhs;
+    return false;
+}
+
+static inline uint32_t zig_subo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) {
+    if (max == UINT32_MAX) {
+#if defined(__GNUC__) && UINT32_MAX == UINT_MAX
+        return __builtin_usub_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX
+        return __builtin_usubl_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX
+        return __builtin_usubll_overflow(lhs, rhs, res);
+#endif
+        int int_overflow;
+        *res = __usubosi4(lhs, rhs, &int_overflow);
+        return int_overflow != 0;
+    } else {
+        if (rhs > lhs) {
+            *res = max - (rhs - lhs - 1);
+            return true;
+        }
+        *res = lhs - rhs;
+        return false;
+    }
+}
+
+static inline uint64_t zig_subo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) {
+    if (max == UINT64_MAX) {
+#if defined(__GNUC__) && UINT64_MAX == UINT_MAX
+        return __builtin_usub_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX
+        return __builtin_usubl_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX
+        return __builtin_usubll_overflow(lhs, rhs, res);
+#else
+        int int_overflow;
+        *res = __usubodi4(lhs, rhs, &int_overflow);
+        return int_overflow != 0;
+#endif
+    } else {
+        if (rhs > lhs) {
+            *res = max - (rhs - lhs - 1);
+            return true;
+        }
+        *res = lhs - rhs;
+        return false;
+    }
+}
+
+static inline uint128_t zig_subo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) {
+    if (max == UINT128_MAX) {
+        int int_overflow;
+        *res = __usuboti4(lhs, rhs, &int_overflow);
+        return int_overflow != 0;
+    } else {
+        if (rhs > lhs) {
+            *res = max - (rhs - lhs - 1);
+            return true;
+        }
+        *res = lhs - rhs;
+        return false;
+    }
+}
+
+static inline bool zig_mulo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) {
+#if defined(__GNUC__) && INT8_MAX == INT_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_smul_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT8_MAX == LONG_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_smull_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_smulll_overflow(lhs, rhs, res);
+    }
+#endif
+    int16_t big_result = (int16_t)lhs * (int16_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int16_t)max - (int16_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int16_t)max - (int16_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_mulo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) {
+#if defined(__GNUC__) && INT16_MAX == INT_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_smul_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT16_MAX == LONG_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_smull_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_smulll_overflow(lhs, rhs, res);
+    }
+#endif
+    int32_t big_result = (int32_t)lhs * (int32_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int32_t)max - (int32_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int32_t)max - (int32_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_mulo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) {
+#if defined(__GNUC__) && INT32_MAX == INT_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_smul_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT32_MAX == LONG_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_smull_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_smulll_overflow(lhs, rhs, res);
+    }
+#endif
+    int64_t big_result = (int64_t)lhs * (int64_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int64_t)max - (int64_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int64_t)max - (int64_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_mulo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) {
+    bool overflow;
+#if defined(__GNUC__) && INT64_MAX == INT_MAX
+    overflow = __builtin_smul_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT64_MAX == LONG_MAX
+    overflow = __builtin_smull_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX
+    overflow = __builtin_smulll_overflow(lhs, rhs, res);
+#else
+    int int_overflow;
+    *res = __mulodi4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (!overflow) {
+        if (*res > max) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        } else if (*res < min) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        }
+    }
+    return overflow;
+}
+
+static inline bool zig_mulo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) {
+    bool overflow;
+#if defined(__GNUC__) && INT128_MAX == INT_MAX
+    overflow = __builtin_smul_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT128_MAX == LONG_MAX
+    overflow = __builtin_smull_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX
+    overflow = __builtin_smulll_overflow(lhs, rhs, res);
+#else
+    int int_overflow;
+    *res = __muloti4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (!overflow) {
+        if (*res > max) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        } else if (*res < min) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        }
+    }
+    return overflow;
+}
+
+static inline bool zig_mulo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) {
+#if defined(__GNUC__) && UINT8_MAX == UINT_MAX
+    if (max == UINT8_MAX) {
+        return __builtin_umul_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX
+    if (max == UINT8_MAX) {
+        return __builtin_umull_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX
+    if (max == UINT8_MAX) {
+        return __builtin_umulll_overflow(lhs, rhs, res);
+    }
+#endif
+    uint16_t big_result = (uint16_t)lhs * (uint16_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline uint16_t zig_mulo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) {
+#if defined(__GNUC__) && UINT16_MAX == UINT_MAX
+    if (max == UINT16_MAX) {
+        return __builtin_umul_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX
+    if (max == UINT16_MAX) {
+        return __builtin_umull_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX
+    if (max == UINT16_MAX) {
+        return __builtin_umulll_overflow(lhs, rhs, res);
+    }
+#endif
+    uint32_t big_result = (uint32_t)lhs * (uint32_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline uint32_t zig_mulo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) {
+#if defined(__GNUC__) && UINT32_MAX == UINT_MAX
+    if (max == UINT32_MAX) {
+        return __builtin_umul_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX
+    if (max == UINT32_MAX) {
+        return __builtin_umull_overflow(lhs, rhs, res);
+    }
+#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX
+    if (max == UINT32_MAX) {
+        return __builtin_umulll_overflow(lhs, rhs, res);
+    }
+#endif
+    uint64_t big_result = (uint64_t)lhs * (uint64_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline uint64_t zig_mulo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) {
+    bool overflow;
+#if defined(__GNUC__) && UINT64_MAX == UINT_MAX
+    overflow = __builtin_umul_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX
+    overflow = __builtin_umull_overflow(lhs, rhs, res);
+#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX
+    overflow = __builtin_umulll_overflow(lhs, rhs, res);
+#else
+    int int_overflow;
+    *res = __umulodi4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (*res > max && !overflow) {
+        *res -= max - 1;
+        return true;
+    }
+    return overflow;
+}
+
+static inline uint128_t zig_mulo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) {
+    int overflow;
+    *res = __umuloti4(lhs, rhs, &overflow);
+    if (*res > max && overflow == 0) {
+        *res -= max - 1;
+        return true;
+    }
+    return overflow != 0;
+}
+
+static inline bool zig_shlo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) {
+    int16_t big_result = (int16_t)lhs << (int16_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int16_t)max - (int16_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int16_t)max - (int16_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_shlo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) {
+    int32_t big_result = (int32_t)lhs << (int32_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int32_t)max - (int32_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int32_t)max - (int32_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_shlo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) {
+    int64_t big_result = (int64_t)lhs << (int64_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int64_t)max - (int64_t)min);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int64_t)max - (int64_t)min);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_shlo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) {
+    int overflow;
+    *res = __shlodi4(lhs, rhs, &overflow);
+    if (overflow == 0) {
+        if (*res > max) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        } else if (*res < min) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        }
+    }
+    return overflow != 0;
+}
+
+static inline bool zig_shlo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) {
+    int overflow;
+    *res = __shloti4(lhs, rhs, &overflow);
+    if (overflow == 0) {
+        if (*res > max) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        } else if (*res < min) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        }
+    }
+    return overflow != 0;
+}
+
+static inline bool zig_shlo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) {
+    uint16_t big_result = (uint16_t)lhs << (uint16_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline uint16_t zig_shlo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) {
+    uint32_t big_result = (uint32_t)lhs << (uint32_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline uint32_t zig_shlo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) {
+    uint64_t big_result = (uint64_t)lhs << (uint64_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline uint64_t zig_shlo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) {
+    int overflow;
+    *res = __ushlodi4(lhs, rhs, &overflow);
+    if (*res > max && overflow == 0) {
+        *res -= max - 1;
+        return true;
+    }
+    return overflow != 0;
+}
+
+static inline uint128_t zig_shlo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) {
+    int overflow;
+    *res = __ushloti4(lhs, rhs, &overflow);
+    if (*res > max && overflow == 0) {
+        *res -= max - 1;
+        return true;
+    }
+    return overflow != 0;
+}
+
 static inline float zig_bitcast_f32_u32(uint32_t arg) {
     float dest;
     memcpy(&dest, &arg, sizeof dest);