master
  1const std = @import("std");
  2const builtin = @import("builtin");
  3const common = @import("common.zig");
  4const shr = std.math.shr;
  5const shl = std.math.shl;
  6
  7const max_limbs = std.math.divCeil(usize, 65535, 32) catch unreachable; // max supported type is u65535
  8
  9comptime {
 10    @export(&__udivei4, .{ .name = "__udivei4", .linkage = common.linkage, .visibility = common.visibility });
 11    @export(&__umodei4, .{ .name = "__umodei4", .linkage = common.linkage, .visibility = common.visibility });
 12}
 13
 14const endian = builtin.cpu.arch.endian();
 15
 16/// Get the value of a limb.
 17inline fn limb(x: []const u32, i: usize) u32 {
 18    return if (endian == .little) x[i] else x[x.len - 1 - i];
 19}
 20
 21/// Change the value of a limb.
 22inline fn limb_set(x: []u32, i: usize, v: u32) void {
 23    if (endian == .little) {
 24        x[i] = v;
 25    } else {
 26        x[x.len - 1 - i] = v;
 27    }
 28}
 29
 30/// Uses Knuth's Algorithm D, 4.3.1, p. 272.
 31pub fn divmod(q: ?[]u32, r: ?[]u32, u: []const u32, v: []const u32) !void {
 32    if (q) |q_| @memset(q_[0..], 0);
 33    if (r) |r_| @memset(r_[0..], 0);
 34
 35    if (u.len == 0 or v.len == 0) return error.DivisionByZero;
 36
 37    var m = u.len - 1;
 38    var n = v.len - 1;
 39    while (limb(u, m) == 0) : (m -= 1) {
 40        if (m == 0) return;
 41    }
 42    while (limb(v, n) == 0) : (n -= 1) {
 43        if (n == 0) return error.DivisionByZero;
 44    }
 45
 46    if (n > m) {
 47        if (r) |r_| @memcpy(r_[0..u.len], u);
 48        return;
 49    }
 50
 51    const s = @clz(limb(v, n));
 52
 53    var vn: [max_limbs]u32 = undefined;
 54    var i = n;
 55    while (i > 0) : (i -= 1) {
 56        limb_set(&vn, i, shl(u32, limb(v, i), s) | shr(u32, limb(v, i - 1), 32 - s));
 57    }
 58    limb_set(&vn, 0, shl(u32, limb(v, 0), s));
 59
 60    var un: [max_limbs + 1]u32 = undefined;
 61    limb_set(&un, m + 1, shr(u32, limb(u, m), 32 - s));
 62    i = m;
 63    while (i > 0) : (i -= 1) {
 64        limb_set(&un, i, shl(u32, limb(u, i), s) | shr(u32, limb(u, i - 1), 32 - s));
 65    }
 66    limb_set(&un, 0, shl(u32, limb(u, 0), s));
 67
 68    var j = m - n;
 69    while (true) : (j -= 1) {
 70        const uu = (@as(u64, limb(&un, j + n + 1)) << 32) + limb(&un, j + n);
 71        var qhat = uu / limb(&vn, n);
 72        var rhat = uu % limb(&vn, n);
 73
 74        while (true) {
 75            if (qhat >= (1 << 32) or (n > 0 and qhat * limb(&vn, n - 1) > (rhat << 32) + limb(&un, j + n - 1))) {
 76                qhat -= 1;
 77                rhat += limb(&vn, n);
 78                if (rhat < (1 << 32)) continue;
 79            }
 80            break;
 81        }
 82        var carry: i64 = 0;
 83        i = 0;
 84        while (i <= n) : (i += 1) {
 85            const p = qhat * limb(&vn, i);
 86            const t = limb(&un, i + j) - carry - @as(u32, @truncate(p));
 87            limb_set(&un, i + j, @as(u32, @truncate(@as(u64, @bitCast(t)))));
 88            carry = @as(i64, @intCast(p >> 32)) - @as(i64, @intCast(t >> 32));
 89        }
 90        const t = limb(&un, j + n + 1) -% carry;
 91        limb_set(&un, j + n + 1, @as(u32, @truncate(@as(u64, @bitCast(t)))));
 92        if (q) |q_| limb_set(q_, j, @as(u32, @truncate(qhat)));
 93        if (t < 0) {
 94            if (q) |q_| limb_set(q_, j, limb(q_, j) - 1);
 95            var carry2: u64 = 0;
 96            i = 0;
 97            while (i <= n) : (i += 1) {
 98                const t2 = @as(u64, limb(&un, i + j)) + @as(u64, limb(&vn, i)) + carry2;
 99                limb_set(&un, i + j, @as(u32, @truncate(t2)));
100                carry2 = t2 >> 32;
101            }
102            limb_set(&un, j + n + 1, @as(u32, @truncate(limb(&un, j + n + 1) + carry2)));
103        }
104        if (j == 0) break;
105    }
106    if (r) |r_| {
107        i = 0;
108        while (i <= n) : (i += 1) {
109            limb_set(r_, i, shr(u32, limb(&un, i), s) | shl(u32, limb(&un, i + 1), 32 - s));
110        }
111        limb_set(r_, n, shr(u32, limb(&un, n), s));
112    }
113}
114
115pub fn __udivei4(q_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void {
116    @setRuntimeSafety(common.test_safety);
117    const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits));
118    const q: []u32 = @ptrCast(@alignCast(q_p[0..byte_size]));
119    const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size]));
120    const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size]));
121    @call(.always_inline, divmod, .{ q, null, u, v }) catch unreachable;
122}
123
124pub fn __umodei4(r_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void {
125    @setRuntimeSafety(common.test_safety);
126    const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits));
127    const r: []u32 = @ptrCast(@alignCast(r_p[0..byte_size]));
128    const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size]));
129    const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size]));
130    @call(.always_inline, divmod, .{ null, r, u, v }) catch unreachable;
131}
132
133test "__udivei4/__umodei4" {
134    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
135    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
136    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
137
138    const RndGen = std.Random.DefaultPrng;
139    var rnd = RndGen.init(42);
140    var i: usize = 10000;
141    while (i > 0) : (i -= 1) {
142        const u = rnd.random().int(u1000);
143        const v = 1 + rnd.random().int(u1200);
144        const q = u / v;
145        const r = u % v;
146        const z = q * v + r;
147        try std.testing.expect(z == u);
148    }
149}