master
  1const std = @import("std");
  2const builtin = @import("builtin");
  3const Log2Int = std.math.Log2Int;
  4const common = @import("common.zig");
  5const HalveInt = common.HalveInt;
  6
  7const lo = switch (builtin.cpu.arch.endian()) {
  8    .big => 1,
  9    .little => 0,
 10};
 11const hi = 1 - lo;
 12
 13// Let _u1 and _u0 be the high and low limbs of U respectively.
 14// Returns U / v_ and sets r = U % v_.
 15fn divwide_generic(comptime T: type, _u1: T, _u0: T, v_: T, r: *T) T {
 16    const HalfT = HalveInt(T, false).HalfT;
 17    @setRuntimeSafety(common.test_safety);
 18    var v = v_;
 19
 20    const b = @as(T, 1) << (@bitSizeOf(T) / 2);
 21    var un64: T = undefined;
 22    var un10: T = undefined;
 23
 24    const s: Log2Int(T) = @intCast(@clz(v));
 25    if (s > 0) {
 26        // Normalize divisor
 27        v <<= s;
 28        un64 = (_u1 << s) | (_u0 >> @intCast((@bitSizeOf(T) - @as(T, @intCast(s)))));
 29        un10 = _u0 << s;
 30    } else {
 31        // Avoid undefined behavior of (u0 >> @bitSizeOf(T))
 32        un64 = _u1;
 33        un10 = _u0;
 34    }
 35
 36    // Break divisor up into two 32-bit digits
 37    const vn1 = v >> (@bitSizeOf(T) / 2);
 38    const vn0 = v & std.math.maxInt(HalfT);
 39
 40    // Break right half of dividend into two digits
 41    const un1 = un10 >> (@bitSizeOf(T) / 2);
 42    const un0 = un10 & std.math.maxInt(HalfT);
 43
 44    // Compute the first quotient digit, q1
 45    var q1 = un64 / vn1;
 46    var rhat = un64 -% q1 *% vn1;
 47
 48    // q1 has at most error 2. No more than 2 iterations
 49    while (q1 >= b or q1 * vn0 > b * rhat + un1) {
 50        q1 -= 1;
 51        rhat += vn1;
 52        if (rhat >= b) break;
 53    }
 54
 55    const un21 = un64 *% b +% un1 -% q1 *% v;
 56
 57    // Compute the second quotient digit
 58    var q0 = un21 / vn1;
 59    rhat = un21 -% q0 *% vn1;
 60
 61    // q0 has at most error 2. No more than 2 iterations.
 62    while (q0 >= b or q0 * vn0 > b * rhat + un0) {
 63        q0 -= 1;
 64        rhat += vn1;
 65        if (rhat >= b) break;
 66    }
 67
 68    r.* = (un21 *% b +% un0 -% q0 *% v) >> s;
 69    return q1 *% b +% q0;
 70}
 71
 72fn divwide(comptime T: type, _u1: T, _u0: T, v: T, r: *T) T {
 73    @setRuntimeSafety(common.test_safety);
 74    if (T == u64 and builtin.target.cpu.arch == .x86_64 and builtin.target.os.tag != .windows) {
 75        var rem: T = undefined;
 76        const quo = asm (
 77            \\divq %[v]
 78            : [_] "={rax}" (-> T),
 79              [_] "={rdx}" (rem),
 80            : [v] "r" (v),
 81              [_] "{rax}" (_u0),
 82              [_] "{rdx}" (_u1),
 83        );
 84        r.* = rem;
 85        return quo;
 86    } else {
 87        return divwide_generic(T, _u1, _u0, v, r);
 88    }
 89}
 90
 91// Returns a_ / b_ and sets maybe_rem = a_ % b.
 92pub fn udivmod(comptime T: type, a_: T, b_: T, maybe_rem: ?*T) T {
 93    @setRuntimeSafety(common.test_safety);
 94    const HalfT = HalveInt(T, false).HalfT;
 95    const SignedT = std.meta.Int(.signed, @bitSizeOf(T));
 96
 97    if (b_ > a_) {
 98        if (maybe_rem) |rem| {
 99            rem.* = a_;
100        }
101        return 0;
102    }
103
104    const a: [2]HalfT = @bitCast(a_);
105    const b: [2]HalfT = @bitCast(b_);
106    var q: [2]HalfT = undefined;
107    var r: [2]HalfT = undefined;
108
109    // When the divisor fits in 64 bits, we can use an optimized path
110    if (b[hi] == 0) {
111        r[hi] = 0;
112        if (a[hi] < b[lo]) {
113            // The result fits in 64 bits
114            q[hi] = 0;
115            q[lo] = divwide(HalfT, a[hi], a[lo], b[lo], &r[lo]);
116        } else {
117            // First, divide with the high part to get the remainder. After that a_hi < b_lo.
118            q[hi] = a[hi] / b[lo];
119            q[lo] = divwide(HalfT, a[hi] % b[lo], a[lo], b[lo], &r[lo]);
120        }
121        if (maybe_rem) |rem| {
122            rem.* = @bitCast(r);
123        }
124        return @bitCast(q);
125    }
126
127    // 0 <= shift <= 63
128    const shift: Log2Int(T) = @clz(b[hi]) - @clz(a[hi]);
129    var af: T = @bitCast(a);
130    var bf = @as(T, @bitCast(b)) << shift;
131    q = @bitCast(@as(T, 0));
132
133    for (0..shift + 1) |_| {
134        q[lo] <<= 1;
135        // Branchless version of:
136        // if (af >= bf) {
137        //     af -= bf;
138        //     q[lo] |= 1;
139        // }
140        const s = @as(SignedT, @bitCast(bf -% af -% 1)) >> (@bitSizeOf(T) - 1);
141        q[lo] |= @intCast(s & 1);
142        af -= bf & @as(T, @bitCast(s));
143        bf >>= 1;
144    }
145    if (maybe_rem) |rem| {
146        rem.* = @bitCast(af);
147    }
148    return @bitCast(q);
149}