Commit 53e6c719ef
Changed files (3)
lib
std
math
lib/std/math/big/int.zig
@@ -2,6 +2,8 @@ const std = @import("../../std.zig");
const math = std.math;
const Limb = std.math.big.Limb;
const limb_bits = @typeInfo(Limb).Int.bits;
+const HalfLimb = std.math.big.HalfLimb;
+const half_limb_bits = @typeInfo(HalfLimb).Int.bits;
const DoubleLimb = std.math.big.DoubleLimb;
const SignedDoubleLimb = std.math.big.SignedDoubleLimb;
const Log2Limb = std.math.big.Log2Limb;
@@ -1335,7 +1337,16 @@ pub const Mutable = struct {
const xy_trailing = math.min(x_trailing, y_trailing);
if (y.len - xy_trailing == 1) {
- lldiv1(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], y.limbs[y.len - 1]);
+ const divisor = y.limbs[y.len - 1];
+
+ // Optimization for small divisor. By using a half limb we can avoid requiring DoubleLimb
+ // divisions in the hot code path. This may often require compiler_rt software-emulation.
+ if (divisor < maxInt(HalfLimb)) {
+ lldiv0p5(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], @intCast(HalfLimb, divisor));
+ } else {
+ lldiv1(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], divisor);
+ }
+
q.normalize(x.len - xy_trailing);
q.positive = q_positive;
@@ -1939,7 +1950,8 @@ pub const Const = struct {
}
} else {
// Non power-of-two: batch divisions per word size.
- const digits_per_limb = math.log(Limb, base, maxInt(Limb));
+ // We use a HalfLimb here so the division uses the faster lldiv0p5 over lldiv1 codepath.
+ const digits_per_limb = math.log(HalfLimb, base, maxInt(HalfLimb));
var limb_base: Limb = 1;
var j: usize = 0;
while (j < digits_per_limb) : (j += 1) {
@@ -3208,6 +3220,30 @@ fn lldiv1(quo: []Limb, rem: *Limb, a: []const Limb, b: Limb) void {
}
}
+fn lldiv0p5(quo: []Limb, rem: *Limb, a: []const Limb, b: HalfLimb) void {
+ @setRuntimeSafety(debug_safety);
+ assert(a.len > 1 or a[0] >= b);
+ assert(quo.len >= a.len);
+
+ rem.* = 0;
+ for (a) |_, ri| {
+ const i = a.len - ri - 1;
+ const ai_high = a[i] >> half_limb_bits;
+ const ai_low = a[i] & ((1 << half_limb_bits) - 1);
+
+ // Split the division into two divisions acting on half a limb each. Carry remainder.
+ const ai_high_with_carry = (rem.* << half_limb_bits) | ai_high;
+ const ai_high_quo = ai_high_with_carry / b;
+ rem.* = ai_high_with_carry % b;
+
+ const ai_low_with_carry = (rem.* << half_limb_bits) | ai_low;
+ const ai_low_quo = ai_low_with_carry / b;
+ rem.* = ai_low_with_carry % b;
+
+ quo[i] = (ai_high_quo << half_limb_bits) | ai_low_quo;
+ }
+}
+
fn llshl(r: []Limb, a: []const Limb, shift: usize) void {
@setRuntimeSafety(debug_safety);
assert(a.len >= 1);
lib/std/math/big/int_test.zig
@@ -1064,7 +1064,7 @@ test "big.int mulWrap large" {
try testing.expect(b.eq(c));
}
-test "big.int div single-single no rem" {
+test "big.int div single-half no rem" {
var a = try Managed.initSet(testing.allocator, 50);
defer a.deinit();
var b = try Managed.initSet(testing.allocator, 5);
@@ -1080,7 +1080,7 @@ test "big.int div single-single no rem" {
try testing.expect((try r.to(u32)) == 0);
}
-test "big.int div single-single with rem" {
+test "big.int div single-half with rem" {
var a = try Managed.initSet(testing.allocator, 49);
defer a.deinit();
var b = try Managed.initSet(testing.allocator, 5);
@@ -1096,6 +1096,39 @@ test "big.int div single-single with rem" {
try testing.expect((try r.to(u32)) == 4);
}
+test "big.int div single-single no rem" {
+ // assumes usize is <= 64 bits.
+ var a = try Managed.initSet(testing.allocator, 1 << 52);
+ defer a.deinit();
+ var b = try Managed.initSet(testing.allocator, 1 << 35);
+ defer b.deinit();
+
+ var q = try Managed.init(testing.allocator);
+ defer q.deinit();
+ var r = try Managed.init(testing.allocator);
+ defer r.deinit();
+ try Managed.divTrunc(&q, &r, a.toConst(), b.toConst());
+
+ try testing.expect((try q.to(u32)) == 131072);
+ try testing.expect((try r.to(u32)) == 0);
+}
+
+test "big.int div single-single with rem" {
+ var a = try Managed.initSet(testing.allocator, (1 << 52) | (1 << 33));
+ defer a.deinit();
+ var b = try Managed.initSet(testing.allocator, (1 << 35));
+ defer b.deinit();
+
+ var q = try Managed.init(testing.allocator);
+ defer q.deinit();
+ var r = try Managed.init(testing.allocator);
+ defer r.deinit();
+ try Managed.divTrunc(&q, &r, a.toConst(), b.toConst());
+
+ try testing.expect((try q.to(u64)) == 131072);
+ try testing.expect((try r.to(u64)) == 8589934592);
+}
+
test "big.int div multi-single no rem" {
const op1 = 0xffffeeeeddddcccc;
const op2 = 34;
lib/std/math/big.zig
@@ -7,6 +7,7 @@ pub const Limb = usize;
const limb_info = @typeInfo(Limb).Int;
pub const SignedLimb = std.meta.Int(.signed, limb_info.bits);
pub const DoubleLimb = std.meta.Int(.unsigned, 2 * limb_info.bits);
+pub const HalfLimb = std.meta.Int(.unsigned, limb_info.bits / 2);
pub const SignedDoubleLimb = std.meta.Int(.signed, 2 * limb_info.bits);
pub const Log2Limb = std.math.Log2Int(Limb);