const std = @import("../../std.zig");
const builtin = @import("builtin");
const math = std.math;
const Limb = std.math.big.Limb;
const limb_bits = @typeInfo(Limb).int.bits;
const HalfLimb = std.math.big.HalfLimb;
const half_limb_bits = @typeInfo(HalfLimb).int.bits;
const DoubleLimb = std.math.big.DoubleLimb;
const SignedDoubleLimb = std.math.big.SignedDoubleLimb;
const Log2Limb = std.math.big.Log2Limb;
const Allocator = std.mem.Allocator;
const mem = std.mem;
const maxInt = std.math.maxInt;
const minInt = std.math.minInt;
const assert = std.debug.assert;
const Endian = std.builtin.Endian;
const Signedness = std.builtin.Signedness;
const native_endian = builtin.cpu.arch.endian();

/// Returns the number of limbs needed to store `scalar`, which must be a
/// primitive integer or float value.
/// Note: A comptime-known upper bound of this value that may be used
/// instead if `scalar` is not already comptime-known is
/// `calcTwosCompLimbCount(@typeInfo(@TypeOf(scalar)).int.bits)`
pub fn calcLimbLen(scalar: anytype) usize {
    switch (@typeInfo(@TypeOf(scalar))) {
        .int, .comptime_int => {
            if (scalar == 0) return 1;
            const w_value = @abs(scalar);
            return @as(usize, @intCast(@divFloor(@as(Limb, @intCast(math.log2(w_value))), limb_bits) + 1));
        },
        .float => {
            const repr: std.math.FloatRepr(@TypeOf(scalar)) = @bitCast(scalar);
            return switch (repr.exponent) {
                .denormal => 1,
                .infinite => 0,
                else => calcNonZeroTwosCompLimbCount(@as(usize, 2) + @max(repr.exponent.unbias(), 0)),
            };
        },
        .comptime_float => return calcLimbLen(@as(f128, scalar)),
        else => @compileError("expected float or int, got " ++ @typeName(@TypeOf(scalar))),
    }
}

pub fn calcToStringLimbsBufferLen(a_len: usize, base: u8) usize {
    if (math.isPowerOfTwo(base))
        return 0;
    return a_len + 2 + a_len + calcDivLimbsBufferLen(a_len, 1);
}

pub fn calcDivLimbsBufferLen(a_len: usize, b_len: usize) usize {
    return a_len + b_len + 4;
}

pub fn calcMulLimbsBufferLen(a_len: usize, b_len: usize, aliases: usize) usize {
    return aliases * @max(a_len, b_len);
}

pub fn calcMulWrapLimbsBufferLen(bit_count: usize, a_len: usize, b_len: usize, aliases: usize) usize {
    const req_limbs = calcTwosCompLimbCount(bit_count);
    return aliases * @min(req_limbs, @max(a_len, b_len));
}

pub fn calcSetStringLimbsBufferLen(base: u8, string_len: usize) usize {
    const limb_count = calcSetStringLimbCount(base, string_len);
    return calcMulLimbsBufferLen(limb_count, limb_count, 2);
}

/// Assumes `string_len` doesn't account for minus signs if the number is negative.
pub fn calcSetStringLimbCount(base: u8, string_len: usize) usize {
    const base_f: f32 = @floatFromInt(base);
    const string_len_f: f32 = @floatFromInt(string_len);
    return 1 + @as(usize, @intFromFloat(@ceil(string_len_f * std.math.log2(base_f) / limb_bits)));
}

pub fn calcPowLimbsBufferLen(a_bit_count: usize, y: usize) usize {
    // The 2 accounts for the minimum space requirement for llmulacc
    return 2 + (a_bit_count * y + (limb_bits - 1)) / limb_bits;
}

pub fn calcSqrtLimbsBufferLen(a_bit_count: usize) usize {
    const a_limb_count = (a_bit_count - 1) / limb_bits + 1;
    const shift = (a_bit_count + 1) / 2;
    const u_s_rem_limb_count = 1 + ((shift / limb_bits) + 1);
    return a_limb_count + 3 * u_s_rem_limb_count + calcDivLimbsBufferLen(a_limb_count, u_s_rem_limb_count);
}

/// Compute the number of limbs required to store a 2s-complement number of `bit_count` bits.
pub fn calcNonZeroTwosCompLimbCount(bit_count: usize) usize {
    assert(bit_count != 0);
    return calcTwosCompLimbCount(bit_count);
}

/// Compute the number of limbs required to store a 2s-complement number of `bit_count` bits.
///
/// Special cases `bit_count == 0` to return 1. Zero-bit integers can only store the value zero
/// and this big integer implementation stores zero using one limb.
pub fn calcTwosCompLimbCount(bit_count: usize) usize {
    return @max(std.math.divCeil(usize, bit_count, @bitSizeOf(Limb)) catch unreachable, 1);
}
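
// A small illustrative check of the sizing helpers above; the expected values follow
// directly from the documented behavior (one limb minimum, and one extra limb once
// `bit_count` exceeds a whole number of limbs), so they hold for any limb size.
test "calcTwosCompLimbCount and calcLimbLen basics" {
    try std.testing.expectEqual(@as(usize, 1), calcTwosCompLimbCount(0));
    try std.testing.expectEqual(@as(usize, 1), calcTwosCompLimbCount(limb_bits));
    try std.testing.expectEqual(@as(usize, 2), calcTwosCompLimbCount(limb_bits + 1));
    try std.testing.expectEqual(@as(usize, 1), calcLimbLen(@as(u8, 0)));
    try std.testing.expectEqual(@as(usize, 1), calcLimbLen(maxInt(Limb)));
}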

/// a + b * c + *carry, sets carry to the overflow bits
pub fn addMulLimbWithCarry(a: Limb, b: Limb, c: Limb, carry: *Limb) Limb {
    // ov1[0] = a + *carry
    const ov1 = @addWithOverflow(a, carry.*);

    // r2 = b * c
    const bc = @as(DoubleLimb, math.mulWide(Limb, b, c));
    const r2 = @as(Limb, @truncate(bc));
    const c2 = @as(Limb, @truncate(bc >> limb_bits));

    // ov2[0] = ov1[0] + r2
    const ov2 = @addWithOverflow(ov1[0], r2);

    // This never overflows: ov1[1] and ov2[1] are each either 0 or 1, and if both are 1
    // then c2 is at most maxInt(Limb) - 2.
    carry.* = ov1[1] + c2 + ov2[1];

    return ov2[0];
}
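
// A quick sanity sketch of the helper above: with all inputs equal to 8 the exact
// result is 8 + 8 * 8 + 8 = 80, which fits in a single limb, so the carry out is zero.
test "addMulLimbWithCarry small operands" {
    var carry: Limb = 8;
    const lo = addMulLimbWithCarry(8, 8, 8, &carry);
    try std.testing.expectEqual(@as(Limb, 80), lo);
    try std.testing.expectEqual(@as(Limb, 0), carry);
}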

/// a - b * c - *carry, sets carry to the overflow bits
fn subMulLimbWithBorrow(a: Limb, b: Limb, c: Limb, carry: *Limb) Limb {
    // ov1[0] = a - *carry
    const ov1 = @subWithOverflow(a, carry.*);

    // r2 = b * c
    const bc = @as(DoubleLimb, std.math.mulWide(Limb, b, c));
    const r2 = @as(Limb, @truncate(bc));
    const c2 = @as(Limb, @truncate(bc >> limb_bits));

    // ov2[0] = ov1[0] - r2
    const ov2 = @subWithOverflow(ov1[0], r2);
    carry.* = ov1[1] + c2 + ov2[1];

    return ov2[0];
}

/// Used to indicate either limit of a 2s-complement integer.
pub const TwosCompIntLimit = enum {
    // The low limit, either 0x00 (unsigned) or (-)0x80 (signed) for an 8-bit integer.
    min,

    // The high limit, either 0xFF (unsigned) or 0x7F (signed) for an 8-bit integer.
    max,
};

pub const Round = enum {
    /// Round to the nearest representable value, with ties broken by the representation
    /// that ends with a 0 bit.
    nearest_even,
    /// Round away from zero.
    away,
    /// Round towards zero.
    trunc,
    /// Round towards negative infinity.
    floor,
    /// Round towards positive infinity.
    ceil,
};

pub const Exactness = enum { inexact, exact };

/// An arbitrary-precision big integer, with a fixed set of mutable limbs.
pub const Mutable = struct {
    /// Raw digits. These are:
    ///
    /// * Little-endian ordered
    /// * limbs.len >= 1
    /// * Zero is represented as limbs.len == 1 with limbs[0] == 0.
    ///
    /// Accessing limbs directly should be avoided.
    /// These are allocated limbs; the `len` field tells the valid range.
    limbs: []Limb,
    len: usize,
    positive: bool,

    pub fn toConst(self: Mutable) Const {
        return .{
            .limbs = self.limbs[0..self.len],
            .positive = self.positive,
        };
    }

    pub const ConvertError = Const.ConvertError;

    /// Convert `self` to `Int`.
    ///
    /// Returns an error if self cannot be narrowed into the requested type without truncation.
    pub fn toInt(self: Mutable, comptime Int: type) ConvertError!Int {
        return self.toConst().toInt(Int);
    }

    /// Convert `self` to `Float`.
    pub fn toFloat(self: Mutable, comptime Float: type, round: Round) struct { Float, Exactness } {
        return self.toConst().toFloat(Float, round);
    }

    /// Returns true if `a == 0`.
    pub fn eqlZero(self: Mutable) bool {
        return self.toConst().eqlZero();
    }

    /// Asserts that the allocator owns the limbs memory. If this is not the case,
    /// use `toConst().toManaged()`.
    pub fn toManaged(self: Mutable, allocator: Allocator) Managed {
        return .{
            .allocator = allocator,
            .limbs = self.limbs,
            .metadata = if (self.positive)
                self.len & ~Managed.sign_bit
            else
                self.len | Managed.sign_bit,
        };
    }

    /// `value` is a primitive integer type.
    /// Asserts the value fits within the provided `limbs_buffer`.
    /// Note: `calcLimbLen` can be used to figure out how big an array to allocate for `limbs_buffer`.
    pub fn init(limbs_buffer: []Limb, value: anytype) Mutable {
        limbs_buffer[0] = 0;
        var self: Mutable = .{
            .limbs = limbs_buffer,
            .len = 1,
            .positive = true,
        };
        self.set(value);
        return self;
    }
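
    // Illustrative usage sketch: a fixed-size stack buffer can back a Mutable. The
    // buffer here is sized with calcTwosCompLimbCount, the comptime-known upper bound
    // mentioned in the doc comment of calcLimbLen.
    test "init from a stack buffer" {
        var limbs: [calcTwosCompLimbCount(64)]Limb = undefined;
        const m = Mutable.init(&limbs, @as(i64, -42));
        try std.testing.expect(!m.positive);
        try std.testing.expectEqual(@as(i64, -42), try m.toInt(i64));
    }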

    /// Copies the value of a Const to an existing Mutable so that they both have the same value.
    /// Asserts the value fits in the limbs buffer.
    pub fn copy(self: *Mutable, other: Const) void {
        if (self.limbs.ptr != other.limbs.ptr) {
            @memcpy(self.limbs[0..other.limbs.len], other.limbs[0..other.limbs.len]);
        }
        // Normalize before setting `positive` so the `eqlZero` doesn't need to iterate
        // over the extra zero limbs.
        self.normalize(other.limbs.len);
        self.positive = other.positive or other.eqlZero();
    }

    /// Efficiently swap a Mutable with another. This swaps the limb pointers and a full copy is not
    /// performed. The address of the limbs field will not be the same after this function.
    pub fn swap(self: *Mutable, other: *Mutable) void {
        mem.swap(Mutable, self, other);
    }

    pub fn dump(self: Mutable) void {
        for (self.limbs[0..self.len]) |limb| {
            std.debug.print("{x} ", .{limb});
        }
        std.debug.print("len={} capacity={} positive={}\n", .{ self.len, self.limbs.len, self.positive });
    }

    /// Clones a Mutable and returns a new Mutable with the same value. The new Mutable is a deep copy and
    /// can be modified separately from the original.
    /// Asserts that limbs is big enough to store the value.
    pub fn clone(other: Mutable, limbs: []Limb) Mutable {
        @memcpy(limbs[0..other.len], other.limbs[0..other.len]);
        return .{
            .limbs = limbs,
            .len = other.len,
            .positive = other.positive,
        };
    }

    pub fn negate(self: *Mutable) void {
        self.positive = !self.positive;
    }

    /// Modify to become the absolute value
    pub fn abs(self: *Mutable) void {
        self.positive = true;
    }

    /// Sets the Mutable to value. Value must be a primitive integer type.
    /// Asserts the value fits within the limbs buffer.
    /// Note: `calcLimbLen` can be used to figure out how big the limbs buffer
    /// needs to be to store a specific value.
    pub fn set(self: *Mutable, value: anytype) void {
        const T = @TypeOf(value);
        const needed_limbs = calcLimbLen(value);
        assert(needed_limbs <= self.limbs.len); // value too big

        self.len = needed_limbs;
        self.positive = value >= 0;

        switch (@typeInfo(T)) {
            .int => |info| {
                var w_value = @abs(value);

                if (info.bits <= limb_bits) {
                    self.limbs[0] = w_value;
                } else {
                    var i: usize = 0;
                    while (true) : (i += 1) {
                        self.limbs[i] = @as(Limb, @truncate(w_value));
                        w_value >>= limb_bits;

                        if (w_value == 0) break;
                    }
                }
            },
            .comptime_int => {
                comptime var w_value = @abs(value);

                if (w_value <= maxInt(Limb)) {
                    self.limbs[0] = w_value;
                } else {
                    const mask = (1 << limb_bits) - 1;

                    comptime var i = 0;
                    inline while (true) : (i += 1) {
                        self.limbs[i] = w_value & mask;
                        w_value >>= limb_bits;

                        if (w_value == 0) break;
                    }
                }
            },
            else => @compileError("cannot set Mutable using type " ++ @typeName(T)),
        }
    }

    /// Set self from the string representation `value`.
    ///
    /// `value` must contain only digits <= `base` and is case insensitive.  Base prefixes are
    /// not allowed (e.g. 0x43 should simply be 43).  Underscores in the input string are
    /// ignored and can be used as digit separators.
    ///
    /// Asserts there is enough memory for the value in `self.limbs`. An upper bound on number of limbs can
    /// be determined with `calcSetStringLimbCount`.
    /// Asserts the base is in the range [2, 36].
    ///
    /// Returns an error if the value has invalid digits for the requested base.
    ///
    /// `limbs_buffer` is used for temporary storage. The size required can be found with
    /// `calcSetStringLimbsBufferLen`.
    ///
    /// If `allocator` is provided, it will be used for temporary storage to improve
    /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
    pub fn setString(
        self: *Mutable,
        base: u8,
        value: []const u8,
        limbs_buffer: []Limb,
        allocator: ?Allocator,
    ) error{InvalidCharacter}!void {
        assert(base >= 2);
        assert(base <= 36);

        var i: usize = 0;
        var positive = true;
        if (value.len > 0 and value[0] == '-') {
            positive = false;
            i += 1;
        }

        const ap_base: Const = .{ .limbs = &[_]Limb{base}, .positive = true };
        self.set(0);

        for (value[i..]) |ch| {
            if (ch == '_') {
                continue;
            }
            const d = try std.fmt.charToDigit(ch, base);
            const ap_d: Const = .{ .limbs = &[_]Limb{d}, .positive = true };

            self.mul(self.toConst(), ap_base, limbs_buffer, allocator);
            self.add(self.toConst(), ap_d);
        }
        self.positive = positive;
    }
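
    // A hedged usage sketch of setString: the result buffer is sized with
    // calcSetStringLimbCount and the scratch buffer with calcSetStringLimbsBufferLen,
    // following the doc comment above; passing null for the allocator exercises the
    // fallback multiplication path.
    test "setString parses a decimal string" {
        const str = "-12_345";
        var limbs: [calcSetStringLimbCount(10, str.len)]Limb = undefined;
        var scratch: [calcSetStringLimbsBufferLen(10, str.len)]Limb = undefined;
        var m = Mutable.init(&limbs, 0);
        try m.setString(10, str, &scratch, null);
        try std.testing.expectEqual(@as(i32, -12345), try m.toInt(i32));
    }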

    /// Set self to either bound of a 2s-complement integer.
    /// Note: The result is still sign-magnitude, not twos complement! In order to convert the
    /// result to twos complement, it is sufficient to take the absolute value.
    ///
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn setTwosCompIntLimit(
        r: *Mutable,
        limit: TwosCompIntLimit,
        signedness: Signedness,
        bit_count: usize,
    ) void {
        // Handle zero-bit types.
        if (bit_count == 0) {
            r.set(0);
            return;
        }

        const req_limbs = calcTwosCompLimbCount(bit_count);
        const bit: Log2Limb = @truncate(bit_count - 1);
        const signmask = @as(Limb, 1) << bit; // 0b0..010..0 where 1 is the sign bit.
        const mask = (signmask << 1) -% 1; // 0b0..011..1 where the leftmost 1 is the sign bit.

        r.positive = true;

        switch (signedness) {
            .signed => switch (limit) {
                .min => {
                    // Negative bound, signed = -0x80.
                    r.len = req_limbs;
                    @memset(r.limbs[0 .. r.len - 1], 0);
                    r.limbs[r.len - 1] = signmask;
                    r.positive = false;
                },
                .max => {
                    // Positive bound, signed = 0x7F
                    // Note, in this branch we need to normalize because the first bit is
                    // supposed to be 0.

                    // Special case for 1-bit integers.
                    if (bit_count == 1) {
                        r.set(0);
                    } else {
                        const new_req_limbs = calcTwosCompLimbCount(bit_count - 1);
                        const msb = @as(Log2Limb, @truncate(bit_count - 2));
                        const new_signmask = @as(Limb, 1) << msb; // 0b0..010..0 where 1 is the sign bit.
                        const new_mask = (new_signmask << 1) -% 1; // 0b0..001..1 where the rightmost 0 is the sign bit.

                        r.len = new_req_limbs;
                        @memset(r.limbs[0 .. r.len - 1], maxInt(Limb));
                        r.limbs[r.len - 1] = new_mask;
                    }
                },
            },
            .unsigned => switch (limit) {
                .min => {
                    // Min bound, unsigned = 0x00
                    r.set(0);
                },
                .max => {
                    // Max bound, unsigned = 0xFF
                    r.len = req_limbs;
                    @memset(r.limbs[0 .. r.len - 1], maxInt(Limb));
                    r.limbs[r.len - 1] = mask;
                },
            },
        }
    }
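
    // A short illustrative check of setTwosCompIntLimit for the familiar 8-bit signed
    // bounds; as the doc comment above notes, the result is kept in sign-magnitude form.
    test "setTwosCompIntLimit signed 8-bit bounds" {
        var limbs: [calcTwosCompLimbCount(8)]Limb = undefined;
        var m = Mutable.init(&limbs, 0);
        m.setTwosCompIntLimit(.min, .signed, 8);
        try std.testing.expectEqual(@as(i8, -128), try m.toInt(i8));
        m.setTwosCompIntLimit(.max, .signed, 8);
        try std.testing.expectEqual(@as(i8, 127), try m.toInt(i8));
    }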

    /// Sets the Mutable to a float value rounded according to `round`.
    /// Returns whether the conversion was exact (`round` had no effect on the result).
    pub fn setFloat(self: *Mutable, value: anytype, round: Round) Exactness {
        const Float = @TypeOf(value);
        if (Float == comptime_float) return self.setFloat(@as(f128, value), round);
        const abs_value = @abs(value);
        if (abs_value < 1.0) {
            if (abs_value == 0.0) {
                self.set(0);
                return .exact;
            }
            self.set(@as(i2, round: switch (round) {
                .nearest_even => if (abs_value <= 0.5) 0 else continue :round .away,
                .away => if (value < 0.0) -1 else 1,
                .trunc => 0,
                .floor => -@as(i2, @intFromBool(value < 0.0)),
                .ceil => @intFromBool(value > 0.0),
            }));
            return .inexact;
        }
        const Repr = std.math.FloatRepr(Float);
        const repr: Repr = @bitCast(value);
        const exponent = repr.exponent.unbias();
        assert(exponent >= 0);
        const int_bit: Repr.Mantissa = 1 << (@bitSizeOf(Repr.Mantissa) - 1);
        const mantissa = int_bit | repr.mantissa;
        if (exponent >= @bitSizeOf(Repr.Normalized.Fraction)) {
            self.set(mantissa);
            self.shiftLeft(self.toConst(), @intCast(exponent - @bitSizeOf(Repr.Normalized.Fraction)));
            self.positive = repr.sign == .positive;
            return .exact;
        }
        self.set(mantissa >> @intCast(@bitSizeOf(Repr.Normalized.Fraction) - exponent));
        const round_bits: Repr.Normalized.Fraction = @truncate(mantissa << @intCast(exponent));
        if (round_bits == 0) {
            self.positive = repr.sign == .positive;
            return .exact;
        }
        round: switch (round) {
            .nearest_even => {
                const half: Repr.Normalized.Fraction = 1 << (@bitSizeOf(Repr.Normalized.Fraction) - 1);
                if (round_bits >= half) self.addScalar(self.toConst(), 1);
                if (round_bits == half) self.limbs[0] &= ~@as(Limb, 1);
            },
            .away => self.addScalar(self.toConst(), 1),
            .trunc => {},
            .floor => switch (repr.sign) {
                .positive => {},
                .negative => continue :round .away,
            },
            .ceil => switch (repr.sign) {
                .positive => continue :round .away,
                .negative => {},
            },
        }
        self.positive = repr.sign == .positive;
        return .inexact;
    }

    /// r = a + scalar
    ///
    /// r and a may be aliases.
    /// scalar is a primitive integer type.
    ///
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `@max(a.limbs.len, calcLimbLen(scalar)) + 1`.
    pub fn addScalar(r: *Mutable, a: Const, scalar: anytype) void {
        // Normally we could just determine the number of limbs needed with calcLimbLen,
        // but that is not comptime-known when scalar is not a comptime_int.  Instead, we
        // use calcTwosCompLimbCount for a non-comptime_int scalar, which can be pessimistic
        // in the case that scalar happens to be small in magnitude within its type, but it
        // is well worth being able to use the stack and not needing an allocator passed in.
        // Note that Mutable.init still sets len to calcLimbLen(scalar) in any case.
        const limbs_len = comptime switch (@typeInfo(@TypeOf(scalar))) {
            .comptime_int => calcLimbLen(scalar),
            .int => |info| calcTwosCompLimbCount(info.bits),
            else => @compileError("expected scalar to be an int"),
        };
        var limbs: [limbs_len]Limb = undefined;
        const operand = init(&limbs, scalar).toConst();
        return add(r, a, operand);
    }

    /// Base implementation for addition. Adds `@max(a.limbs.len, b.limbs.len)` elements from a and b,
    /// and returns whether any overflow occurred.
    /// r, a and b may be aliases.
    ///
    /// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`.
    fn addCarry(r: *Mutable, a: Const, b: Const) bool {
        if (a.eqlZero()) {
            r.copy(b);
            return false;
        } else if (b.eqlZero()) {
            r.copy(a);
            return false;
        } else if (a.positive != b.positive) {
            if (a.positive) {
                // (a) + (-b) => a - b
                return r.subCarry(a, b.abs());
            } else {
                // (-a) + (b) => b - a
                return r.subCarry(b, a.abs());
            }
        } else {
            r.positive = a.positive;
            if (a.limbs.len >= b.limbs.len) {
                const c = lladdcarry(r.limbs, a.limbs, b.limbs);
                r.normalize(a.limbs.len);
                return c != 0;
            } else {
                const c = lladdcarry(r.limbs, b.limbs, a.limbs);
                r.normalize(b.limbs.len);
                return c != 0;
            }
        }
    }

    /// r = a + b
    ///
    /// r, a and b may be aliases.
    ///
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `@max(a.limbs.len, b.limbs.len) + 1`.
    pub fn add(r: *Mutable, a: Const, b: Const) void {
        if (r.addCarry(a, b)) {
            // Fix up the result. Note that addCarry normalizes by a.limbs.len or b.limbs.len,
            // so we need to set the length here.
            const msl = @max(a.limbs.len, b.limbs.len);
            // `[add|sub]Carry` normalizes by `msl`, so we need to fix up the result manually here.
            // Note, the fact that it normalized means that the intermediary limbs are zero here.
            r.len = msl + 1;
            r.limbs[msl] = 1; // If this panics, there wasn't enough space in `r`.
        }
    }
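
    // A minimal sketch of the sizing rule in the doc comment above: two one-limb
    // operands can carry into a second limb, so `r` is given two limbs.
    test "add carries into a new limb" {
        var a_limbs: [1]Limb = undefined;
        var b_limbs: [1]Limb = undefined;
        var r_limbs: [2]Limb = undefined;
        const a = Mutable.init(&a_limbs, maxInt(Limb)).toConst();
        const b = Mutable.init(&b_limbs, 1).toConst();
        var r: Mutable = .{ .limbs = &r_limbs, .len = 1, .positive = true };
        r.add(a, b);
        try std.testing.expectEqual(@as(usize, 2), r.len);
        try std.testing.expectEqual(@as(Limb, 1), r.limbs[1]);
    }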

    /// r = a + b with 2s-complement wrapping semantics. Returns whether overflow occurred.
    /// r, a and b may be aliases.
    ///
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn addWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool {
        const req_limbs = calcTwosCompLimbCount(bit_count);

        // Slice off the upper bits if they exist; they will be ignored, which allows us to use
        // addCarry to determine whether an overflow occurred.
        const x = Const{
            .positive = a.positive,
            .limbs = a.limbs[0..@min(req_limbs, a.limbs.len)],
        };

        const y = Const{
            .positive = b.positive,
            .limbs = b.limbs[0..@min(req_limbs, b.limbs.len)],
        };

        var carry_truncated = false;
        if (r.addCarry(x, y)) {
            // There are two possibilities here:
            // - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by
            //   truncate anyway.
            // - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
            //   Note: after this we still might need to wrap.
            const msl = @max(a.limbs.len, b.limbs.len);
            if (msl < req_limbs) {
                r.limbs[msl] = 1;
                r.len = req_limbs;
                @memset(r.limbs[msl + 1 .. req_limbs], 0);
            } else {
                carry_truncated = true;
            }
        }

        if (!r.toConst().fitsInTwosComp(signedness, bit_count)) {
            r.truncate(r.toConst(), signedness, bit_count);
            return true;
        }

        return carry_truncated;
    }
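
    // A small sketch of the wrapping behaviour: 255 + 1 does not fit in u8, so the
    // result wraps to zero and the function reports that overflow occurred.
    test "addWrap wraps an unsigned 8-bit addition" {
        var a_limbs: [1]Limb = undefined;
        var b_limbs: [1]Limb = undefined;
        var r_limbs: [calcTwosCompLimbCount(8)]Limb = undefined;
        const a = Mutable.init(&a_limbs, 255).toConst();
        const b = Mutable.init(&b_limbs, 1).toConst();
        var r: Mutable = .{ .limbs = &r_limbs, .len = 1, .positive = true };
        try std.testing.expect(r.addWrap(a, b, .unsigned, 8));
        try std.testing.expect(r.eqlZero());
    }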

    /// r = a + b with 2s-complement saturating semantics.
    /// r, a and b may be aliases.
    ///
    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn addSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void {
        const req_limbs = calcTwosCompLimbCount(bit_count);

        // Slice off the upper bits if they exist; they will be ignored, which allows us to use
        // addCarry to determine whether an overflow occurred.
        const x = Const{
            .positive = a.positive,
            .limbs = a.limbs[0..@min(req_limbs, a.limbs.len)],
        };

        const y = Const{
            .positive = b.positive,
            .limbs = b.limbs[0..@min(req_limbs, b.limbs.len)],
        };

        if (r.addCarry(x, y)) {
            // There are two possibilities here:
            // - We overflowed req_limbs, in which case we need to saturate.
            // - a and b had less elements than req_limbs, and those were overflowed.
            //   Note: In this case, might _also_ need to saturate.
            const msl = @max(a.limbs.len, b.limbs.len);
            if (msl < req_limbs) {
                r.limbs[msl] = 1;
                r.len = req_limbs;
                // Note: Saturation may still be required if msl == req_limbs - 1
            } else {
                // Overflowed req_limbs, definitely saturate.
                r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
            }
        }

        // Saturate if the result didn't fit.
        r.saturate(r.toConst(), signedness, bit_count);
    }

    /// Base implementation for subtraction. Subtracts `@max(a.limbs.len, b.limbs.len)` elements from a and b,
    /// and returns whether any overflow occurred.
    /// r, a and b may be aliases.
    ///
    /// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`.
    fn subCarry(r: *Mutable, a: Const, b: Const) bool {
        if (a.eqlZero()) {
            r.copy(b);
            r.positive = !b.positive;
            return false;
        } else if (b.eqlZero()) {
            r.copy(a);
            return false;
        } else if (a.positive != b.positive) {
            if (a.positive) {
                // (a) - (-b) => a + b
                return r.addCarry(a, b.abs());
            } else {
                // (-a) - (b) => -a + -b
                return r.addCarry(a, b.negate());
            }
        } else if (a.positive) {
            if (a.order(b) != .lt) {
                // (a) - (b) => a - b
                const c = llsubcarry(r.limbs, a.limbs, b.limbs);
                r.normalize(a.limbs.len);
                r.positive = true;
                return c != 0;
            } else {
                // (a) - (b) => -b + a => -(b - a)
                const c = llsubcarry(r.limbs, b.limbs, a.limbs);
                r.normalize(b.limbs.len);
                r.positive = false;
                return c != 0;
            }
        } else {
            if (a.order(b) == .lt) {
                // (-a) - (-b) => -(a - b)
                const c = llsubcarry(r.limbs, a.limbs, b.limbs);
                r.normalize(a.limbs.len);
                r.positive = false;
                return c != 0;
            } else {
                // (-a) - (-b) => --b + -a => b - a
                const c = llsubcarry(r.limbs, b.limbs, a.limbs);
                r.normalize(b.limbs.len);
                r.positive = true;
                return c != 0;
            }
        }
    }

    /// r = a - b
    ///
    /// r, a and b may be aliases.
    ///
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `@max(a.limbs.len, b.limbs.len) + 1`. The +1 is not needed if both operands are positive.
    pub fn sub(r: *Mutable, a: Const, b: Const) void {
        r.add(a, b.negate());
    }

    /// r = a - b with 2s-complement wrapping semantics. Returns whether any overflow occurred.
    ///
    /// r, a and b may be aliases
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn subWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool {
        return r.addWrap(a, b.negate(), signedness, bit_count);
    }

    /// r = a - b with 2s-complement saturating semantics.
    /// r, a and b may be aliases.
    ///
    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn subSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void {
        r.addSat(a, b.negate(), signedness, bit_count);
    }

    /// rma = a * b
    ///
    /// `rma` may alias with `a` or `b`.
    /// `a` and `b` may alias with each other.
    ///
    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
    /// rma is given by `a.limbs.len + b.limbs.len`.
    ///
    /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulLimbsBufferLen`.
    pub fn mul(rma: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, allocator: ?Allocator) void {
        var buf_index: usize = 0;

        const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: {
            const start = buf_index;
            @memcpy(limbs_buffer[buf_index..][0..a.limbs.len], a.limbs);
            buf_index += a.limbs.len;
            break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
        } else a;

        const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: {
            const start = buf_index;
            @memcpy(limbs_buffer[buf_index..][0..b.limbs.len], b.limbs);
            buf_index += b.limbs.len;
            break :blk b.toMutable(limbs_buffer[start..buf_index]).toConst();
        } else b;

        return rma.mulNoAlias(a_copy, b_copy, allocator);
    }

    /// rma = a * b
    ///
    /// `rma` may not alias with `a` or `b`.
    /// `a` and `b` may alias with each other.
    ///
    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
    /// rma is given by `a.limbs.len + b.limbs.len`.
    ///
    /// If `allocator` is provided, it will be used for temporary storage to improve
    /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
    pub fn mulNoAlias(rma: *Mutable, a: Const, b: Const, allocator: ?Allocator) void {
        assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
        assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing

        if (a.limbs.len == 1 and b.limbs.len == 1) {
            rma.limbs[0], const overflow_bit = @mulWithOverflow(a.limbs[0], b.limbs[0]);
            if (overflow_bit == 0) {
                rma.len = 1;
                rma.positive = (a.positive == b.positive) or rma.limbs[0] == 0;
                return;
            }
        }

        @memset(rma.limbs[0 .. a.limbs.len + b.limbs.len], 0);

        llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs);

        rma.normalize(a.limbs.len + b.limbs.len);
        rma.positive = (a.positive == b.positive);
    }

    /// rma = a * b with 2s-complement wrapping semantics.
    ///
    /// `rma` may alias with `a` or `b`.
    /// `a` and `b` may alias with each other.
    ///
    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
    /// rma is given by `a.limbs.len + b.limbs.len`.
    ///
    /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulWrapLimbsBufferLen`.
    pub fn mulWrap(
        rma: *Mutable,
        a: Const,
        b: Const,
        signedness: Signedness,
        bit_count: usize,
        limbs_buffer: []Limb,
        allocator: ?Allocator,
    ) void {
        var buf_index: usize = 0;
        const req_limbs = calcTwosCompLimbCount(bit_count);

        const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: {
            const start = buf_index;
            const a_len = @min(req_limbs, a.limbs.len);
            @memcpy(limbs_buffer[buf_index..][0..a_len], a.limbs[0..a_len]);
            buf_index += a_len;
            break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
        } else a;

        const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: {
            const start = buf_index;
            const b_len = @min(req_limbs, b.limbs.len);
            @memcpy(limbs_buffer[buf_index..][0..b_len], b.limbs[0..b_len]);
            buf_index += b_len;
            break :blk b.toMutable(limbs_buffer[start..buf_index]).toConst();
        } else b;

        return rma.mulWrapNoAlias(a_copy, b_copy, signedness, bit_count, allocator);
    }

    /// rma = a * b with 2s-complement wrapping semantics.
    ///
    /// `rma` may not alias with `a` or `b`.
    /// `a` and `b` may alias with each other.
    ///
    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
    /// rma is given by `a.limbs.len + b.limbs.len`.
    ///
    /// If `allocator` is provided, it will be used for temporary storage to improve
    /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
    pub fn mulWrapNoAlias(
        rma: *Mutable,
        a: Const,
        b: Const,
        signedness: Signedness,
        bit_count: usize,
        allocator: ?Allocator,
    ) void {
        assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
        assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing

        const req_limbs = calcTwosCompLimbCount(bit_count);

        // We can ignore the upper bits here, those results will be discarded anyway.
        const a_limbs = a.limbs[0..@min(req_limbs, a.limbs.len)];
        const b_limbs = b.limbs[0..@min(req_limbs, b.limbs.len)];

        @memset(rma.limbs[0..req_limbs], 0);

        llmulacc(.add, allocator, rma.limbs, a_limbs, b_limbs);
        rma.normalize(@min(req_limbs, a.limbs.len + b.limbs.len));
        rma.positive = (a.positive == b.positive);
        rma.truncate(rma.toConst(), signedness, bit_count);
    }

    /// r = @bitReverse(a) with 2s-complement semantics.
    /// r and a may be aliases.
    ///
    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn bitReverse(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
        if (bit_count == 0) return;

        r.copy(a);

        const limbs_required = calcTwosCompLimbCount(bit_count);

        if (!a.positive) {
            r.positive = true; // Negate.
            r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
            r.addScalar(r.toConst(), 1); // Add one.
        } else if (limbs_required > a.limbs.len) {
            // Zero-extend to our output length
            for (r.limbs[a.limbs.len..limbs_required]) |*limb| {
                limb.* = 0;
            }
            r.len = limbs_required;
        }

        // 0b0..01..1000 with @log2(@sizeOf(Limb)) consecutive ones
        const endian_mask: usize = (@sizeOf(Limb) - 1) << 3;

        const bytes = std.mem.sliceAsBytes(r.limbs);

        var k: usize = 0;
        while (k < ((bit_count + 1) / 2)) : (k += 1) {
            var i = k;
            var rev_i = bit_count - i - 1;

            // This "endian mask" remaps a low (LE) byte to the corresponding high
            // (BE) byte in the Limb, without changing which limbs we are indexing
            if (native_endian == .big) {
                i ^= endian_mask;
                rev_i ^= endian_mask;
            }

            const bit_i = std.mem.readPackedInt(u1, bytes, i, .little);
            const bit_rev_i = std.mem.readPackedInt(u1, bytes, rev_i, .little);
            std.mem.writePackedInt(u1, bytes, i, bit_rev_i, .little);
            std.mem.writePackedInt(u1, bytes, rev_i, bit_i, .little);
        }

        // Calculate signed-magnitude representation for output
        if (signedness == .signed) {
            const last_bit = switch (native_endian) {
                .little => std.mem.readPackedInt(u1, bytes, bit_count - 1, .little),
                .big => std.mem.readPackedInt(u1, bytes, (bit_count - 1) ^ endian_mask, .little),
            };
            if (last_bit == 1) {
                r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
                r.addScalar(r.toConst(), 1); // Add one.
                r.positive = false; // Negate.
            }
        }
        r.normalize(r.len);
    }

    /// r = @byteSwap(a) with 2s-complement semantics.
    /// r and a may be aliases.
    ///
    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(8*byte_count)`.
    pub fn byteSwap(r: *Mutable, a: Const, signedness: Signedness, byte_count: usize) void {
        if (byte_count == 0) return;

        r.copy(a);
        const limbs_required = calcTwosCompLimbCount(8 * byte_count);

        if (!a.positive) {
            r.positive = true; // Negate.
            r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT.
            r.addScalar(r.toConst(), 1); // Add one.
        } else if (limbs_required > a.limbs.len) {
            // Zero-extend to our output length
            for (r.limbs[a.limbs.len..limbs_required]) |*limb| {
                limb.* = 0;
            }
            r.len = limbs_required;
        }

        // 0b0..01..1 with @log2(@sizeOf(Limb)) trailing ones
        const endian_mask: usize = @sizeOf(Limb) - 1;

        var bytes = std.mem.sliceAsBytes(r.limbs);
        assert(bytes.len >= byte_count);

        var k: usize = 0;
        while (k < (byte_count + 1) / 2) : (k += 1) {
            var i = k;
            var rev_i = byte_count - k - 1;

            // This "endian mask" remaps a low (LE) byte to the corresponding high
            // (BE) byte in the Limb, without changing which limbs we are indexing
            if (native_endian == .big) {
                i ^= endian_mask;
                rev_i ^= endian_mask;
            }

            const byte_i = bytes[i];
            const byte_rev_i = bytes[rev_i];
            bytes[rev_i] = byte_i;
            bytes[i] = byte_rev_i;
        }

        // Calculate signed-magnitude representation for output
        if (signedness == .signed) {
            const last_byte = switch (native_endian) {
                .little => bytes[byte_count - 1],
                .big => bytes[(byte_count - 1) ^ endian_mask],
            };

            if (last_byte & (1 << 7) != 0) { // Check sign bit of last byte
                r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT.
                r.addScalar(r.toConst(), 1); // Add one.
                r.positive = false; // Negate.
            }
        }
        r.normalize(r.len);
    }

    /// r = @popCount(a) with 2s-complement semantics.
    /// r and a may be aliases.
    ///
    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn popCount(r: *Mutable, a: Const, bit_count: usize) void {
        r.copy(a);

        if (!a.positive) {
            r.positive = true; // Negate.
            r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
            r.addScalar(r.toConst(), 1); // Add one.
        }

        var sum: Limb = 0;
        for (r.limbs[0..r.len]) |limb| {
            sum += @popCount(limb);
        }
        r.set(sum);
    }
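
    // Sketch of popCount on a negative operand: the operand is reinterpreted in two's
    // complement, so -1 in a 16-bit window has all 16 bits set.
    test "popCount of -1 in a 16-bit window" {
        var a_limbs: [1]Limb = undefined;
        var r_limbs: [calcTwosCompLimbCount(16)]Limb = undefined;
        const a = Mutable.init(&a_limbs, -1).toConst();
        var r: Mutable = .{ .limbs = &r_limbs, .len = 1, .positive = true };
        r.popCount(a, 16);
        try std.testing.expectEqual(@as(u32, 16), try r.toInt(u32));
    }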

    /// rma = a * a
    ///
    /// `rma` may not alias with `a`.
    ///
    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
    /// rma is given by `2 * a.limbs.len + 1`.
    ///
    /// If `allocator` is provided, it will be used for temporary storage to improve
    /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
    pub fn sqrNoAlias(rma: *Mutable, a: Const, opt_allocator: ?Allocator) void {
        _ = opt_allocator;
        assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing

        @memset(rma.limbs, 0);

        llsquareBasecase(rma.limbs, a.limbs);

        rma.normalize(2 * a.limbs.len + 1);
        rma.positive = true;
    }

    /// q = a / b (rem r)
    ///
    /// a / b are floored (rounded towards negative infinity).
    /// q may alias with a or b.
    ///
    /// Asserts there is enough memory to store q and r.
    /// The upper bound for r limb count is `b.limbs.len`.
    /// The upper bound for q limb count is given by `a.limbs.len`.
    ///
    /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`.
    pub fn divFloor(
        q: *Mutable,
        r: *Mutable,
        a: Const,
        b: Const,
        limbs_buffer: []Limb,
    ) void {
        const sep = a.limbs.len + 2;
        var x = a.toMutable(limbs_buffer[0..sep]);
        var y = b.toMutable(limbs_buffer[sep..]);

        div(q, r, &x, &y);

        // Note, `div` performs truncating division, which satisfies
        // @divTrunc(a, b) * b + @rem(a, b) = a
        // so r = a - @divTrunc(a, b) * b
        // Note,  @rem(a, -b) = @rem(-b, a) = -@rem(a, b) = -@rem(-a, -b)
        // For divFloor, we want to perform
        // @divFloor(a, b) * b + @mod(a, b) = a
        // Note:
        // @divFloor(-a, b)
        // = @divFloor(a, -b)
        // = -@divCeil(a, b)
        // = -@divFloor(a + b - 1, b)
        // = -@divTrunc(a + b - 1, b)

        // Note (1):
        // @divTrunc(a + b - 1, b) * b + @rem(a + b - 1, b) = a + b - 1
        // = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) = a + b - 1
        // = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = a

        if (a.positive and b.positive) {
            // Positive-positive case, don't need to do anything.
        } else if (a.positive and !b.positive) {
            // a/-b -> q is negative, and so we need to fix flooring.
            // Subtract one to make the division flooring.

            // @divFloor(a, -b) * -b + @mod(a, -b) = a
            // If b divides a exactly, we have @divFloor(a, -b) * -b = a
            // Else, we have @divFloor(a, -b) * -b > a, so @mod(a, -b) becomes negative

            // We have:
            // @divFloor(a, -b) * -b + @mod(a, -b) = a
            // = -@divTrunc(a + b - 1, b) * -b + @mod(a, -b) = a
            // = @divTrunc(a + b - 1, b) * b + @mod(a, -b) = a

            // Substitute a for (1):
            // @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b + @mod(a, -b)
            // Yields:
            // @mod(a, -b) = @rem(a - 1, b) - b + 1
            // Note that `r` holds @rem(a, b) at this point.
            //
            // If @rem(a, b) is not 0:
            //   @rem(a - 1, b) = @rem(a, b) - 1
            //   => @mod(a, -b) = @rem(a, b) - 1 - b + 1 = @rem(a, b) - b
            // Else:
            //   @rem(a - 1, b) = @rem(a + b - 1, b) = @rem(b - 1, b) = b - 1
            //   => @mod(a, -b) = b - 1 - b + 1 = 0
            if (!r.eqlZero()) {
                q.addScalar(q.toConst(), -1);
                r.positive = true;
                r.sub(r.toConst(), y.toConst().abs());
            }
        } else if (!a.positive and b.positive) {
            // -a/b -> q is negative, and so we need to fix flooring.
            // Subtract one to make the division flooring.

            // @divFloor(-a, b) * b + @mod(-a, b) = -a
            // If b divides a exactly, we have @divFloor(-a, b) * b = -a
            // Else, we have @divFloor(-a, b) * b < -a, so @mod(-a, b) becomes positive

            // We have:
            // @divFloor(-a, b) * b + @mod(-a, b) = -a
            // = -@divTrunc(a + b - 1, b) * b + @mod(-a, b) = -a
            // = @divTrunc(a + b - 1, b) * b - @mod(-a, b) = a

            // Substitute a for (1):
            // @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b - @mod(-a, b)
            // Yields:
            // @rem(a - 1, b) - b + 1 = -@mod(-a, b)
            // => -@mod(-a, b) = @rem(a - 1, b) - b + 1
            // => @mod(-a, b) = -(@rem(a - 1, b) - b + 1) = -@rem(a - 1, b) + b - 1
            //
            // If @rem(a, b) is not 0:
            //   @rem(a - 1, b) = @rem(a, b) - 1
            //   => @mod(-a, b) = -(@rem(a, b) - 1) + b - 1 = -@rem(a, b) + 1 + b - 1 = -@rem(a, b) + b
            // Else :
            //   @rem(a - 1, b) = b - 1
            //   => @mod(-a, b) = -(b - 1) + b - 1 = 0
            if (!r.eqlZero()) {
                q.addScalar(q.toConst(), -1);
                r.positive = false;
                r.add(r.toConst(), y.toConst().abs());
            }
        } else if (!a.positive and !b.positive) {
            // a/b -> q is positive, don't need to do anything to fix flooring.

            // @divFloor(-a, -b) * -b + @mod(-a, -b) = -a
            // If b divides a exactly, we have @divFloor(-a, -b) * -b = -a
            // Else, we have @divFloor(-a, -b) * -b > -a, so @mod(-a, -b) becomes negative

            // We have:
            // @divFloor(-a, -b) * -b + @mod(-a, -b) = -a
            // = @divTrunc(a, b) * -b + @mod(-a, -b) = -a
            // = @divTrunc(a, b) * b - @mod(-a, -b) = a

            // We also have:
            // @divTrunc(a, b) * b + @rem(a, b) = a

            // Substitute a:
            // @divTrunc(a, b) * b + @rem(a, b) = @divTrunc(a, b) * b - @mod(-a, -b)
            // => @rem(a, b) = -@mod(-a, -b)
            // => @mod(-a, -b) = -@rem(a, b)
            r.positive = false;
        }
    }

    /// q = a / b (rem r)
    ///
    /// a / b are truncated (rounded towards zero).
    /// q may alias with a or b.
    ///
    /// Asserts there is enough memory to store q and r.
    /// The upper bound for r limb count is `b.limbs.len`.
    /// The upper bound for q limb count is given by `a.limbs.len`.
    ///
    /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`.
    pub fn divTrunc(
        q: *Mutable,
        r: *Mutable,
        a: Const,
        b: Const,
        limbs_buffer: []Limb,
    ) void {
        const sep = a.limbs.len + 2;
        var x = a.toMutable(limbs_buffer[0..sep]);
        var y = b.toMutable(limbs_buffer[sep..]);

        div(q, r, &x, &y);
    }

    /// r = a << shift, in other words, r = a * 2^shift
    ///
    /// r and a may alias.
    ///
    /// Asserts there is enough memory to fit the result. The upper bound Limb count is
    /// `a.limbs.len + (shift / (@sizeOf(Limb) * 8))`.
    pub fn shiftLeft(r: *Mutable, a: Const, shift: usize) void {
        const new_len = llshl(r.limbs, a.limbs, shift);
        r.normalize(new_len);
        r.positive = a.positive;
    }

    /// r = a <<| shift with 2s-complement saturating semantics.
    ///
    /// r and a may alias.
    ///
    /// Asserts there is enough memory to fit the result. The upper bound Limb count needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn shiftLeftSat(r: *Mutable, a: Const, shift: usize, signedness: Signedness, bit_count: usize) void {
        // Special case: When the argument is negative, but the result is supposed to be unsigned,
        // return 0 in all cases.
        if (!a.positive and signedness == .unsigned) {
            r.set(0);
            return;
        }

        // Check whether the shift is going to overflow. This is the case
        // when (in 2s complement) any bit above `bit_count - shift` is set in the unshifted value.
        // Note, the sign bit is not counted here.

        // Handle shifts larger than the target type. This also deals with
        // 0-bit integers.
        if (bit_count <= shift) {
            // In this case, overflow can only be avoided if `a` is zero.
1232            if (a.eqlZero()) {
1233                r.set(0);
1234            } else {
1235                r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
1236            }
1237            return;
1238        }
1239
1240        const checkbit = bit_count - shift - @intFromBool(signedness == .signed);
1241        // If `checkbit` and more significant bits are zero, no overflow will take place.
1242
1243        if (checkbit >= a.limbs.len * limb_bits) {
1244            // `checkbit` is outside the range of a, so definitely no overflow will take place. We
1245            // can defer to a normal shift.
1246            // Note that if `a` is normalized (which we assume), this checks for set bits in the upper limbs.
1247
1248            // Note, in this case r should already have enough limbs required to perform the normal shift.
1249            // In this case the shift of the most significant limb may still overflow.
1250            r.shiftLeft(a, shift);
1251            return;
1252        } else if (checkbit < (a.limbs.len - 1) * limb_bits) {
1253            // `checkbit` is not in the most significant limb. If `a` is normalized the most significant
1254            // limb will not be zero, so in this case we need to saturate. Note that `a.limbs.len` must be
1255            // at least one according to normalization rules.
1256
1257            r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
1258            return;
1259        }
1260
1261        // Generate a mask with the bits to check in the most significant limb. We'll need to check
1262        // all bits with equal or more significance than checkbit.
1263        // const msb = @truncate(Log2Limb, checkbit);
1264        // const checkmask = (@as(Limb, 1) << msb) -% 1;
1265
1266        if (a.limbs[a.limbs.len - 1] >> @as(Log2Limb, @truncate(checkbit)) != 0) {
1267            // Need to saturate.
1268            r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
1269            return;
1270        }
1271
1272        // This shift should not be able to overflow, so invoke llshl and normalize manually
1273        // to avoid the extra required limb.
1274        const new_len = llshl(r.limbs, a.limbs, shift);
1275        r.normalize(new_len);
1276        r.positive = a.positive;
1277    }
1278
1279    /// r = a >> shift
1280    /// r and a may alias.
1281    ///
1282    /// Asserts there is enough memory to fit the result. The upper bound Limb count is
1283    /// `a.limbs.len - (shift / (@bitSizeOf(Limb)))`.
1284    pub fn shiftRight(r: *Mutable, a: Const, shift: usize) void {
1285        const full_limbs_shifted_out = shift / limb_bits;
1286        const remaining_bits_shifted_out = shift % limb_bits;
1287        if (a.limbs.len <= full_limbs_shifted_out) {
1288            // Shifting negative numbers converges to -1 instead of 0
1289            if (a.positive) {
1290                r.len = 1;
1291                r.positive = true;
1292                r.limbs[0] = 0;
1293            } else {
1294                r.len = 1;
1295                r.positive = false;
1296                r.limbs[0] = 1;
1297            }
1298            return;
1299        }
1300        const nonzero_negative_shiftout = if (a.positive) false else nonzero: {
1301            for (a.limbs[0..full_limbs_shifted_out]) |x| {
1302                if (x != 0)
1303                    break :nonzero true;
1304            }
1305            if (remaining_bits_shifted_out == 0)
1306                break :nonzero false;
1307            const not_covered: Log2Limb = @intCast(limb_bits - remaining_bits_shifted_out);
1308            break :nonzero a.limbs[full_limbs_shifted_out] << not_covered != 0;
1309        };
1310
1311        const new_len = llshr(r.limbs, a.limbs, shift);
1312
1313        r.len = new_len;
1314        r.positive = a.positive;
1315        if (nonzero_negative_shiftout) r.addScalar(r.toConst(), -1);
1316        r.normalize(r.len);
1317    }
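
    // Illustrative sketch, not part of the original source: a small test exercising
    // `shiftRight` as documented above, including the floor-division behavior where a
    // negative value shifted past all of its limbs converges to -1. Buffer sizes are
    // chosen by hand for these tiny values.
    test "shiftRight example (illustrative)" {
        var a_limbs: [2]Limb = undefined;
        const a = Mutable.init(&a_limbs, -5).toConst();

        var r_limbs: [2]Limb = undefined;
        var r = Mutable.init(&r_limbs, 0);

        r.shiftRight(a, 1); // arithmetic shift: -5 >> 1 == -3
        try std.testing.expectEqual(@as(i8, -3), try r.toConst().toInt(i8));

        r.shiftRight(a, 100); // shifted past every limb: converges to -1
        try std.testing.expectEqual(@as(i8, -1), try r.toConst().toInt(i8));
    }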
1318
1319    /// r = ~a under 2s complement wrapping semantics.
1320    /// r may alias with a.
1321    ///
1322    /// Asserts that r has enough limbs to store the result. The upper bound Limb count
1323    /// is `calcTwosCompLimbCount(bit_count)`.
1324    pub fn bitNotWrap(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
1325        r.copy(a.negate());
1326        const negative_one = Const{ .limbs = &.{1}, .positive = false };
1327        _ = r.addWrap(r.toConst(), negative_one, signedness, bit_count);
1328    }
1329
1330    /// r = a | b under 2s complement semantics.
1331    /// r may alias with a or b.
1332    ///
1333    /// a and b are zero-extended to the longer of a or b.
1334    ///
1335    /// Asserts that r has enough limbs to store the result. Upper bound is `@max(a.limbs.len, b.limbs.len)`.
1336    pub fn bitOr(r: *Mutable, a: Const, b: Const) void {
1337        // Trivial cases, llsignedor does not support zero.
1338        if (a.eqlZero()) {
1339            r.copy(b);
1340            return;
1341        } else if (b.eqlZero()) {
1342            r.copy(a);
1343            return;
1344        }
1345
1346        if (a.limbs.len >= b.limbs.len) {
1347            r.positive = llsignedor(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
1348            r.normalize(if (b.positive) a.limbs.len else b.limbs.len);
1349        } else {
1350            r.positive = llsignedor(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
1351            r.normalize(if (a.positive) b.limbs.len else a.limbs.len);
1352        }
1353    }
1354
1355    /// r = a & b under 2s complement semantics.
1356    /// r may alias with a or b.
1357    ///
1358    /// Asserts that r has enough limbs to store the result.
1359    /// If only a is positive, the upper bound is `a.limbs.len`.
1360    /// If only b is positive, the upper bound is `b.limbs.len`.
1361    /// If a and b are positive, the upper bound is `@min(a.limbs.len, b.limbs.len)`.
1362    /// If a and b are negative, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`.
1363    pub fn bitAnd(r: *Mutable, a: Const, b: Const) void {
1364        // Trivial cases, llsignedand does not support zero.
1365        if (a.eqlZero()) {
1366            r.copy(a);
1367            return;
1368        } else if (b.eqlZero()) {
1369            r.copy(b);
1370            return;
1371        }
1372
1373        if (a.limbs.len >= b.limbs.len) {
1374            r.positive = llsignedand(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
1375            r.normalize(if (b.positive) b.limbs.len else if (a.positive) a.limbs.len else a.limbs.len + 1);
1376        } else {
1377            r.positive = llsignedand(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
1378            r.normalize(if (a.positive) a.limbs.len else if (b.positive) b.limbs.len else b.limbs.len + 1);
1379        }
1380    }
1381
1382    /// r = a ^ b under 2s complement semantics.
1383    /// r may alias with a or b.
1384    ///
1385    /// Asserts that r has enough limbs to store the result. If a and b share the same signedness, the
1386    /// upper bound is `@max(a.limbs.len, b.limbs.len)`. Otherwise, if either a or b is negative
1387    /// but not both, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`.
1388    pub fn bitXor(r: *Mutable, a: Const, b: Const) void {
1389        // Trivial cases, because llsignedxor does not support negative zero.
1390        if (a.eqlZero()) {
1391            r.copy(b);
1392            return;
1393        } else if (b.eqlZero()) {
1394            r.copy(a);
1395            return;
1396        }
1397
1398        if (a.limbs.len > b.limbs.len) {
1399            r.positive = llsignedxor(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
1400            r.normalize(a.limbs.len + @intFromBool(a.positive != b.positive));
1401        } else {
1402            r.positive = llsignedxor(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
1403            r.normalize(b.limbs.len + @intFromBool(a.positive != b.positive));
1404        }
1405    }
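
    // Illustrative sketch, not part of the original source: the three bitwise operations
    // above applied to small positive operands, with a result buffer comfortably above
    // the documented upper bounds.
    test "bitOr / bitAnd / bitXor example (illustrative)" {
        var a_limbs: [1]Limb = undefined;
        var b_limbs: [1]Limb = undefined;
        const a = Mutable.init(&a_limbs, 0b1100).toConst();
        const b = Mutable.init(&b_limbs, 0b1010).toConst();

        var r_limbs: [2]Limb = undefined;
        var r = Mutable.init(&r_limbs, 0);

        r.bitOr(a, b);
        try std.testing.expectEqual(@as(u8, 0b1110), try r.toConst().toInt(u8));

        r.bitAnd(a, b);
        try std.testing.expectEqual(@as(u8, 0b1000), try r.toConst().toInt(u8));

        r.bitXor(a, b);
        try std.testing.expectEqual(@as(u8, 0b0110), try r.toConst().toInt(u8));
    }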
1406
1407    /// rma may alias x or y.
1408    /// x and y may alias each other.
1409    /// Asserts that `rma` has enough limbs to store the result. Upper bound is
1410    /// `@min(x.limbs.len, y.limbs.len)`.
1411    ///
1412    /// `limbs_buffer` is used for temporary storage during the operation. When this function returns,
1413    /// it will have the same length as it had when the function was called.
1414    pub fn gcd(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.array_list.Managed(Limb)) !void {
1415        const prev_len = limbs_buffer.items.len;
1416        defer limbs_buffer.shrinkRetainingCapacity(prev_len);
1417        const x_copy = if (rma.limbs.ptr == x.limbs.ptr) blk: {
1418            const start = limbs_buffer.items.len;
1419            try limbs_buffer.appendSlice(x.limbs);
1420            break :blk x.toMutable(limbs_buffer.items[start..]).toConst();
1421        } else x;
1422        const y_copy = if (rma.limbs.ptr == y.limbs.ptr) blk: {
1423            const start = limbs_buffer.items.len;
1424            try limbs_buffer.appendSlice(y.limbs);
1425            break :blk y.toMutable(limbs_buffer.items[start..]).toConst();
1426        } else y;
1427
1428        return gcdLehmer(rma, x_copy, y_copy, limbs_buffer);
1429    }
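
    // Illustrative sketch, not part of the original source: `gcd` with a caller-provided
    // `std.array_list.Managed(Limb)` scratch list, as the signature above takes. It assumes
    // `std.testing.allocator` is acceptable for the temporary allocations made internally.
    test "gcd example (illustrative)" {
        var x_limbs: [1]Limb = undefined;
        var y_limbs: [1]Limb = undefined;
        const x = Mutable.init(&x_limbs, 48).toConst();
        const y = Mutable.init(&y_limbs, 18).toConst();

        var r_limbs: [1]Limb = undefined;
        var r = Mutable.init(&r_limbs, 0);

        var scratch = std.array_list.Managed(Limb).init(std.testing.allocator);
        defer scratch.deinit();

        try r.gcd(x, y, &scratch);
        try std.testing.expectEqual(@as(u8, 6), try r.toConst().toInt(u8));
    }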
1430
1431    /// r = a ^ b
1432    ///
1433    /// r may not alias a.
1434    ///
1435    /// Asserts that `r` has enough limbs to store the result. Upper bound is
1436    /// `calcPowLimbsBufferLen(a.bitCountAbs(), b)`.
1437    ///
1438    /// `limbs_buffer` is used for temporary storage.
1439    /// The amount required is given by `calcPowLimbsBufferLen`.
1440    pub fn pow(r: *Mutable, a: Const, b: u32, limbs_buffer: []Limb) void {
1441        assert(r.limbs.ptr != a.limbs.ptr); // illegal aliasing
1442
1443        // Handle all the trivial cases first
1444        switch (b) {
1445            0 => {
1446                // a^0 = 1
1447                return r.set(1);
1448            },
1449            1 => {
1450                // a^1 = a
1451                return r.copy(a);
1452            },
1453            else => {},
1454        }
1455
1456        if (a.eqlZero()) {
1457            // 0^b = 0
1458            return r.set(0);
1459        } else if (a.limbs.len == 1 and a.limbs[0] == 1) {
1460            // 1^b = 1 and -1^b = ±1
1461            r.set(1);
1462            r.positive = a.positive or (b & 1) == 0;
1463            return;
1464        }
1465
1466        // Here |a| > 1 and b > 1
1467        const needed_limbs = calcPowLimbsBufferLen(a.bitCountAbs(), b);
1468        assert(r.limbs.len >= needed_limbs);
1469        assert(limbs_buffer.len >= needed_limbs);
1470
1471        llpow(r.limbs, a.limbs, b, limbs_buffer);
1472
1473        r.normalize(needed_limbs);
1474        r.positive = a.positive or (b & 1) == 0;
1475    }
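
    // Illustrative sketch, not part of the original source: `pow` with buffers sized via
    // `calcPowLimbsBufferLen`. For a = 3 the value of `a.bitCountAbs()` is 2, which is
    // written out as a literal so the buffer sizes are comptime-known.
    test "pow example (illustrative)" {
        var a_limbs: [1]Limb = undefined;
        const a = Mutable.init(&a_limbs, 3).toConst();

        var r_limbs: [calcPowLimbsBufferLen(2, 5)]Limb = undefined;
        var scratch: [calcPowLimbsBufferLen(2, 5)]Limb = undefined;
        var r = Mutable.init(&r_limbs, 0);

        r.pow(a, 5, &scratch); // 3^5 == 243
        try std.testing.expectEqual(@as(u16, 243), try r.toConst().toInt(u16));
    }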
1476
1477    /// r = ⌊√a⌋
1478    ///
1479    /// r may alias a.
1480    ///
1481    /// Asserts that `r` has enough limbs to store the result. Upper bound is
1482    /// `(a.limbs.len - 1) / 2 + 1`.
1483    ///
1484    /// `limbs_buffer` is used for temporary storage.
1485    /// The amount required is given by `calcSqrtLimbsBufferLen`.
1486    pub fn sqrt(
1487        r: *Mutable,
1488        a: Const,
1489        limbs_buffer: []Limb,
1490    ) void {
1491        // Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 SqrtInt
1492        // https://members.loria.fr/PZimmermann/mca/pub226.html
1493        var buf_index: usize = 0;
1494        var t = b: {
1495            const start = buf_index;
1496            buf_index += a.limbs.len;
1497            break :b Mutable.init(limbs_buffer[start..buf_index], 0);
1498        };
1499        var u = b: {
1500            const start = buf_index;
1501            const shift = (a.bitCountAbs() + 1) / 2;
1502            buf_index += 1 + ((shift / limb_bits) + 1);
1503            var m = Mutable.init(limbs_buffer[start..buf_index], 1);
1504            m.shiftLeft(m.toConst(), shift); // u must be >= ⌊√a⌋, and should be as small as possible for efficiency
1505            break :b m;
1506        };
1507        var s = b: {
1508            const start = buf_index;
1509            buf_index += u.limbs.len;
1510            break :b u.toConst().toMutable(limbs_buffer[start..buf_index]);
1511        };
1512        var rem = b: {
1513            const start = buf_index;
1514            buf_index += s.limbs.len;
1515            break :b Mutable.init(limbs_buffer[start..buf_index], 0);
1516        };
1517
1518        while (true) {
1519            t.divFloor(&rem, a, s.toConst(), limbs_buffer[buf_index..]);
1520            t.add(t.toConst(), s.toConst());
1521            u.shiftRight(t.toConst(), 1);
1522
1523            if (u.toConst().order(s.toConst()).compare(.gte)) {
1524                r.copy(s.toConst());
1525                return;
1526            }
1527
1528            // Avoid copying u to s by swapping u and s
1529            const tmp_s = s;
1530            s = u;
1531            u = tmp_s;
1532        }
1533    }
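
    // Illustrative sketch, not part of the original source: `sqrt` of a value that is not
    // a perfect square, showing the floor behavior. The scratch buffer is sized with
    // `calcSqrtLimbsBufferLen` from the operand's bit count (130 needs 8 bits).
    test "sqrt example (illustrative)" {
        var a_limbs: [1]Limb = undefined;
        const a = Mutable.init(&a_limbs, 130).toConst();

        var r_limbs: [1]Limb = undefined;
        var r = Mutable.init(&r_limbs, 0);

        var scratch: [calcSqrtLimbsBufferLen(8)]Limb = undefined;
        r.sqrt(a, &scratch); // ⌊√130⌋ == 11
        try std.testing.expectEqual(@as(u8, 11), try r.toConst().toInt(u8));
    }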
1534
1535    /// rma may not alias x or y.
1536    /// x and y may alias each other.
1537    /// Asserts that `rma` has enough limbs to store the result. Upper bound is given by `calcGcdNoAliasLimbLen`.
1538    ///
1539    /// `limbs_buffer` is used for temporary storage during the operation.
1540    pub fn gcdNoAlias(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.array_list.Managed(Limb)) !void {
1541        assert(rma.limbs.ptr != x.limbs.ptr); // illegal aliasing
1542        assert(rma.limbs.ptr != y.limbs.ptr); // illegal aliasing
1543        return gcdLehmer(rma, x, y, limbs_buffer);
1544    }
1545
1546    fn gcdLehmer(result: *Mutable, xa: Const, ya: Const, limbs_buffer: *std.array_list.Managed(Limb)) !void {
1547        var x = try xa.toManaged(limbs_buffer.allocator);
1548        defer x.deinit();
1549        x.abs();
1550
1551        var y = try ya.toManaged(limbs_buffer.allocator);
1552        defer y.deinit();
1553        y.abs();
1554
1555        if (x.toConst().order(y.toConst()) == .lt) {
1556            x.swap(&y);
1557        }
1558
1559        var t_big = try Managed.init(limbs_buffer.allocator);
1560        defer t_big.deinit();
1561
1562        var r = try Managed.init(limbs_buffer.allocator);
1563        defer r.deinit();
1564
1565        var tmp_x = try Managed.init(limbs_buffer.allocator);
1566        defer tmp_x.deinit();
1567
1568        while (y.len() > 1 and !y.eqlZero()) {
1569            assert(x.isPositive() and y.isPositive());
1570            assert(x.len() >= y.len());
1571
1572            var xh: SignedDoubleLimb = x.limbs[x.len() - 1];
1573            var yh: SignedDoubleLimb = if (x.len() > y.len()) 0 else y.limbs[x.len() - 1];
1574
1575            var A: SignedDoubleLimb = 1;
1576            var B: SignedDoubleLimb = 0;
1577            var C: SignedDoubleLimb = 0;
1578            var D: SignedDoubleLimb = 1;
1579
1580            while (yh + C != 0 and yh + D != 0) {
1581                const q = @divFloor(xh + A, yh + C);
1582                const qp = @divFloor(xh + B, yh + D);
1583                if (q != qp) {
1584                    break;
1585                }
1586
1587                var t = A - q * C;
1588                A = C;
1589                C = t;
1590                t = B - q * D;
1591                B = D;
1592                D = t;
1593
1594                t = xh - q * yh;
1595                xh = yh;
1596                yh = t;
1597            }
1598
1599            if (B == 0) {
1600                // t_big = x % y, r is unused
1601                try r.divTrunc(&t_big, &x, &y);
1602                assert(t_big.isPositive());
1603
1604                x.swap(&y);
1605                y.swap(&t_big);
1606            } else {
1607                var storage: [8]Limb = undefined;
1608                const Ap = fixedIntFromSignedDoubleLimb(A, storage[0..2]).toManaged(limbs_buffer.allocator);
1609                const Bp = fixedIntFromSignedDoubleLimb(B, storage[2..4]).toManaged(limbs_buffer.allocator);
1610                const Cp = fixedIntFromSignedDoubleLimb(C, storage[4..6]).toManaged(limbs_buffer.allocator);
1611                const Dp = fixedIntFromSignedDoubleLimb(D, storage[6..8]).toManaged(limbs_buffer.allocator);
1612
1613                // t_big = Ax + By
1614                try r.mul(&x, &Ap);
1615                try t_big.mul(&y, &Bp);
1616                try t_big.add(&r, &t_big);
1617
1618                // u = Cx + Dy, r as u
1619                try tmp_x.copy(x.toConst());
1620                try x.mul(&tmp_x, &Cp);
1621                try r.mul(&y, &Dp);
1622                try r.add(&x, &r);
1623
1624                x.swap(&t_big);
1625                y.swap(&r);
1626            }
1627        }
1628
1629        // euclidean algorithm
1630        assert(x.toConst().order(y.toConst()) != .lt);
1631
1632        while (!y.toConst().eqlZero()) {
1633            try t_big.divTrunc(&r, &x, &y);
1634            x.swap(&y);
1635            y.swap(&r);
1636        }
1637
1638        result.copy(x.toConst());
1639    }
1640
1641    // Truncates by default.
1642    fn div(q: *Mutable, r: *Mutable, x: *Mutable, y: *Mutable) void {
1643        assert(!y.eqlZero()); // division by zero
1644        assert(q != r); // illegal aliasing
1645
1646        const q_positive = (x.positive == y.positive);
1647        const r_positive = x.positive;
1648
1649        if (x.toConst().orderAbs(y.toConst()) == .lt) {
1650            // q may alias x so handle r first.
1651            r.copy(x.toConst());
1652            r.positive = r_positive;
1653
1654            q.set(0);
1655            return;
1656        }
1657
1658        // Handle trailing zero-words of divisor/dividend. These are not handled in the following
1659        // algorithms.
1660        // Note, there must be a non-zero limb for either.
1661        // const x_trailing = std.mem.indexOfScalar(Limb, x.limbs[0..x.len], 0).?;
1662        // const y_trailing = std.mem.indexOfScalar(Limb, y.limbs[0..y.len], 0).?;
1663
1664        const x_trailing = for (x.limbs[0..x.len], 0..) |xi, i| {
1665            if (xi != 0) break i;
1666        } else unreachable;
1667
1668        const y_trailing = for (y.limbs[0..y.len], 0..) |yi, i| {
1669            if (yi != 0) break i;
1670        } else unreachable;
1671
1672        const xy_trailing = @min(x_trailing, y_trailing);
1673
1674        if (y.len - xy_trailing == 1) {
1675            const divisor = y.limbs[y.len - 1];
1676
1677            // Optimization for small divisor. By using a half limb we can avoid requiring DoubleLimb
1678            // divisions in the hot code path. This may often require compiler_rt software-emulation.
1679            if (divisor < maxInt(HalfLimb)) {
1680                lldiv0p5(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], @as(HalfLimb, @intCast(divisor)));
1681            } else {
1682                lldiv1(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], divisor);
1683            }
1684
1685            q.normalize(x.len - xy_trailing);
1686            q.positive = q_positive;
1687
1688            r.len = 1;
1689            r.positive = r_positive;
1690        } else {
1691            // Shrink x, y such that the trailing zero limbs shared between are removed.
1692            var x0 = Mutable{
1693                .limbs = x.limbs[xy_trailing..],
1694                .len = x.len - xy_trailing,
1695                .positive = true,
1696            };
1697
1698            var y0 = Mutable{
1699                .limbs = y.limbs[xy_trailing..],
1700                .len = y.len - xy_trailing,
1701                .positive = true,
1702            };
1703
1704            divmod(q, r, &x0, &y0);
1705            q.positive = q_positive;
1706
1707            r.positive = r_positive;
1708        }
1709
1710        if (xy_trailing != 0 and r.limbs[r.len - 1] != 0) {
1711            // Manually shift here since we know it's limb-aligned.
1712            @memmove(r.limbs[xy_trailing..][0..r.len], r.limbs[0..r.len]);
1713            @memset(r.limbs[0..xy_trailing], 0);
1714            r.len += xy_trailing;
1715        }
1716    }
1717
1718    /// Handbook of Applied Cryptography, 14.20
1719    ///
1720    /// x = qy + r where 0 <= r < y
1721    /// y is modified but returned intact.
1722    fn divmod(
1723        q: *Mutable,
1724        r: *Mutable,
1725        x: *Mutable,
1726        y: *Mutable,
1727    ) void {
1728        // 0.
1729        // Normalize so that y[t] > b/2
1730        const lz = @clz(y.limbs[y.len - 1]);
1731        const norm_shift = if (lz == 0 and y.toConst().isOdd())
1732            limb_bits // Force an extra limb so that y is even.
1733        else
1734            lz;
1735
1736        x.shiftLeft(x.toConst(), norm_shift);
1737        y.shiftLeft(y.toConst(), norm_shift);
1738
1739        const n = x.len - 1;
1740        const t = y.len - 1;
1741        const shift = n - t;
1742
1743        // 1.
1744        // for 0 <= j <= n - t, set q[j] to 0
1745        q.len = shift + 1;
1746        q.positive = true;
1747        @memset(q.limbs[0..q.len], 0);
1748
1749        // 2.
1750        // while x >= y * b^(n - t):
1751        //    x -= y * b^(n - t)
1752        //    q[n - t] += 1
1753        // Note, this algorithm is performed only once if y[t] > base/2 and y is even, which we
1754        // enforced in step 0. This means we can replace the while with an if.
1755        // Note, multiplication by b^(n - t) comes down to shifting to the right by n - t limbs.
1756        // We can also replace x >= y * b^(n - t) by x/b^(n - t) >= y, and use shifts for that.
1757        {
1758            // x >= y * b^(n - t) can be replaced by x/b^(n - t) >= y.
1759
1760            // 'divide' x by b^(n - t)
1761            var tmp = Mutable{
1762                .limbs = x.limbs[shift..],
1763                .len = x.len - shift,
1764                .positive = true,
1765            };
1766
1767            if (tmp.toConst().order(y.toConst()) != .lt) {
1768                // Perform x -= y * b^(n - t)
1769                // Note, we can subtract y from x[n - t..] and get the result without shifting.
1770                // We can also re-use tmp which already contains the relevant part of x. Note that
1771                // this also edits x.
1772                // Due to the check above, this cannot underflow.
1773                tmp.sub(tmp.toConst(), y.toConst());
1774
1775                // tmp.sub normalized tmp, but we need to normalize x now.
1776                x.limbs.len = tmp.limbs.len + shift;
1777
1778                q.limbs[shift] += 1;
1779            }
1780        }
1781
1782        // 3.
1783        // for i from n down to t + 1, do
1784        var i = n;
1785        while (i >= t + 1) : (i -= 1) {
1786            const k = i - t - 1;
1787            // 3.1.
1788            // if x_i == y_t:
1789            //   q[i - t - 1] = b - 1
1790            // else:
1791            //   q[i - t - 1] = (x[i] * b + x[i - 1]) / y[t]
1792            if (x.limbs[i] == y.limbs[t]) {
1793                q.limbs[k] = maxInt(Limb);
1794            } else {
1795                const q0 = (@as(DoubleLimb, x.limbs[i]) << limb_bits) | @as(DoubleLimb, x.limbs[i - 1]);
1796                const n0 = @as(DoubleLimb, y.limbs[t]);
1797                q.limbs[k] = @as(Limb, @intCast(q0 / n0));
1798            }
1799
1800            // 3.2
1801            // while q[i - t - 1] * (y[t] * b + y[t - 1]) > x[i] * b * b + x[i - 1] * b + x[i - 2]:
1802            //   q[i - t - 1] -= 1
1803            // Note, if y[t] > b / 2 this part is repeated no more than twice.
1804
1805            // Extract from y.
1806            const y0 = if (t > 0) y.limbs[t - 1] else 0;
1807            const y1 = y.limbs[t];
1808
1809            // Extract from x.
1810            // Note, big endian.
1811            const tmp0 = [_]Limb{
1812                x.limbs[i],
1813                if (i >= 1) x.limbs[i - 1] else 0,
1814                if (i >= 2) x.limbs[i - 2] else 0,
1815            };
1816
1817            while (true) {
1818                // Ad-hoc 2x1 multiplication with q[i - t - 1].
1819                // Note, big endian.
1820                var tmp1 = [_]Limb{ 0, undefined, undefined };
1821                tmp1[2] = addMulLimbWithCarry(0, y0, q.limbs[k], &tmp1[0]);
1822                tmp1[1] = addMulLimbWithCarry(0, y1, q.limbs[k], &tmp1[0]);
1823
1824                // Big-endian compare
1825                if (mem.order(Limb, &tmp1, &tmp0) != .gt)
1826                    break;
1827
1828                q.limbs[k] -= 1;
1829            }
1830
1831            // 3.3.
1832            // x -= q[i - t - 1] * y * b^(i - t - 1)
1833            // Note, we multiply by a single limb here.
1834            // The shift doesn't need to be performed if we add the result of the first multiplication
1835            // to x[i - t - 1].
1836            const underflow = llmulLimb(.sub, x.limbs[k..x.len], y.limbs[0..y.len], q.limbs[k]);
1837
1838            // 3.4.
1839            // if x < 0:
1840            //   x += y * b^(i - t - 1)
1841            //   q[i - t - 1] -= 1
1842            // Note, we check for x < 0 using the underflow flag from the previous operation.
1843            if (underflow) {
1844                // While we didn't properly set the signedness of x, this operation should 'flow' it back to positive.
1845                llaccum(.add, x.limbs[k..x.len], y.limbs[0..y.len]);
1846                q.limbs[k] -= 1;
1847            }
1848        }
1849
1850        x.normalize(x.len);
1851        q.normalize(q.len);
1852
1853        // De-normalize r and y.
1854        r.shiftRight(x.toConst(), norm_shift);
1855        y.shiftRight(y.toConst(), norm_shift);
1856    }
1857
1858    /// Truncate an integer to a number of bits, following 2s-complement semantics.
1859    /// `r` may alias `a`.
1860    ///
1861    /// Asserts `r` has enough storage to compute the result.
1862    /// The upper bound is `calcTwosCompLimbCount(bit_count)`.
1863    pub fn truncate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
1864        // Handle 0-bit integers.
1865        if (bit_count == 0) {
1866            @branchHint(.unlikely);
1867            r.set(0);
1868            return;
1869        }
1870
1871        const max_limbs = calcTwosCompLimbCount(bit_count);
1872        const sign_bit = @as(Limb, 1) << @truncate(bit_count - 1);
1873        const mask = @as(Limb, maxInt(Limb)) >> @truncate(-%bit_count);
1874
1875        // Guess whether the result will have the same sign as `a`.
1876        //  * If the result will be signed zero, the guess is `true`.
1877        //  * If the result will be the minimum signed integer, the guess is `false`.
1878        //  * If the result will be unsigned zero, the guess is `a.positive`.
1879        //  * Otherwise the guess is correct.
1880        const same_sign_guess = switch (signedness) {
1881            .signed => max_limbs > a.limbs.len or a.limbs[max_limbs - 1] & sign_bit == 0,
1882            .unsigned => a.positive,
1883        };
1884
1885        const abs_trunc_a: Const = .{
1886            .positive = true,
1887            .limbs = a.limbs[0..llnormalize(a.limbs[0..@min(a.limbs.len, max_limbs)])],
1888        };
1889        if (same_sign_guess or abs_trunc_a.eqlZero()) {
1890            // One of the following is true:
1891            //  * The result is zero.
1892            //  * The result is non-zero and has the same sign as `a`.
1893            r.copy(abs_trunc_a);
1894            if (max_limbs <= r.len) r.limbs[max_limbs - 1] &= mask;
1895            r.normalize(r.len);
1896            r.positive = a.positive or r.eqlZero();
1897        } else {
1898            // One of the following is true:
1899            //  * The result is the minimum signed integer.
1900            //  * The result is unsigned zero.
1901            //  * The result is non-zero and has the opposite sign as `a`.
1902            r.addScalar(abs_trunc_a, -1);
1903            llnot(r.limbs[0..r.len]);
1904            @memset(r.limbs[r.len..max_limbs], maxInt(Limb));
1905            r.limbs[max_limbs - 1] &= mask;
1906            r.normalize(max_limbs);
1907            r.positive = switch (signedness) {
1908                // The only value with the sign bit still set is the minimum signed integer.
1909                .signed => !a.positive and r.limbs[max_limbs - 1] & sign_bit == 0,
1910                .unsigned => !a.positive or r.eqlZero(),
1911            };
1912        }
1913    }
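
    // Illustrative sketch, not part of the original source: two's complement truncation
    // of values that do not fit the target width, mirroring `@truncate` semantics.
    test "truncate example (illustrative)" {
        var in_limbs: [1]Limb = undefined;
        var r_limbs: [2]Limb = undefined;
        var r = Mutable.init(&r_limbs, 0);

        const a = Mutable.init(&in_limbs, 300).toConst();
        r.truncate(a, .unsigned, 8); // 300 mod 256 == 44
        try std.testing.expectEqual(@as(u8, 44), try r.toConst().toInt(u8));

        const b = Mutable.init(&in_limbs, 200).toConst();
        r.truncate(b, .signed, 8); // 200 reinterpreted as an i8 is -56
        try std.testing.expectEqual(@as(i8, -56), try r.toConst().toInt(i8));
    }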
1914
1915    /// Saturate an integer to a number of bits, following 2s-complement semantics.
1916    /// r may alias a. Note that when `a` already fits, `r` is left unchanged, so `r` should alias `a` or already hold the same value.
1917    ///
1918    /// Asserts `r` has enough storage to store the result.
1919    /// The upper bound is `calcTwosCompLimbCount(bit_count)`.
1920    pub fn saturate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
1921        if (!a.fitsInTwosComp(signedness, bit_count)) {
1922            r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
1923        }
1924    }
1925
1926    /// Read the value of `x` from `buffer`.
1927    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`.
1928    ///
1929    /// The contents of `buffer` are interpreted as if they were the contents of
1930    /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`
1931    /// and any required padding bits are expected on the MSB end.
1932    pub fn readTwosComplement(
1933        x: *Mutable,
1934        buffer: []const u8,
1935        bit_count: usize,
1936        endian: Endian,
1937        signedness: Signedness,
1938    ) void {
1939        return readPackedTwosComplement(x, buffer, 0, bit_count, endian, signedness);
1940    }
1941
1942    /// Read the value of `x` from a packed memory `buffer`.
1943    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`
1944    /// at offset `bit_offset`.
1945    ///
1946    /// This is equivalent to loading the value of an integer with `bit_count` bits as
1947    /// if it were a field in packed memory at the provided bit offset.
1948    pub fn readPackedTwosComplement(
1949        x: *Mutable,
1950        buffer: []const u8,
1951        bit_offset: usize,
1952        bit_count: usize,
1953        endian: Endian,
1954        signedness: Signedness,
1955    ) void {
1956        if (bit_count == 0) {
1957            x.limbs[0] = 0;
1958            x.len = 1;
1959            x.positive = true;
1960            return;
1961        }
1962
1963        // Check whether the input is negative
1964        var positive = true;
1965        if (signedness == .signed) {
1966            const total_bits = bit_offset + bit_count;
1967            const last_byte = switch (endian) {
1968                .little => ((total_bits + 7) / 8) - 1,
1969                .big => buffer.len - ((total_bits + 7) / 8),
1970            };
1971
1972            const sign_bit = @as(u8, 1) << @as(u3, @intCast((total_bits - 1) % 8));
1973            positive = ((buffer[last_byte] & sign_bit) == 0);
1974        }
1975
1976        // Copy all complete limbs
1977        var carry: u1 = 1;
1978        var limb_index: usize = 0;
1979        var bit_index: usize = 0;
1980        while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) {
1981            // Read one Limb of bits
1982            var limb = mem.readPackedInt(Limb, buffer, bit_index + bit_offset, endian);
1983            bit_index += @bitSizeOf(Limb);
1984
1985            // 2's complement (bitwise not, then add carry bit)
1986            if (!positive) {
1987                const ov = @addWithOverflow(~limb, carry);
1988                limb = ov[0];
1989                carry = ov[1];
1990            }
1991            x.limbs[limb_index] = limb;
1992        }
1993
1994        // Copy the remaining bits
1995        if (bit_count != bit_index) {
1996            // Read all remaining bits
1997            var limb = switch (signedness) {
1998                .unsigned => mem.readVarPackedInt(Limb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .unsigned),
1999                .signed => b: {
2000                    const SLimb = std.meta.Int(.signed, @bitSizeOf(Limb));
2001                    const limb = mem.readVarPackedInt(SLimb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .signed);
2002                    break :b @as(Limb, @bitCast(limb));
2003                },
2004            };
2005
2006            // 2's complement (bitwise not, then add carry bit)
2007            if (!positive) {
2008                const ov = @addWithOverflow(~limb, carry);
2009                assert(ov[1] == 0);
2010                limb = ov[0];
2011            }
2012            x.limbs[limb_index] = limb;
2013
2014            limb_index += 1;
2015        }
2016
2017        x.positive = positive;
2018        x.len = limb_index;
2019        x.normalize(x.len);
2020    }
2021
2022    /// Normalize a possible sequence of leading zeros.
2023    ///
2024    /// [1, 2, 3, 4, 0] -> [1, 2, 3, 4]
2025    /// [1, 2, 0, 0, 0] -> [1, 2]
2026    /// [0, 0, 0, 0, 0] -> [0]
2027    pub fn normalize(r: *Mutable, length: usize) void {
2028        r.len = llnormalize(r.limbs[0..length]);
2029    }
2030
2031    pub fn format(self: Mutable, w: *std.Io.Writer) std.Io.Writer.Error!void {
2032        return formatNumber(self, w, .{});
2033    }
2034
2035    /// If the absolute value of the integer is greater than or equal to `pow(2, 64 * @sizeOf(usize) * 8)`,
2036    /// this function will fail to print the string, printing "(BigInt)" instead of a number.
2037    /// This is because the rendering algorithm requires reversing a string, which requires O(N) memory.
2038    /// See `Const.toString` and `Const.toStringAlloc` for a way to print big integers without failure.
2039    pub fn formatNumber(self: Mutable, w: *std.Io.Writer, n: std.fmt.Number) std.Io.Writer.Error!void {
2040        return self.toConst().formatNumber(w, n);
2041    }
2042};
2043
2044/// An arbitrary-precision big integer, with a fixed set of immutable limbs.
2045pub const Const = struct {
2046    /// Raw digits. These are:
2047    ///
2048    /// * Little-endian ordered
2049    /// * limbs.len >= 1
2050    /// * Zero is represented as limbs.len == 1 with limbs[0] == 0.
2051    ///
2052    /// Accessing limbs directly should be avoided.
2053    limbs: []const Limb,
2054    positive: bool,
2055
2056    /// The result is an independent resource which is managed by the caller.
2057    pub fn toManaged(self: Const, allocator: Allocator) Allocator.Error!Managed {
2058        const limbs = try allocator.alloc(Limb, @max(Managed.default_capacity, self.limbs.len));
2059        @memcpy(limbs[0..self.limbs.len], self.limbs);
2060        return Managed{
2061            .allocator = allocator,
2062            .limbs = limbs,
2063            .metadata = if (self.positive)
2064                self.limbs.len & ~Managed.sign_bit
2065            else
2066                self.limbs.len | Managed.sign_bit,
2067        };
2068    }
2069
2070    /// Asserts `limbs` is big enough to store the value.
2071    pub fn toMutable(self: Const, limbs: []Limb) Mutable {
2072        @memcpy(limbs[0..self.limbs.len], self.limbs[0..self.limbs.len]);
2073        return .{
2074            .limbs = limbs,
2075            .positive = self.positive,
2076            .len = self.limbs.len,
2077        };
2078    }
2079
2080    pub fn dump(self: Const) void {
2081        for (self.limbs[0..self.limbs.len]) |limb| {
2082            std.debug.print("{x} ", .{limb});
2083        }
2084        std.debug.print("len={} positive={}\n", .{ self.limbs.len, self.positive });
2085    }
2086
2087    pub fn abs(self: Const) Const {
2088        return .{
2089            .limbs = self.limbs,
2090            .positive = true,
2091        };
2092    }
2093
2094    pub fn negate(self: Const) Const {
2095        return .{
2096            .limbs = self.limbs,
2097            .positive = !self.positive,
2098        };
2099    }
2100
2101    pub fn isOdd(self: Const) bool {
2102        return self.limbs[0] & 1 != 0;
2103    }
2104
2105    pub fn isEven(self: Const) bool {
2106        return !self.isOdd();
2107    }
2108
2109    /// Returns the number of bits required to represent the absolute value of an integer.
2110    pub fn bitCountAbs(self: Const) usize {
2111        return (self.limbs.len - 1) * limb_bits + (limb_bits - @clz(self.limbs[self.limbs.len - 1]));
2112    }
2113
2114    /// Returns the number of bits required to represent the integer in twos-complement form.
2115    ///
2116    /// If the integer is negative the value returned is the number of bits needed by a signed
2117    /// integer to represent the value. If positive the value is the number of bits for an
2118    /// unsigned integer. Any unsigned integer will fit in the signed integer with bitcount
2119    /// one greater than the returned value.
2120    ///
2121    /// e.g. -127 returns 8 as it will fit in an i8. 127 returns 7 since it fits in a u7.
2122    pub fn bitCountTwosComp(self: Const) usize {
2123        var bits = self.bitCountAbs();
2124
2125        // If the entire value has only one bit set (e.g. 0b100000000) then the negation in twos
2126        // complement requires one less bit.
2127        if (!self.positive) block: {
2128            bits += 1;
2129
2130            if (@popCount(self.limbs[self.limbs.len - 1]) == 1) {
2131                for (self.limbs[0 .. self.limbs.len - 1]) |limb| {
2132                    if (@popCount(limb) != 0) {
2133                        break :block;
2134                    }
2135                }
2136
2137                bits -= 1;
2138            }
2139        }
2140
2141        return bits;
2142    }
2143
2144    /// Returns the number of bits required to represent the integer in twos-complement form
2145    /// with the given signedness.
2146    pub fn bitCountTwosCompForSignedness(self: Const, signedness: std.builtin.Signedness) usize {
2147        return self.bitCountTwosComp() + @intFromBool(self.positive and signedness == .signed);
2148    }
2149
2150    /// @popCount with two's complement semantics.
2151    ///
2152    /// This returns the number of 1 bits set when the value would be represented in
2153    /// two's complement with the given integer width (bit_count).
2154    /// This includes the leading sign bit, which will be set for negative values.
2155    ///
2156    /// Asserts that bit_count is enough to represent value in two's complement
2157    /// and that the final result fits in a usize.
2158    /// Asserts that there are no trailing empty limbs on the most significant end,
2159    /// i.e. that limb count matches `calcLimbLen()` and zero is not negative.
2160    pub fn popCount(self: Const, bit_count: usize) usize {
2161        var sum: usize = 0;
2162        if (self.positive) {
2163            for (self.limbs) |limb| {
2164                sum += @popCount(limb);
2165            }
2166        } else {
2167            assert(self.fitsInTwosComp(.signed, bit_count));
2168            assert(self.limbs[self.limbs.len - 1] != 0);
2169
2170            var remaining_bits = bit_count;
2171            var carry: u1 = 1;
2172            var add_res: Limb = undefined;
2173
2174            // All but the most significant limb.
2175            for (self.limbs[0 .. self.limbs.len - 1]) |limb| {
2176                const ov = @addWithOverflow(~limb, carry);
2177                add_res = ov[0];
2178                carry = ov[1];
2179                sum += @popCount(add_res);
2180                remaining_bits -= limb_bits; // Asserted not to underflow by fitsInTwosComp
2181            }
2182
2183            // The most significant limb may have fewer than @bitSizeOf(Limb) meaningful bits,
2184            // which we can detect with @clz().
2185            // There may also be fewer limbs than needed to fill bit_count.
2186            const limb = self.limbs[self.limbs.len - 1];
2187            const leading_zeroes = @clz(limb);
2188            // The most significant limb is asserted not to be all 0s (above),
2189            // so ~limb cannot be all 1s, and ~limb + 1 cannot overflow.
2190            sum += @popCount(~limb + carry);
2191            sum -= leading_zeroes; // All leading zeroes were flipped and added to sum, so undo those
2192            const remaining_ones = remaining_bits - (limb_bits - leading_zeroes); // All bits not covered by limbs
2193            sum += remaining_ones;
2194        }
2195        return sum;
2196    }
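
    // Illustrative sketch, not part of the original source: `popCount` for a positive value
    // and for a negative value viewed in a small two's complement width.
    test "popCount example (illustrative)" {
        var pos_limbs: [1]Limb = undefined;
        const pos = Mutable.init(&pos_limbs, 0b10110).toConst();
        try std.testing.expectEqual(@as(usize, 3), pos.popCount(16));

        var neg_limbs: [1]Limb = undefined;
        const neg = Mutable.init(&neg_limbs, -2).toConst();
        // -2 as a 4-bit two's complement value is 0b1110, which has three set bits.
        try std.testing.expectEqual(@as(usize, 3), neg.popCount(4));
    }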
2197
2198    pub fn fitsInTwosComp(self: Const, signedness: Signedness, bit_count: usize) bool {
2199        if (self.eqlZero()) {
2200            return true;
2201        }
2202        if (signedness == .unsigned and !self.positive) {
2203            return false;
2204        }
2205        return bit_count >= self.bitCountTwosCompForSignedness(signedness);
2206    }
2207
2208    /// Returns whether self can fit into an integer of the requested type.
2209    pub fn fits(self: Const, comptime T: type) bool {
2210        const info = @typeInfo(T).int;
2211        return self.fitsInTwosComp(info.signedness, info.bits);
2212    }
2213
2214    /// Returns the approximate size of the integer in the given base. Negative values accommodate for
2215    /// Returns the approximate size of the integer in the given base. For negative values, the result
2216    /// accounts for the minus sign. This is used for determining the number of characters needed to print the
2217    /// TODO See if we can make this exact.
2218    pub fn sizeInBaseUpperBound(self: Const, base: usize) usize {
2219        const bit_count = @as(usize, @intFromBool(!self.positive)) + self.bitCountAbs();
2220        return (bit_count / math.log2(base)) + 2;
2221    }
2222
2223    pub const ConvertError = error{
2224        NegativeIntoUnsigned,
2225        TargetTooSmall,
2226    };
2227
2228    /// Convert `self` to `Int`.
2229    ///
2230    /// Returns an error if self cannot be narrowed into the requested type without truncation.
2231    pub fn toInt(self: Const, comptime Int: type) ConvertError!Int {
2232        switch (@typeInfo(Int)) {
2233            .int => |info| {
2234                // Make sure -0 is handled correctly.
2235                if (self.eqlZero()) return 0;
2236
2237                const Unsigned = std.meta.Int(.unsigned, info.bits);
2238
2239                if (!self.fitsInTwosComp(info.signedness, info.bits)) {
2240                    return error.TargetTooSmall;
2241                }
2242
2243                var r: Unsigned = 0;
2244
2245                if (@sizeOf(Unsigned) <= @sizeOf(Limb)) {
2246                    r = @intCast(self.limbs[0]);
2247                } else {
2248                    for (self.limbs[0..self.limbs.len], 0..) |_, ri| {
2249                        const limb = self.limbs[self.limbs.len - ri - 1];
2250                        r <<= limb_bits;
2251                        r |= limb;
2252                    }
2253                }
2254
2255                if (info.signedness == .unsigned) {
2256                    return if (self.positive) @intCast(r) else error.NegativeIntoUnsigned;
2257                } else {
2258                    if (self.positive) {
2259                        return @intCast(r);
2260                    } else {
2261                        if (math.cast(Int, r)) |ok| {
2262                            return -ok;
2263                        } else {
2264                            return minInt(Int);
2265                        }
2266                    }
2267                }
2268            },
2269            else => @compileError("expected int type, found '" ++ @typeName(Int) ++ "'"),
2270        }
2271    }
2272
2273    /// Convert self to `Float`.
2274    pub fn toFloat(self: Const, comptime Float: type, round: Round) struct { Float, Exactness } {
2275        if (Float == comptime_float) return self.toFloat(f128, round);
2276        const normalized_abs: Const = .{
2277            .limbs = self.limbs[0..llnormalize(self.limbs)],
2278            .positive = true,
2279        };
2280        if (normalized_abs.eqlZero()) return .{ if (self.positive) 0.0 else -0.0, .exact };
2281
2282        const Repr = std.math.FloatRepr(Float);
2283        var mantissa_limbs: [calcNonZeroTwosCompLimbCount(1 + @bitSizeOf(Repr.Mantissa))]Limb = undefined;
2284        var mantissa: Mutable = .{
2285            .limbs = &mantissa_limbs,
2286            .positive = undefined,
2287            .len = undefined,
2288        };
2289        var exponent = normalized_abs.bitCountAbs() - 1;
2290        const exactness: Exactness = exactness: {
2291            if (exponent <= @bitSizeOf(Repr.Normalized.Fraction)) {
2292                mantissa.shiftLeft(normalized_abs, @intCast(@bitSizeOf(Repr.Normalized.Fraction) - exponent));
2293                break :exactness .exact;
2294            }
2295            const shift: usize = @intCast(exponent - @bitSizeOf(Repr.Normalized.Fraction));
2296            mantissa.shiftRight(normalized_abs, shift);
2297            const final_limb_index = (shift - 1) / limb_bits;
2298            const round_bits = normalized_abs.limbs[final_limb_index] << @truncate(-%shift) |
2299                @intFromBool(!std.mem.allEqual(Limb, normalized_abs.limbs[0..final_limb_index], 0));
2300            if (round_bits == 0) break :exactness .exact;
2301            round: switch (round) {
2302                .nearest_even => {
2303                    const half: Limb = 1 << (limb_bits - 1);
2304                    if (round_bits >= half) mantissa.addScalar(mantissa.toConst(), 1);
2305                    if (round_bits == half) mantissa.limbs[0] &= ~@as(Limb, 1);
2306                },
2307                .away => mantissa.addScalar(mantissa.toConst(), 1),
2308                .trunc => {},
2309                .floor => if (!self.positive) continue :round .away,
2310                .ceil => if (self.positive) continue :round .away,
2311            }
2312            break :exactness .inexact;
2313        };
2314        const normalized_res: Repr.Normalized = .{
2315            .fraction = @truncate(mantissa.toInt(Repr.Mantissa) catch |err| switch (err) {
2316                error.NegativeIntoUnsigned => unreachable,
2317                error.TargetTooSmall => fraction: {
2318                    assert(mantissa.toConst().orderAgainstScalar(1 << @bitSizeOf(Repr.Mantissa)).compare(.eq));
2319                    exponent += 1;
2320                    break :fraction 1 << (@bitSizeOf(Repr.Mantissa) - 1);
2321                },
2322            }),
2323            .exponent = std.math.lossyCast(Repr.Normalized.Exponent, exponent),
2324        };
2325        return .{ normalized_res.reconstruct(if (self.positive) .positive else .negative), exactness };
2326    }
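
    // Illustrative sketch, not part of the original source: `toFloat` returning both the
    // converted value and whether the conversion was exact. The second operand deliberately
    // carries more significant bits than an f64 mantissa can hold.
    test "toFloat example (illustrative)" {
        var limbs: [2]Limb = undefined;

        const small = Mutable.init(&limbs, 1 << 20).toConst();
        const small_value, const small_exactness = small.toFloat(f64, .nearest_even);
        try std.testing.expectEqual(@as(f64, 1 << 20), small_value);
        try std.testing.expect(small_exactness == .exact);

        const big = Mutable.init(&limbs, (1 << 60) + 1).toConst();
        const big_value, const big_exactness = big.toFloat(f64, .nearest_even);
        try std.testing.expectEqual(@as(f64, 1 << 60), big_value); // low bit rounded away
        try std.testing.expect(big_exactness == .inexact);
    }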
2327
2328    pub fn format(self: Const, w: *std.Io.Writer) std.Io.Writer.Error!void {
2329        return self.formatNumber(w, .{});
2330    }
2331
2332    /// If the absolute value of the integer is greater than or equal to `pow(2, 64 * @sizeOf(usize) * 8)`,
2333    /// this function will fail to print the string, printing "(BigInt)" instead of a number.
2334    /// This is because the rendering algorithm requires reversing a string, which requires O(N) memory.
2335    /// See `toString` and `toStringAlloc` for a way to print big integers without failure.
2336    pub fn formatNumber(self: Const, w: *std.Io.Writer, number: std.fmt.Number) std.Io.Writer.Error!void {
2337        const available_len = 64;
2338        if (self.limbs.len > available_len)
2339            return w.writeAll("(BigInt)");
2340
2341        var limbs: [calcToStringLimbsBufferLen(available_len, 10)]Limb = undefined;
2342
2343        const biggest: Const = .{
2344            .limbs = &([1]Limb{comptime math.maxInt(Limb)} ** available_len),
2345            .positive = false,
2346        };
2347        var buf: [biggest.sizeInBaseUpperBound(2)]u8 = undefined;
2348        const base: u8 = number.mode.base() orelse @panic("TODO print big int in scientific form");
2349        const len = self.toString(&buf, base, number.case, &limbs);
2350        return w.writeAll(buf[0..len]);
2351    }
2352
2353    /// Converts self to a string in the requested base.
2354    /// Caller owns returned memory.
2355    /// Asserts that `base` is in the range [2, 36].
2356    /// See also `toString`, a lower level function than this.
2357    pub fn toStringAlloc(self: Const, allocator: Allocator, base: u8, case: std.fmt.Case) Allocator.Error![]u8 {
2358        assert(base >= 2);
2359        assert(base <= 36);
2360
2361        if (self.eqlZero()) {
2362            return allocator.dupe(u8, "0");
2363        }
2364        const string = try allocator.alloc(u8, self.sizeInBaseUpperBound(base));
2365        errdefer allocator.free(string);
2366
2367        const limbs = try allocator.alloc(Limb, calcToStringLimbsBufferLen(self.limbs.len, base));
2368        defer allocator.free(limbs);
2369
2370        return allocator.realloc(string, self.toString(string, base, case, limbs));
2371    }
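
    // Illustrative sketch, not part of the original source: allocating string conversion
    // in two bases using the testing allocator.
    test "toStringAlloc example (illustrative)" {
        const allocator = std.testing.allocator;
        var limbs: [1]Limb = undefined;
        const value = Mutable.init(&limbs, 0xdeadbeef).toConst();

        const hex = try value.toStringAlloc(allocator, 16, .lower);
        defer allocator.free(hex);
        try std.testing.expectEqualStrings("deadbeef", hex);

        const dec = try value.toStringAlloc(allocator, 10, .lower);
        defer allocator.free(dec);
        try std.testing.expectEqualStrings("3735928559", dec);
    }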
2372
2373    /// Converts self to a string in the requested base.
2374    /// Asserts that `base` is in the range [2, 36].
2375    /// `string` is a caller-provided slice of at least `sizeInBaseUpperBound` bytes,
2376    /// where the result is written to.
2377    /// Returns the length of the string.
2378    /// `limbs_buffer` is caller-provided memory for `toString` to use as a working area. It must have
2379    /// length of at least `calcToStringLimbsBufferLen`.
2380    /// In the case of power-of-two base, `limbs_buffer` is ignored.
2381    /// See also `toStringAlloc`, a higher level function than this.
2382    pub fn toString(self: Const, string: []u8, base: u8, case: std.fmt.Case, limbs_buffer: []Limb) usize {
2383        assert(base >= 2);
2384        assert(base <= 36);
2385
2386        if (self.eqlZero()) {
2387            string[0] = '0';
2388            return 1;
2389        }
2390
2391        var digits_len: usize = 0;
2392
2393        // Power of two: can do a single pass and use masks to extract digits.
2394        if (math.isPowerOfTwo(base)) {
2395            const base_shift = math.log2_int(Limb, base);
2396
2397            outer: for (self.limbs[0..self.limbs.len]) |limb| {
2398                var shift: usize = 0;
2399                while (shift < limb_bits) : (shift += base_shift) {
2400                    const r = @as(u8, @intCast((limb >> @as(Log2Limb, @intCast(shift))) & @as(Limb, base - 1)));
2401                    const ch = std.fmt.digitToChar(r, case);
2402                    string[digits_len] = ch;
2403                    digits_len += 1;
2404                    // If we hit the end, it must be all zeroes from here.
2405                    if (digits_len == string.len) break :outer;
2406                }
2407            }
2408
2409            // There will always be a non-zero digit somewhere.
2410            while (string[digits_len - 1] == '0') {
2411                digits_len -= 1;
2412            }
2413        } else {
2414            // Non power-of-two: batch divisions per word size.
2415            // We use a HalfLimb here so the division uses the faster lldiv0p5 over lldiv1 codepath.
2416            const digits_per_limb = math.log(HalfLimb, base, maxInt(HalfLimb));
2417            var limb_base: Limb = 1;
2418            var j: usize = 0;
2419            while (j < digits_per_limb) : (j += 1) {
2420                limb_base *= base;
2421            }
2422            const b: Const = .{ .limbs = &[_]Limb{limb_base}, .positive = true };
2423
2424            var q: Mutable = .{
2425                .limbs = limbs_buffer[0 .. self.limbs.len + 2],
2426                .positive = true, // Make absolute by ignoring self.positive.
2427                .len = self.limbs.len,
2428            };
2429            @memcpy(q.limbs[0..self.limbs.len], self.limbs);
2430
2431            var r: Mutable = .{
2432                .limbs = limbs_buffer[q.limbs.len..][0..self.limbs.len],
2433                .positive = true,
2434                .len = 1,
2435            };
2436            r.limbs[0] = 0;
2437
2438            const rest_of_the_limbs_buf = limbs_buffer[q.limbs.len + r.limbs.len ..];
2439
2440            while (q.len >= 2) {
2441                // Passing an allocator here would not be helpful since this division is destroying
2442                // information, not creating it. [TODO citation needed]
2443                q.divTrunc(&r, q.toConst(), b, rest_of_the_limbs_buf);
2444
2445                var r_word = r.limbs[0];
2446                var i: usize = 0;
2447                while (i < digits_per_limb) : (i += 1) {
2448                    const ch = std.fmt.digitToChar(@as(u8, @intCast(r_word % base)), case);
2449                    r_word /= base;
2450                    string[digits_len] = ch;
2451                    digits_len += 1;
2452                }
2453            }
2454
2455            {
2456                assert(q.len == 1);
2457
2458                var r_word = q.limbs[0];
2459                while (r_word != 0) {
2460                    const ch = std.fmt.digitToChar(@as(u8, @intCast(r_word % base)), case);
2461                    r_word /= base;
2462                    string[digits_len] = ch;
2463                    digits_len += 1;
2464                }
2465            }
2466        }
2467
2468        if (!self.positive) {
2469            string[digits_len] = '-';
2470            digits_len += 1;
2471        }
2472
2473        const s = string[0..digits_len];
2474        mem.reverse(u8, s);
2475        return s.len;
2476    }
2477
2478    /// Write the value of `x` into `buffer`
2479    /// Asserts that `buffer` is large enough to store the value.
2480    ///
2481    /// `buffer` is filled so that its contents match what would be observed via
2482    /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`,
2483    /// and any required padding bits are added on the MSB end.
2484    pub fn writeTwosComplement(x: Const, buffer: []u8, endian: Endian) void {
2485        return writePackedTwosComplement(x, buffer, 0, 8 * buffer.len, endian);
2486    }
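
    // Illustrative sketch, not part of the original source: round-tripping a negative value
    // through `writeTwosComplement` and `Mutable.readTwosComplement` with a 64-bit buffer.
    test "writeTwosComplement round trip (illustrative)" {
        var a_limbs: [2]Limb = undefined;
        const a = Mutable.init(&a_limbs, -12345).toConst();

        var buffer: [8]u8 = undefined;
        a.writeTwosComplement(&buffer, .little);

        var b_limbs: [2]Limb = undefined;
        var b = Mutable.init(&b_limbs, 0);
        b.readTwosComplement(&buffer, 64, .little, .signed);
        try std.testing.expectEqual(@as(i32, -12345), try b.toConst().toInt(i32));
    }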
2487
2488    /// Write the value of `x` to a packed memory `buffer`.
2489    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`
2490    /// at offset `bit_offset`.
2491    ///
2492    /// This is equivalent to storing the value of an integer with `bit_count` bits as
2493    /// if it were a field in packed memory at the provided bit offset.
2494    pub fn writePackedTwosComplement(x: Const, buffer: []u8, bit_offset: usize, bit_count: usize, endian: Endian) void {
2495        assert(x.fitsInTwosComp(if (x.positive) .unsigned else .signed, bit_count));
2496
2497        // Copy all complete limbs
2498        var carry: u1 = 1;
2499        var limb_index: usize = 0;
2500        var bit_index: usize = 0;
2501        while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) {
2502            var limb: Limb = if (limb_index < x.limbs.len) x.limbs[limb_index] else 0;
2503
2504            // 2's complement (bitwise not, then add carry bit)
2505            if (!x.positive) {
2506                const ov = @addWithOverflow(~limb, carry);
2507                limb = ov[0];
2508                carry = ov[1];
2509            }
2510
2511            // Write one Limb of bits
2512            mem.writePackedInt(Limb, buffer, bit_index + bit_offset, limb, endian);
2513            bit_index += @bitSizeOf(Limb);
2514        }
2515
2516        // Copy the remaining bits
2517        if (bit_count != bit_index) {
2518            var limb: Limb = if (limb_index < x.limbs.len) x.limbs[limb_index] else 0;
2519
2520            // 2's complement (bitwise not, then add carry bit)
2521            if (!x.positive) limb = ~limb +% carry;
2522
2523            // Write all remaining bits
2524            mem.writeVarPackedInt(buffer, bit_index + bit_offset, bit_count - bit_index, limb, endian);
2525        }
2526    }
2527
2528    /// Returns `math.Order.lt`, `math.Order.eq`, `math.Order.gt` if
2529    /// `|a| < |b|`, `|a| == |b|`, or `|a| > |b|` respectively.
2530    pub fn orderAbs(a: Const, b: Const) math.Order {
2531        if (a.limbs.len < b.limbs.len) {
2532            return .lt;
2533        }
2534        if (a.limbs.len > b.limbs.len) {
2535            return .gt;
2536        }
2537
2538        var i: usize = a.limbs.len - 1;
2539        while (i != 0) : (i -= 1) {
2540            if (a.limbs[i] != b.limbs[i]) {
2541                break;
2542            }
2543        }
2544
2545        if (a.limbs[i] < b.limbs[i]) {
2546            return .lt;
2547        } else if (a.limbs[i] > b.limbs[i]) {
2548            return .gt;
2549        } else {
2550            return .eq;
2551        }
2552    }
2553
2554    /// Returns `math.Order.lt`, `math.Order.eq`, `math.Order.gt` if `a < b`, `a == b` or `a > b` respectively.
2555    pub fn order(a: Const, b: Const) math.Order {
2556        if (a.positive != b.positive) {
2557            if (eqlZero(a) and eqlZero(b)) {
2558                return .eq;
2559            } else {
2560                return if (a.positive) .gt else .lt;
2561            }
2562        } else {
2563            const r = orderAbs(a, b);
2564            return if (a.positive) r else switch (r) {
2565                .lt => math.Order.gt,
2566                .eq => math.Order.eq,
2567                .gt => math.Order.lt,
2568            };
2569        }
2570    }
2571
2572    /// Same as `order` but the right-hand operand is a primitive integer.
2573    pub fn orderAgainstScalar(lhs: Const, scalar: anytype) math.Order {
2574        // Normally we could just determine the number of limbs needed with calcLimbLen,
2575        // but that is not comptime-known when scalar is not a comptime_int.  Instead, we
2576        // use calcTwosCompLimbCount for a non-comptime_int scalar, which can be pessimistic
2577        // in the case that scalar happens to be small in magnitude within its type, but it
2578        // is well worth being able to use the stack and not needing an allocator passed in.
2579        // Note that Mutable.init still sets len to calcLimbLen(scalar) in any case.
2580        const limbs_len = comptime switch (@typeInfo(@TypeOf(scalar))) {
2581            .comptime_int => calcLimbLen(scalar),
2582            .int => |info| calcTwosCompLimbCount(info.bits),
2583            else => @compileError("expected scalar to be an int"),
2584        };
2585        var limbs: [limbs_len]Limb = undefined;
2586        const rhs = Mutable.init(&limbs, scalar);
2587        return order(lhs, rhs.toConst());
2588    }
2589
2590    /// Returns true if `a == 0`.
2591    pub fn eqlZero(a: Const) bool {
2592        var d: Limb = 0;
2593        for (a.limbs) |limb| d |= limb;
2594        return d == 0;
2595    }
2596
2597    /// Returns true if `|a| == |b|`.
2598    pub fn eqlAbs(a: Const, b: Const) bool {
2599        return orderAbs(a, b) == .eq;
2600    }
2601
2602    /// Returns true if `a == b`.
2603    pub fn eql(a: Const, b: Const) bool {
2604        return order(a, b) == .eq;
2605    }
2606
2607    /// Returns the number of leading zeros in twos-complement form.
2608    pub fn clz(a: Const, bits: Limb) Limb {
2609        // Limbs are stored in little-endian order but we need to iterate big-endian.
2610        if (!a.positive and !a.eqlZero()) return 0;
2611        var total_limb_lz: Limb = 0;
2612        var i: usize = a.limbs.len;
2613        const bits_per_limb = @bitSizeOf(Limb);
2614        while (i != 0) {
2615            i -= 1;
2616            const this_limb_lz = @clz(a.limbs[i]);
2617            total_limb_lz += this_limb_lz;
2618            if (this_limb_lz != bits_per_limb) break;
2619        }
2620        const total_limb_bits = a.limbs.len * bits_per_limb;
2621        return total_limb_lz + bits - total_limb_bits;
2622    }
2623
2624    /// Returns the number of trailing zeros in twos-complement form.
2625    pub fn ctz(a: Const, bits: Limb) Limb {
2626        // Limbs are stored in little-endian order. Converting a negative number to twos-complement
2627        // flips all bits above the lowest set bit, which does not affect the trailing zero count.
2628        if (a.eqlZero()) return bits;
2629        var result: Limb = 0;
2630        for (a.limbs) |limb| {
2631            const limb_tz = @ctz(limb);
2632            result += limb_tz;
2633            if (limb_tz != @bitSizeOf(Limb)) break;
2634        }
2635        return @min(result, bits);
2636    }
2637};
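
// Illustrative example added for exposition; it is not part of the upstream test suite.
// It exercises the comparison helpers above using stack-allocated limbs via `Mutable.init`.
test "Const comparison helpers (illustrative)" {
    var a_limbs: [calcLimbLen(-42)]Limb = undefined;
    var b_limbs: [calcLimbLen(42)]Limb = undefined;
    const a = Mutable.init(&a_limbs, -42).toConst();
    const b = Mutable.init(&b_limbs, 42).toConst();

    try std.testing.expect(a.orderAbs(b) == .eq); // |-42| == |42|
    try std.testing.expect(a.order(b) == .lt); // -42 < 42
    try std.testing.expect(a.orderAgainstScalar(-42) == .eq);
    try std.testing.expect(!a.eqlZero());
    try std.testing.expectEqual(@as(Limb, 58), b.clz(64)); // 42 uses 6 of 64 bits
    try std.testing.expectEqual(@as(Limb, 1), b.ctz(64)); // 42 = 0b101010
}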
2638
2639/// An arbitrary-precision big integer along with an allocator which manages the memory.
2640///
2641/// Memory is allocated as needed to ensure operations never overflow. The range
2642/// is bounded only by available memory.
2643pub const Managed = struct {
2644    pub const sign_bit: usize = 1 << (@typeInfo(usize).int.bits - 1);
2645
2646    /// Default number of limbs to allocate on creation of a `Managed`.
2647    pub const default_capacity = 4;
2648
2649    /// Allocator used by the Managed when requesting memory.
2650    allocator: Allocator,
2651
2652    /// Raw digits. These are:
2653    ///
2654    /// * Little-endian ordered
2655    /// * limbs.len >= 1
2656    /// * Zero is represented as Managed.len() == 1 with limbs[0] == 0.
2657    ///
2658    /// Accessing limbs directly should be avoided.
2659    limbs: []Limb,
2660
2661    /// High bit is the sign bit. If set, Managed is negative, else Managed is positive.
2662    /// The remaining bits represent the number of limbs used by Managed.
2663    metadata: usize,
2664
2665    /// Creates a new `Managed`. `default_capacity` limbs will be allocated immediately.
2666    /// The integer value after initializing is `0`.
2667    pub fn init(allocator: Allocator) !Managed {
2668        return initCapacity(allocator, default_capacity);
2669    }
2670
2671    pub fn toMutable(self: Managed) Mutable {
2672        return .{
2673            .limbs = self.limbs,
2674            .positive = self.isPositive(),
2675            .len = self.len(),
2676        };
2677    }
2678
2679    pub fn toConst(self: Managed) Const {
2680        return .{
2681            .limbs = self.limbs[0..self.len()],
2682            .positive = self.isPositive(),
2683        };
2684    }
2685
2686    /// Creates a new `Managed` with value `value`.
2687    ///
2688    /// This is identical to an `init`, followed by a `set`.
2689    pub fn initSet(allocator: Allocator, value: anytype) !Managed {
2690        var s = try Managed.init(allocator);
2691        errdefer s.deinit();
2692        try s.set(value);
2693        return s;
2694    }
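
    // Illustrative example added for exposition; it is not part of the upstream test suite.
    // `initSet` followed by `toInt` round-trips a primitive integer through a heap allocation.
    test "initSet/toInt round trip (illustrative)" {
        var a = try Managed.initSet(std.testing.allocator, -1234567890123456789);
        defer a.deinit();
        try std.testing.expect(!a.isPositive());
        try std.testing.expectEqual(@as(i64, -1234567890123456789), try a.toInt(i64));
    }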
2695
2696    /// Creates a new Managed with a specific capacity. If capacity < default_capacity then the
2697    /// default capacity will be used instead.
2698    /// The integer value after initializing is `0`.
2699    pub fn initCapacity(allocator: Allocator, capacity: usize) !Managed {
2700        return Managed{
2701            .allocator = allocator,
2702            .metadata = 1,
2703            .limbs = block: {
2704                const limbs = try allocator.alloc(Limb, @max(default_capacity, capacity));
2705                limbs[0] = 0;
2706                break :block limbs;
2707            },
2708        };
2709    }
2710
2711    /// Returns the number of limbs currently in use.
2712    pub fn len(self: Managed) usize {
2713        return self.metadata & ~sign_bit;
2714    }
2715
2716    /// Returns whether a Managed is positive.
2717    pub fn isPositive(self: Managed) bool {
2718        return self.metadata & sign_bit == 0;
2719    }
2720
2721    /// Sets the sign of a Managed.
2722    pub fn setSign(self: *Managed, positive: bool) void {
2723        if (positive) {
2724            self.metadata &= ~sign_bit;
2725        } else {
2726            self.metadata |= sign_bit;
2727        }
2728    }
2729
2730    /// Sets the length of a Managed.
2731    ///
2732    /// If setLen is used, then the Managed must be normalized to suit.
2733    pub fn setLen(self: *Managed, new_len: usize) void {
2734        self.metadata &= sign_bit;
2735        self.metadata |= new_len;
2736    }
2737
2738    pub fn setMetadata(self: *Managed, positive: bool, length: usize) void {
2739        self.metadata = if (positive) length & ~sign_bit else length | sign_bit;
2740    }
2741
2742    /// Ensures a Managed has enough space allocated for `capacity` limbs. If the Managed does not have
2743    /// sufficient capacity, the exact amount will be allocated. This occurs even if the requested
2744    /// capacity is only greater than the current capacity by one limb.
2745    pub fn ensureCapacity(self: *Managed, capacity: usize) !void {
2746        if (capacity <= self.limbs.len) {
2747            return;
2748        }
2749        self.limbs = try self.allocator.realloc(self.limbs, capacity);
2750    }
2751
2752    /// Frees all associated memory.
2753    pub fn deinit(self: *Managed) void {
2754        self.allocator.free(self.limbs);
2755        self.* = undefined;
2756    }
2757
2758    /// Returns a `Managed` with the same value. The returned `Managed` is a deep copy and
2759    /// can be modified separately from the original, and its resources are managed
2760    /// separately from the original.
2761    pub fn clone(other: Managed) !Managed {
2762        return other.cloneWithDifferentAllocator(other.allocator);
2763    }
2764
2765    pub fn cloneWithDifferentAllocator(other: Managed, allocator: Allocator) !Managed {
2766        return Managed{
2767            .allocator = allocator,
2768            .metadata = other.metadata,
2769            .limbs = block: {
2770                const limbs = try allocator.alloc(Limb, other.len());
2771                @memcpy(limbs, other.limbs[0..other.len()]);
2772                break :block limbs;
2773            },
2774        };
2775    }
2776
2777    /// Copies the value of the integer to an existing `Managed` so that they both have the same value.
2778    /// Extra memory will be allocated if the receiver does not have enough capacity.
2779    pub fn copy(self: *Managed, other: Const) !void {
2780        if (self.limbs.ptr == other.limbs.ptr) return;
2781
2782        try self.ensureCapacity(other.limbs.len);
2783        @memcpy(self.limbs[0..other.limbs.len], other.limbs[0..other.limbs.len]);
2784        self.setMetadata(other.positive, other.limbs.len);
2785    }
2786
2787    /// Efficiently swap a `Managed` with another. This swaps the limb pointers and a full copy is not
2788    /// performed. The address of the limbs field will not be the same after this function.
2789    pub fn swap(self: *Managed, other: *Managed) void {
2790        mem.swap(Managed, self, other);
2791    }
2792
2793    /// Debugging tool: prints the state to stderr.
2794    pub fn dump(self: Managed) void {
2795        for (self.limbs[0..self.len()]) |limb| {
2796            std.debug.print("{x} ", .{limb});
2797        }
2798        std.debug.print("len={} capacity={} positive={}\n", .{ self.len(), self.limbs.len, self.isPositive() });
2799    }
2800
2801    /// Negate the sign.
2802    pub fn negate(self: *Managed) void {
2803        self.metadata ^= sign_bit;
2804    }
2805
2806    /// Make positive.
2807    pub fn abs(self: *Managed) void {
2808        self.metadata &= ~sign_bit;
2809    }
2810
2811    pub fn isOdd(self: Managed) bool {
2812        return self.limbs[0] & 1 != 0;
2813    }
2814
2815    pub fn isEven(self: Managed) bool {
2816        return !self.isOdd();
2817    }
2818
2819    /// Returns the number of bits required to represent the absolute value of an integer.
2820    pub fn bitCountAbs(self: Managed) usize {
2821        return self.toConst().bitCountAbs();
2822    }
2823
2824    /// Returns the number of bits required to represent the integer in twos-complement form.
2825    ///
2826    /// If the integer is negative the value returned is the number of bits needed by a signed
2827    /// integer to represent the value. If positive the value is the number of bits for an
2828    /// unsigned integer. Any unsigned integer will fit in the signed integer with bitcount
2829    /// one greater than the returned value.
2830    ///
2831    /// e.g. -127 returns 8 as it will fit in an i8. 127 returns 7 since it fits in a u7.
2832    pub fn bitCountTwosComp(self: Managed) usize {
2833        return self.toConst().bitCountTwosComp();
2834    }
2835
2836    pub fn fitsInTwosComp(self: Managed, signedness: Signedness, bit_count: usize) bool {
2837        return self.toConst().fitsInTwosComp(signedness, bit_count);
2838    }
2839
2840    /// Returns whether self can fit into an integer of the requested type.
2841    pub fn fits(self: Managed, comptime T: type) bool {
2842        return self.toConst().fits(T);
2843    }
2844
2845    /// Returns the approximate size of the integer in the given base. Negative values account for
2846    /// the minus sign. This is used for determining the number of characters needed to print the
2847    /// value. It is inexact and may exceed the given value by ~1-2 bytes.
2848    pub fn sizeInBaseUpperBound(self: Managed, base: usize) usize {
2849        return self.toConst().sizeInBaseUpperBound(base);
2850    }
2851
2852    /// Sets a Managed to value. Value must be a primitive integer type.
2853    pub fn set(self: *Managed, value: anytype) Allocator.Error!void {
2854        try self.ensureCapacity(calcLimbLen(value));
2855        var m = self.toMutable();
2856        m.set(value);
2857        self.setMetadata(m.positive, m.len);
2858    }
2859
2860    pub const ConvertError = Const.ConvertError;
2861
2862    /// Convert `self` to `Int`.
2863    ///
2864    /// Returns an error if self cannot be narrowed into the requested type without truncation.
2865    pub fn toInt(self: Managed, comptime Int: type) ConvertError!Int {
2866        return self.toConst().toInt(Int);
2867    }
2868
2869    /// Convert `self` to `Float`.
2870    pub fn toFloat(self: Managed, comptime Float: type, round: Round) struct { Float, Exactness } {
2871        return self.toConst().toFloat(Float, round);
2872    }
2873
2874    /// Set self from the string representation `value`.
2875    ///
2876    /// `value` must contain only digits < `base` and is case insensitive.  Base prefixes are
2877    /// not allowed (e.g. 0x43 should simply be 43).  Underscores in the input string are
2878    /// ignored and can be used as digit separators.
2879    ///
2880    /// Returns an error if memory could not be allocated or `value` has invalid digits for the
2881    /// requested base.
2882    ///
2883    /// self's allocator is used for temporary storage to boost multiplication performance.
2884    pub fn setString(self: *Managed, base: u8, value: []const u8) !void {
2885        if (base < 2 or base > 36) return error.InvalidBase;
2886        try self.ensureCapacity(calcSetStringLimbCount(base, value.len));
2887        const limbs_buffer = try self.allocator.alloc(Limb, calcSetStringLimbsBufferLen(base, value.len));
2888        defer self.allocator.free(limbs_buffer);
2889        var m = self.toMutable();
2890        try m.setString(base, value, limbs_buffer, self.allocator);
2891        self.setMetadata(m.positive, m.len);
2892    }
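
    // Illustrative example added for exposition; it is not part of the upstream test suite.
    // As documented above, underscores act as digit separators and no base prefix is accepted.
    test "setString/toString (illustrative)" {
        var a = try Managed.init(std.testing.allocator);
        defer a.deinit();
        try a.setString(16, "dead_beef");
        const s = try a.toString(std.testing.allocator, 10, .lower);
        defer std.testing.allocator.free(s);
        try std.testing.expectEqualStrings("3735928559", s); // 0xdeadbeef
    }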
2893
2894    /// Set self to either bound of a 2s-complement integer.
2895    /// Note: The result is still sign-magnitude, not twos complement! In order to convert the
2896    /// result to twos complement, it is sufficient to take the absolute value.
2897    pub fn setTwosCompIntLimit(
2898        r: *Managed,
2899        limit: TwosCompIntLimit,
2900        signedness: Signedness,
2901        bit_count: usize,
2902    ) !void {
2903        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
2904        var m = r.toMutable();
2905        m.setTwosCompIntLimit(limit, signedness, bit_count);
2906        r.setMetadata(m.positive, m.len);
2907    }
2908
2909    /// Converts self to a string in the requested base. Memory is allocated from the provided
2910    /// allocator and not the one present in self.
2911    pub fn toString(self: Managed, allocator: Allocator, base: u8, case: std.fmt.Case) ![]u8 {
2912        if (base < 2 or base > 36) return error.InvalidBase;
2913        return self.toConst().toStringAlloc(allocator, base, case);
2914    }
2915
2916    /// To allow `std.fmt.format` to work with `Managed`.
2917    pub fn format(self: Managed, w: *std.Io.Writer) std.Io.Writer.Error!void {
2918        return formatNumber(self, w, .{});
2919    }
2920
2921    /// If the absolute value of the integer is greater than or equal to `pow(2, 64 * @sizeOf(usize) * 8)`,
2922    /// this function will fail to print the string, printing "(BigInt)" instead of a number.
2923    /// This is because the rendering algorithm requires reversing a string, which requires O(N) memory.
2924    /// See `toString` and `toStringAlloc` for a way to print big integers without failure.
2925    pub fn formatNumber(self: Managed, w: *std.Io.Writer, n: std.fmt.Number) std.Io.Writer.Error!void {
2926        return self.toConst().formatNumber(w, n);
2927    }
2928
2929    /// Returns math.Order.lt, math.Order.eq, math.Order.gt if |a| < |b|, |a| ==
2930    /// |b| or |a| > |b| respectively.
2931    pub fn orderAbs(a: Managed, b: Managed) math.Order {
2932        return a.toConst().orderAbs(b.toConst());
2933    }
2934
2935    /// Returns math.Order.lt, math.Order.eq, math.Order.gt if a < b, a == b or a > b
2936    /// respectively.
2937    pub fn order(a: Managed, b: Managed) math.Order {
2938        return a.toConst().order(b.toConst());
2939    }
2940
2941    /// Returns true if a == 0.
2942    pub fn eqlZero(a: Managed) bool {
2943        return a.toConst().eqlZero();
2944    }
2945
2946    /// Returns true if |a| == |b|.
2947    pub fn eqlAbs(a: Managed, b: Managed) bool {
2948        return a.toConst().eqlAbs(b.toConst());
2949    }
2950
2951    /// Returns true if a == b.
2952    pub fn eql(a: Managed, b: Managed) bool {
2953        return a.toConst().eql(b.toConst());
2954    }
2955
2956    /// Normalize a possible sequence of leading zeros.
2957    ///
2958    /// [1, 2, 3, 4, 0] -> [1, 2, 3, 4]
2959    /// [1, 2, 0, 0, 0] -> [1, 2]
2960    /// [0, 0, 0, 0, 0] -> [0]
2961    pub fn normalize(r: *Managed, length: usize) void {
2962        assert(length > 0);
2963        assert(length <= r.limbs.len);
2964
2965        var j = length;
2966        while (j > 0) : (j -= 1) {
2967            if (r.limbs[j - 1] != 0) {
2968                break;
2969            }
2970        }
2971
2972        // Handle zero
2973        r.setLen(if (j != 0) j else 1);
2974    }
2975
2976    /// r = a + scalar
2977    ///
2978    /// r and a may be aliases.
2979    ///
2980    /// Returns an error if memory could not be allocated.
2981    pub fn addScalar(r: *Managed, a: *const Managed, scalar: anytype) Allocator.Error!void {
2982        try r.ensureAddScalarCapacity(a.toConst(), scalar);
2983        var m = r.toMutable();
2984        m.addScalar(a.toConst(), scalar);
2985        r.setMetadata(m.positive, m.len);
2986    }
2987
2988    /// r = a + b
2989    ///
2990    /// r, a and b may be aliases.
2991    ///
2992    /// Returns an error if memory could not be allocated.
2993    pub fn add(r: *Managed, a: *const Managed, b: *const Managed) Allocator.Error!void {
2994        try r.ensureAddCapacity(a.toConst(), b.toConst());
2995        var m = r.toMutable();
2996        m.add(a.toConst(), b.toConst());
2997        r.setMetadata(m.positive, m.len);
2998    }
2999
3000    /// r = a + b with 2s-complement wrapping semantics. Returns whether any overflow occurred.
3001    ///
3002    /// r, a and b may be aliases.
3003    ///
3004    /// Returns an error if memory could not be allocated.
3005    pub fn addWrap(
3006        r: *Managed,
3007        a: *const Managed,
3008        b: *const Managed,
3009        signedness: Signedness,
3010        bit_count: usize,
3011    ) Allocator.Error!bool {
3012        try r.ensureTwosCompCapacity(bit_count);
3013        var m = r.toMutable();
3014        const wrapped = m.addWrap(a.toConst(), b.toConst(), signedness, bit_count);
3015        r.setMetadata(m.positive, m.len);
3016        return wrapped;
3017    }
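
    // Illustrative example added for exposition; it is not part of the upstream test suite.
    // Wrapping addition truncates the mathematical sum to `bit_count` bits and reports overflow:
    // 200 + 100 = 300, which wraps to 44 in a u8.
    test "addWrap (illustrative)" {
        var a = try Managed.initSet(std.testing.allocator, 200);
        defer a.deinit();
        var b = try Managed.initSet(std.testing.allocator, 100);
        defer b.deinit();
        var r = try Managed.init(std.testing.allocator);
        defer r.deinit();
        const overflowed = try r.addWrap(&a, &b, .unsigned, 8);
        try std.testing.expect(overflowed);
        try std.testing.expectEqual(@as(u8, 44), try r.toInt(u8));
    }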
3018
3019    /// r = a + b with 2s-complement saturating semantics.
3020    ///
3021    /// r, a and b may be aliases.
3022    ///
3023    /// Returns an error if memory could not be allocated.
3024    pub fn addSat(r: *Managed, a: *const Managed, b: *const Managed, signedness: Signedness, bit_count: usize) Allocator.Error!void {
3025        try r.ensureTwosCompCapacity(bit_count);
3026        var m = r.toMutable();
3027        m.addSat(a.toConst(), b.toConst(), signedness, bit_count);
3028        r.setMetadata(m.positive, m.len);
3029    }
3030
3031    /// r = a - b
3032    ///
3033    /// r, a and b may be aliases.
3034    ///
3035    /// Returns an error if memory could not be allocated.
3036    pub fn sub(r: *Managed, a: *const Managed, b: *const Managed) !void {
3037        try r.ensureCapacity(@max(a.len(), b.len()) + 1);
3038        var m = r.toMutable();
3039        m.sub(a.toConst(), b.toConst());
3040        r.setMetadata(m.positive, m.len);
3041    }
3042
3043    /// r = a - b with 2s-complement wrapping semantics. Returns whether any overflow occurred.
3044    ///
3045    /// r, a and b may be aliases.
3046    ///
3047    /// Returns an error if memory could not be allocated.
3048    pub fn subWrap(
3049        r: *Managed,
3050        a: *const Managed,
3051        b: *const Managed,
3052        signedness: Signedness,
3053        bit_count: usize,
3054    ) Allocator.Error!bool {
3055        try r.ensureTwosCompCapacity(bit_count);
3056        var m = r.toMutable();
3057        const wrapped = m.subWrap(a.toConst(), b.toConst(), signedness, bit_count);
3058        r.setMetadata(m.positive, m.len);
3059        return wrapped;
3060    }
3061
3062    /// r = a - b with 2s-complement saturating semantics.
3063    ///
3064    /// r, a and b may be aliases.
3065    ///
3066    /// Returns an error if memory could not be allocated.
3067    pub fn subSat(
3068        r: *Managed,
3069        a: *const Managed,
3070        b: *const Managed,
3071        signedness: Signedness,
3072        bit_count: usize,
3073    ) Allocator.Error!void {
3074        try r.ensureTwosCompCapacity(bit_count);
3075        var m = r.toMutable();
3076        m.subSat(a.toConst(), b.toConst(), signedness, bit_count);
3077        r.setMetadata(m.positive, m.len);
3078    }
3079
3080    /// rma = a * b
3081    ///
3082    /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
3083    ///
3084    /// Returns an error if memory could not be allocated.
3085    ///
3086    /// rma's allocator is used for temporary storage to speed up the multiplication.
3087    pub fn mul(rma: *Managed, a: *const Managed, b: *const Managed) !void {
3088        var alias_count: usize = 0;
3089        if (rma.limbs.ptr == a.limbs.ptr)
3090            alias_count += 1;
3091        if (rma.limbs.ptr == b.limbs.ptr)
3092            alias_count += 1;
3093        try rma.ensureMulCapacity(a.toConst(), b.toConst());
3094        var m = rma.toMutable();
3095        if (alias_count == 0) {
3096            m.mulNoAlias(a.toConst(), b.toConst(), rma.allocator);
3097        } else {
3098            const limb_count = calcMulLimbsBufferLen(a.len(), b.len(), alias_count);
3099            const limbs_buffer = try rma.allocator.alloc(Limb, limb_count);
3100            defer rma.allocator.free(limbs_buffer);
3101            m.mul(a.toConst(), b.toConst(), limbs_buffer, rma.allocator);
3102        }
3103        rma.setMetadata(m.positive, m.len);
3104    }
3105
3106    /// rma = a * b with 2s-complement wrapping semantics.
3107    ///
3108    /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
3109    ///
3110    /// Returns an error if memory could not be allocated.
3111    ///
3112    /// rma's allocator is used for temporary storage to speed up the multiplication.
3113    pub fn mulWrap(
3114        rma: *Managed,
3115        a: *const Managed,
3116        b: *const Managed,
3117        signedness: Signedness,
3118        bit_count: usize,
3119    ) !void {
3120        var alias_count: usize = 0;
3121        if (rma.limbs.ptr == a.limbs.ptr)
3122            alias_count += 1;
3123        if (rma.limbs.ptr == b.limbs.ptr)
3124            alias_count += 1;
3125
3126        try rma.ensureTwosCompCapacity(bit_count);
3127        var m = rma.toMutable();
3128        if (alias_count == 0) {
3129            m.mulWrapNoAlias(a.toConst(), b.toConst(), signedness, bit_count, rma.allocator);
3130        } else {
3131            const limb_count = calcMulWrapLimbsBufferLen(bit_count, a.len(), b.len(), alias_count);
3132            const limbs_buffer = try rma.allocator.alloc(Limb, limb_count);
3133            defer rma.allocator.free(limbs_buffer);
3134            m.mulWrap(a.toConst(), b.toConst(), signedness, bit_count, limbs_buffer, rma.allocator);
3135        }
3136        rma.setMetadata(m.positive, m.len);
3137    }
3138
3139    pub fn ensureTwosCompCapacity(r: *Managed, bit_count: usize) !void {
3140        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
3141    }
3142
3143    pub fn ensureAddScalarCapacity(r: *Managed, a: Const, scalar: anytype) !void {
3144        try r.ensureCapacity(@max(a.limbs.len, calcLimbLen(scalar)) + 1);
3145    }
3146
3147    pub fn ensureAddCapacity(r: *Managed, a: Const, b: Const) !void {
3148        try r.ensureCapacity(@max(a.limbs.len, b.limbs.len) + 1);
3149    }
3150
3151    pub fn ensureMulCapacity(rma: *Managed, a: Const, b: Const) !void {
3152        try rma.ensureCapacity(a.limbs.len + b.limbs.len + 1);
3153    }
3154
3155    /// q = a / b (rem r)
3156    ///
3157    /// a / b is floored (rounded towards -inf).
3158    ///
3159    /// Returns an error if memory could not be allocated.
3160    pub fn divFloor(q: *Managed, r: *Managed, a: *const Managed, b: *const Managed) !void {
3161        try q.ensureCapacity(a.len());
3162        try r.ensureCapacity(b.len());
3163        var mq = q.toMutable();
3164        var mr = r.toMutable();
3165        const limbs_buffer = try q.allocator.alloc(Limb, calcDivLimbsBufferLen(a.len(), b.len()));
3166        defer q.allocator.free(limbs_buffer);
3167        mq.divFloor(&mr, a.toConst(), b.toConst(), limbs_buffer);
3168        q.setMetadata(mq.positive, mq.len);
3169        r.setMetadata(mr.positive, mr.len);
3170    }
3171
3172    /// q = a / b (rem r)
3173    ///
3174    /// a / b is truncated (rounded towards zero).
3175    ///
3176    /// Returns an error if memory could not be allocated.
3177    pub fn divTrunc(q: *Managed, r: *Managed, a: *const Managed, b: *const Managed) !void {
3178        try q.ensureCapacity(a.len());
3179        try r.ensureCapacity(b.len());
3180        var mq = q.toMutable();
3181        var mr = r.toMutable();
3182        const limbs_buffer = try q.allocator.alloc(Limb, calcDivLimbsBufferLen(a.len(), b.len()));
3183        defer q.allocator.free(limbs_buffer);
3184        mq.divTrunc(&mr, a.toConst(), b.toConst(), limbs_buffer);
3185        q.setMetadata(mq.positive, mq.len);
3186        r.setMetadata(mr.positive, mr.len);
3187    }
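
    // Illustrative example added for exposition; it is not part of the upstream test suite.
    // It shows the rounding difference documented above: for -7 / 2, floored division yields
    // -4 remainder 1, while truncated division yields -3 remainder -1.
    test "divFloor vs divTrunc rounding (illustrative)" {
        var a = try Managed.initSet(std.testing.allocator, -7);
        defer a.deinit();
        var b = try Managed.initSet(std.testing.allocator, 2);
        defer b.deinit();
        var q = try Managed.init(std.testing.allocator);
        defer q.deinit();
        var r = try Managed.init(std.testing.allocator);
        defer r.deinit();

        try q.divFloor(&r, &a, &b);
        try std.testing.expectEqual(@as(i32, -4), try q.toInt(i32));
        try std.testing.expectEqual(@as(i32, 1), try r.toInt(i32));

        try q.divTrunc(&r, &a, &b);
        try std.testing.expectEqual(@as(i32, -3), try q.toInt(i32));
        try std.testing.expectEqual(@as(i32, -1), try r.toInt(i32));
    }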
3188
3189    /// r = a << shift, in other words, r = a * 2^shift
3190    /// r and a may alias.
3191    pub fn shiftLeft(r: *Managed, a: *const Managed, shift: usize) !void {
3192        try r.ensureCapacity(a.len() + (shift / limb_bits) + 1);
3193        var m = r.toMutable();
3194        m.shiftLeft(a.toConst(), shift);
3195        r.setMetadata(m.positive, m.len);
3196    }
3197
3198    /// r = a <<| shift with 2s-complement saturating semantics.
3199    /// r and a may alias.
3200    pub fn shiftLeftSat(r: *Managed, a: *const Managed, shift: usize, signedness: Signedness, bit_count: usize) !void {
3201        try r.ensureTwosCompCapacity(bit_count);
3202        var m = r.toMutable();
3203        m.shiftLeftSat(a.toConst(), shift, signedness, bit_count);
3204        r.setMetadata(m.positive, m.len);
3205    }
3206
3207    /// r = a >> shift
3208    /// r and a may alias.
3209    pub fn shiftRight(r: *Managed, a: *const Managed, shift: usize) !void {
3210        if (a.len() <= shift / limb_bits) {
3211            // Shifting negative numbers converges to -1 instead of 0
3212            if (a.isPositive()) {
3213                r.metadata = 1;
3214                r.limbs[0] = 0;
3215            } else {
3216                r.metadata = 1;
3217                r.setSign(false);
3218                r.limbs[0] = 1;
3219            }
3220            return;
3221        }
3222
3223        try r.ensureCapacity(a.len() - (shift / limb_bits));
3224        var m = r.toMutable();
3225        m.shiftRight(a.toConst(), shift);
3226        r.setMetadata(m.positive, m.len);
3227    }
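
    // Illustrative example added for exposition; it is not part of the upstream test suite.
    // Right-shifting a negative value past all of its limbs converges to -1, as noted above.
    test "shiftRight converges to -1 for negative values (illustrative)" {
        var a = try Managed.initSet(std.testing.allocator, -5);
        defer a.deinit();
        var r = try Managed.init(std.testing.allocator);
        defer r.deinit();
        try r.shiftRight(&a, 1000);
        try std.testing.expectEqual(@as(i32, -1), try r.toInt(i32));
    }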
3228
3229    /// r = ~a under 2s-complement wrapping semantics.
3230    /// r and a may alias.
3231    pub fn bitNotWrap(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void {
3232        try r.ensureTwosCompCapacity(bit_count);
3233        var m = r.toMutable();
3234        m.bitNotWrap(a.toConst(), signedness, bit_count);
3235        r.setMetadata(m.positive, m.len);
3236    }
3237
3238    /// r = a | b
3239    ///
3240    /// a and b are zero-extended to the longer of a or b.
3241    pub fn bitOr(r: *Managed, a: *const Managed, b: *const Managed) !void {
3242        try r.ensureCapacity(@max(a.len(), b.len()));
3243        var m = r.toMutable();
3244        m.bitOr(a.toConst(), b.toConst());
3245        r.setMetadata(m.positive, m.len);
3246    }
3247
3248    /// r = a & b
3249    pub fn bitAnd(r: *Managed, a: *const Managed, b: *const Managed) !void {
3250        const cap = if (a.len() >= b.len())
3251            if (b.isPositive()) b.len() else if (a.isPositive()) a.len() else a.len() + 1
3252        else if (a.isPositive()) a.len() else if (b.isPositive()) b.len() else b.len() + 1;
3253
3254        try r.ensureCapacity(cap);
3255        var m = r.toMutable();
3256        m.bitAnd(a.toConst(), b.toConst());
3257        r.setMetadata(m.positive, m.len);
3258    }
3259
3260    /// r = a ^ b
3261    pub fn bitXor(r: *Managed, a: *const Managed, b: *const Managed) !void {
3262        const cap = @max(a.len(), b.len()) + @intFromBool(a.isPositive() != b.isPositive());
3263        try r.ensureCapacity(cap);
3264
3265        var m = r.toMutable();
3266        m.bitXor(a.toConst(), b.toConst());
3267        r.setMetadata(m.positive, m.len);
3268    }
3269
3270    /// rma may alias x or y.
3271    /// x and y may alias each other.
3272    ///
3273    /// rma's allocator is used for temporary storage to boost multiplication performance.
3274    pub fn gcd(rma: *Managed, x: *const Managed, y: *const Managed) !void {
3275        try rma.ensureCapacity(@min(x.len(), y.len()));
3276        var m = rma.toMutable();
3277        var limbs_buffer = std.array_list.Managed(Limb).init(rma.allocator);
3278        defer limbs_buffer.deinit();
3279        try m.gcd(x.toConst(), y.toConst(), &limbs_buffer);
3280        rma.setMetadata(m.positive, m.len);
3281    }
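
    // Illustrative example added for exposition; it is not part of the upstream test suite.
    test "gcd (illustrative)" {
        var x = try Managed.initSet(std.testing.allocator, 12);
        defer x.deinit();
        var y = try Managed.initSet(std.testing.allocator, 18);
        defer y.deinit();
        var g = try Managed.init(std.testing.allocator);
        defer g.deinit();
        try g.gcd(&x, &y);
        try std.testing.expectEqual(@as(u32, 6), try g.toInt(u32));
    }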
3282
3283    /// r = a * a
3284    pub fn sqr(rma: *Managed, a: *const Managed) !void {
3285        const needed_limbs = 2 * a.len() + 1;
3286
3287        if (rma.limbs.ptr == a.limbs.ptr) {
3288            var m = try Managed.initCapacity(rma.allocator, needed_limbs);
3289            errdefer m.deinit();
3290            var m_mut = m.toMutable();
3291            m_mut.sqrNoAlias(a.toConst(), rma.allocator);
3292            m.setMetadata(m_mut.positive, m_mut.len);
3293
3294            rma.deinit();
3295            rma.swap(&m);
3296        } else {
3297            try rma.ensureCapacity(needed_limbs);
3298            var rma_mut = rma.toMutable();
3299            rma_mut.sqrNoAlias(a.toConst(), rma.allocator);
3300            rma.setMetadata(rma_mut.positive, rma_mut.len);
3301        }
3302    }
3303
3304    pub fn pow(rma: *Managed, a: *const Managed, b: u32) !void {
3305        const needed_limbs = calcPowLimbsBufferLen(a.bitCountAbs(), b);
3306
3307        const limbs_buffer = try rma.allocator.alloc(Limb, needed_limbs);
3308        defer rma.allocator.free(limbs_buffer);
3309
3310        if (rma.limbs.ptr == a.limbs.ptr) {
3311            var m = try Managed.initCapacity(rma.allocator, needed_limbs);
3312            errdefer m.deinit();
3313            var m_mut = m.toMutable();
3314            m_mut.pow(a.toConst(), b, limbs_buffer);
3315            m.setMetadata(m_mut.positive, m_mut.len);
3316
3317            rma.deinit();
3318            rma.swap(&m);
3319        } else {
3320            try rma.ensureCapacity(needed_limbs);
3321            var rma_mut = rma.toMutable();
3322            rma_mut.pow(a.toConst(), b, limbs_buffer);
3323            rma.setMetadata(rma_mut.positive, rma_mut.len);
3324        }
3325    }
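
    // Illustrative example added for exposition; it is not part of the upstream test suite.
    // 2^100 does not fit in any primitive integer, so the result is checked via its decimal form.
    test "pow (illustrative)" {
        var base = try Managed.initSet(std.testing.allocator, 2);
        defer base.deinit();
        var r = try Managed.init(std.testing.allocator);
        defer r.deinit();
        try r.pow(&base, 100);
        const s = try r.toString(std.testing.allocator, 10, .lower);
        defer std.testing.allocator.free(s);
        try std.testing.expectEqualStrings("1267650600228229401496703205376", s);
    }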
3326
3327    /// r = ⌊√a⌋
3328    pub fn sqrt(rma: *Managed, a: *const Managed) !void {
3329        const bit_count = a.bitCountAbs();
3330
3331        if (bit_count == 0) {
3332            try rma.set(0);
3333            rma.setMetadata(a.isPositive(), rma.len());
3334            return;
3335        }
3336
3337        if (!a.isPositive()) {
3338            return error.SqrtOfNegativeNumber;
3339        }
3340
3341        const needed_limbs = calcSqrtLimbsBufferLen(bit_count);
3342        const limbs_buffer = try rma.allocator.alloc(Limb, needed_limbs);
3343        defer rma.allocator.free(limbs_buffer);
3344
3345        try rma.ensureCapacity((a.len() - 1) / 2 + 1);
3346        var m = rma.toMutable();
3347        m.sqrt(a.toConst(), limbs_buffer);
3348        rma.setMetadata(m.positive, m.len);
3349    }
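
    // Illustrative example added for exposition; it is not part of the upstream test suite.
    test "sqrt (illustrative)" {
        var a = try Managed.initSet(std.testing.allocator, 1_000_000);
        defer a.deinit();
        var r = try Managed.init(std.testing.allocator);
        defer r.deinit();
        try r.sqrt(&a);
        try std.testing.expectEqual(@as(u32, 1000), try r.toInt(u32));
    }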
3350
3351    /// r = truncate(Int(signedness, bit_count), a)
3352    pub fn truncate(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void {
3353        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
3354        var m = r.toMutable();
3355        m.truncate(a.toConst(), signedness, bit_count);
3356        r.setMetadata(m.positive, m.len);
3357    }
3358
3359    /// r = saturate(Int(signedness, bit_count), a)
3360    pub fn saturate(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void {
3361        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
3362        var m = r.toMutable();
3363        m.saturate(a.toConst(), signedness, bit_count);
3364        r.setMetadata(m.positive, m.len);
3365    }
3366
3367    /// r = @popCount(a) with 2s-complement semantics.
3368    /// r and a may be aliases.
3369    pub fn popCount(r: *Managed, a: *const Managed, bit_count: usize) !void {
3370        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
3371        var m = r.toMutable();
3372        m.popCount(a.toConst(), bit_count);
3373        r.setMetadata(m.positive, m.len);
3374    }
3375};
3376
3377/// Different operators which can be used in accumulation style functions
3378/// (llmulacc, llmulaccKaratsuba, llmulaccLong, llmulLimb). In all these functions,
3379/// a computed value is accumulated with an existing result.
3380const AccOp = enum {
3381    /// The computed value is added to the result.
3382    add,
3383
3384    /// The computed value is subtracted from the result.
3385    sub,
3386};
3387
3388/// Knuth 4.3.1, Algorithm M.
3389///
3390/// r = r (op) a * b
3391/// r MUST NOT alias any of a or b.
3392///
3393/// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs.
3394fn llmulacc(comptime op: AccOp, opt_allocator: ?Allocator, r: []Limb, a: []const Limb, b: []const Limb) void {
3395    assert(r.len >= a.len);
3396    assert(r.len >= b.len);
3397    assert(!slicesOverlap(r, a));
3398    assert(!slicesOverlap(r, b));
3399
3400    // Order greatest first.
3401    var x = a;
3402    var y = b;
3403    if (a.len < b.len) {
3404        x = b;
3405        y = a;
3406    }
3407
3408    k_mul: {
3409        if (y.len > 48) {
3410            if (opt_allocator) |allocator| {
3411                llmulaccKaratsuba(op, allocator, r, x, y) catch |err| switch (err) {
3412                    error.OutOfMemory => break :k_mul, // handled below
3413                };
3414                return;
3415            }
3416        }
3417    }
3418
3419    llmulaccLong(op, r, x, y);
3420}
3421
3422/// Karatsuba multiplication.
3423///
3424/// r = r (op) a * b
3425/// r MUST NOT alias any of a or b.
3426///
3427/// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs.
3428fn llmulaccKaratsuba(
3429    comptime op: AccOp,
3430    allocator: Allocator,
3431    r: []Limb,
3432    a: []const Limb,
3433    b: []const Limb,
3434) error{OutOfMemory}!void {
3435    assert(r.len >= a.len);
3436    assert(a.len >= b.len);
3437    assert(!slicesOverlap(r, a));
3438    assert(!slicesOverlap(r, b));
3439
3440    // Classical karatsuba algorithm:
3441    // a = a1 * B + a0
3442    // b = b1 * B + b0
3443    // Where a0, b0 < B
3444    //
3445    // We then have:
3446    // ab = a * b
3447    //    = (a1 * B + a0) * (b1 * B + b0)
3448    //    = a1 * b1 * B * B + a1 * B * b0 + a0 * b1 * B + a0 * b0
3449    //    = a1 * b1 * B * B + (a1 * b0 + a0 * b1) * B + a0 * b0
3450    //
3451    // Note that:
3452    // a1 * b0 + a0 * b1
3453    //    = (a1 + a0)(b1 + b0) - a1 * b1 - a0 * b0
3454    //    = (a0 - a1)(b1 - b0) + a1 * b1 + a0 * b0
3455    //
3456    // This yields:
3457    // ab = p2 * B^2 + (p0 + p1 + p2) * B + p0
3458    //
3459    // Where:
3460    // p0 = a0 * b0
3461    // p1 = (a0 - a1)(b1 - b0)
3462    // p2 = a1 * b1
3463    //
3464    // Note, (a0 - a1) and (b1 - b0) produce values -B < x < B, and so we need to mind the sign here.
3465    // We also have:
3466    // 0 <= p0 < B^2
3467    // -B^2 < p1 < B^2
3468    //
3469    // Note, when B is a multiple of the limb size, multiplies by B amount to shifts or
3470    // slices of a limbs array.
3471    //
3472    // This function computes the result of the multiplication modulo r.len. This means:
3473    // - p2 and p1 only need to be computed modulo r.len - B.
3474    // - In the case of p2, p2 * B^2 needs to be added modulo r.len - 2 * B.
3475
3476    const split = b.len / 2; // B
3477
3478    const limbs_after_split = r.len - split; // Limbs to compute for p1 and p2.
3479    const limbs_after_split2 = r.len - split * 2; // Limbs to add for p2 * B^2.
3480
3481    // For a0 and b0 we need the full range.
3482    const a0 = a[0..llnormalize(a[0..split])];
3483    const b0 = b[0..llnormalize(b[0..split])];
3484
3485    // For a1 and b1 we only need `limbs_after_split` limbs.
3486    const a1 = blk: {
3487        var a1 = a[split..];
3488        a1.len = @min(llnormalize(a1), limbs_after_split);
3489        break :blk a1;
3490    };
3491
3492    const b1 = blk: {
3493        var b1 = b[split..];
3494        b1.len = @min(llnormalize(b1), limbs_after_split);
3495        break :blk b1;
3496    };
3497
3498    // Note that the above slices relative to `split` work because we have a.len > b.len.
3499
3500    // We need some temporary memory to store intermediate results.
3501    // Note, we can reduce the amount of temporaries we need by reordering the computation here:
3502    // ab = p2 * B^2 + (p0 + p1 + p2) * B + p0
3503    //    = p2 * B^2 + (p0 * B + p1 * B + p2 * B) + p0
3504    //    = (p2 * B^2 + p2 * B) + (p0 * B + p0) + p1 * B
3505
3506    // Allocate at least enough memory to be able to multiply the upper two segments of a and b, assuming
3507    // no overflow.
3508    const tmp = try allocator.alloc(Limb, a.len - split + b.len - split);
3509    defer allocator.free(tmp);
3510
3511    // Compute p2.
3512    // Note, we don't need to compute all of p2, just enough limbs to satisfy r.
3513    const p2_limbs = @min(limbs_after_split, a1.len + b1.len);
3514
3515    @memset(tmp[0..p2_limbs], 0);
3516    llmulacc(.add, allocator, tmp[0..p2_limbs], a1[0..@min(a1.len, p2_limbs)], b1[0..@min(b1.len, p2_limbs)]);
3517    const p2 = tmp[0..llnormalize(tmp[0..p2_limbs])];
3518
3519    // Add p2 * B to the result.
3520    llaccum(op, r[split..], p2);
3521
3522    // Add p2 * B^2 to the result if required.
3523    if (limbs_after_split2 > 0) {
3524        llaccum(op, r[split * 2 ..], p2[0..@min(p2.len, limbs_after_split2)]);
3525    }
3526
3527    // Compute p0.
3528    // Since a0.len, b0.len <= split and r.len >= split * 2, the full width of p0 needs to be computed.
3529    const p0_limbs = a0.len + b0.len;
3530    @memset(tmp[0..p0_limbs], 0);
3531    llmulacc(.add, allocator, tmp[0..p0_limbs], a0, b0);
3532    const p0 = tmp[0..llnormalize(tmp[0..p0_limbs])];
3533
3534    // Add p0 to the result.
3535    llaccum(op, r, p0);
3536
3537    // Add p0 * B to the result. In this case, we may not need all of it.
3538    llaccum(op, r[split..], p0[0..@min(limbs_after_split, p0.len)]);
3539
3540    // Finally, compute and add p1.
3541    // From now on we only need `limbs_after_split` limbs for a0 and b0, since the result of the
3542    // following computation will be added * B.
3543    const a0x = a0[0..@min(a0.len, limbs_after_split)];
3544    const b0x = b0[0..@min(b0.len, limbs_after_split)];
3545
3546    const j0_sign = llcmp(a0x, a1);
3547    const j1_sign = llcmp(b1, b0x);
3548
3549    if (j0_sign * j1_sign == 0) {
3550        // p1 is zero, we don't need to do any computation at all.
3551        return;
3552    }
3553
3554    @memset(tmp, 0);
3555
3556    // p1 is nonzero, so compute the intermediary terms j0 = a0 - a1 and j1 = b1 - b0.
3557    // Note that in this case, we again need some storage for intermediary results
3558    // j0 and j1. Since we have tmp.len >= 2B, we can store both
3559    // intermediaries in the already allocated array.
3560    const j0 = tmp[0 .. a.len - split];
3561    const j1 = tmp[a.len - split ..];
3562
3563    // Ensure that no subtraction overflows.
3564    if (j0_sign == 1) {
3565        // a0 > a1.
3566        _ = llsubcarry(j0, a0x, a1);
3567    } else {
3568        // a0 < a1.
3569        _ = llsubcarry(j0, a1, a0x);
3570    }
3571
3572    if (j1_sign == 1) {
3573        // b1 > b0.
3574        _ = llsubcarry(j1, b1, b0x);
3575    } else {
3576            // b1 < b0.
3577        _ = llsubcarry(j1, b0x, b1);
3578    }
3579
3580    if (j0_sign * j1_sign == 1) {
3581        // If j0 and j1 are both positive, we now have:
3582        // p1 = j0 * j1
3583        // If j0 and j1 are both negative, we now have:
3584        // p1 = -j0 * -j1 = j0 * j1
3585        // In this case we can add p1 to the result using llmulacc.
3586        llmulacc(op, allocator, r[split..], j0[0..llnormalize(j0)], j1[0..llnormalize(j1)]);
3587    } else {
3588        // In this case either j0 or j1 is negative, and we have:
3589        // p1 = -(j0 * j1)
3590        // Now we need to subtract instead of accumulate.
3591        const inverted_op = if (op == .add) .sub else .add;
3592        llmulacc(inverted_op, allocator, r[split..], j0[0..llnormalize(j0)], j1[0..llnormalize(j1)]);
3593    }
3594}
3595
3596/// r = r (op) a.
3597/// The result is computed modulo `r.len`.
3598fn llaccum(comptime op: AccOp, r: []Limb, a: []const Limb) void {
3599    if (op == .sub) {
3600        _ = llsubcarry(r, r, a);
3601        return;
3602    }
3603
3604    assert(r.len != 0 and a.len != 0);
3605    assert(r.len >= a.len);
3606
3607    var i: usize = 0;
3608    var carry: Limb = 0;
3609
3610    while (i < a.len) : (i += 1) {
3611        const ov1 = @addWithOverflow(r[i], a[i]);
3612        r[i] = ov1[0];
3613        const ov2 = @addWithOverflow(r[i], carry);
3614        r[i] = ov2[0];
3615        carry = @as(Limb, ov1[1]) + ov2[1];
3616    }
3617
3618    while ((carry != 0) and i < r.len) : (i += 1) {
3619        const ov = @addWithOverflow(r[i], carry);
3620        r[i] = ov[0];
3621        carry = ov[1];
3622    }
3623}
3624
3625/// Returns -1, 0, 1 if |a| < |b|, |a| == |b| or |a| > |b| respectively for limbs.
3626pub fn llcmp(a: []const Limb, b: []const Limb) i8 {
3627    const a_len = llnormalize(a);
3628    const b_len = llnormalize(b);
3629    if (a_len < b_len) {
3630        return -1;
3631    }
3632    if (a_len > b_len) {
3633        return 1;
3634    }
3635
3636    var i: usize = a_len - 1;
3637    while (i != 0) : (i -= 1) {
3638        if (a[i] != b[i]) {
3639            break;
3640        }
3641    }
3642
3643    if (a[i] < b[i]) {
3644        return -1;
3645    } else if (a[i] > b[i]) {
3646        return 1;
3647    } else {
3648        return 0;
3649    }
3650}
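
// Illustrative example added for exposition; it is not part of the upstream test suite.
// llcmp compares normalized magnitudes, so leading zero limbs are ignored.
test "llcmp (illustrative)" {
    try std.testing.expectEqual(@as(i8, 0), llcmp(&[_]Limb{ 5, 0 }, &[_]Limb{5}));
    try std.testing.expectEqual(@as(i8, -1), llcmp(&[_]Limb{4}, &[_]Limb{5}));
    try std.testing.expectEqual(@as(i8, 1), llcmp(&[_]Limb{ 0, 1 }, &[_]Limb{maxInt(Limb)}));
}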
3651
3652/// r = r (op) a * b
3653/// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs.
3654fn llmulaccLong(comptime op: AccOp, r: []Limb, a: []const Limb, b: []const Limb) void {
3655    assert(r.len >= a.len);
3656    assert(a.len >= b.len);
3657
3658    var i: usize = 0;
3659    while (i < b.len) : (i += 1) {
3660        _ = llmulLimb(op, r[i..], a, b[i]);
3661    }
3662}
3663
3664/// r = r (op) y * xi
3665/// The result is computed modulo `r.len`.
3666/// Returns whether the operation overflowed.
3667fn llmulLimb(comptime op: AccOp, acc: []Limb, y: []const Limb, xi: Limb) bool {
3668    if (xi == 0) {
3669        return false;
3670    }
3671
3672    const split = @min(y.len, acc.len);
3673    var a_lo = acc[0..split];
3674    var a_hi = acc[split..];
3675
3676    switch (op) {
3677        .add => {
3678            var carry: Limb = 0;
3679            var j: usize = 0;
3680            while (j < a_lo.len) : (j += 1) {
3681                a_lo[j] = addMulLimbWithCarry(a_lo[j], y[j], xi, &carry);
3682            }
3683
3684            j = 0;
3685            while ((carry != 0) and (j < a_hi.len)) : (j += 1) {
3686                const ov = @addWithOverflow(a_hi[j], carry);
3687                a_hi[j] = ov[0];
3688                carry = ov[1];
3689            }
3690
3691            return carry != 0;
3692        },
3693        .sub => {
3694            var borrow: Limb = 0;
3695            var j: usize = 0;
3696            while (j < a_lo.len) : (j += 1) {
3697                a_lo[j] = subMulLimbWithBorrow(a_lo[j], y[j], xi, &borrow);
3698            }
3699
3700            j = 0;
3701            while ((borrow != 0) and (j < a_hi.len)) : (j += 1) {
3702                const ov = @subWithOverflow(a_hi[j], borrow);
3703                a_hi[j] = ov[0];
3704                borrow = ov[1];
3705            }
3706
3707            return borrow != 0;
3708        },
3709    }
3710}
3711
3712/// Returns the minimum number of limbs needed to represent `a`, i.e. its normalized length.
3713fn llnormalize(a: []const Limb) usize {
3714    var j = a.len;
3715    while (j > 0) : (j -= 1) {
3716        if (a[j - 1] != 0) {
3717            break;
3718        }
3719    }
3720
3721    // Handle zero
3722    return if (j != 0) j else 1;
3723}
3724
3725/// Knuth 4.3.1, Algorithm S.
3726fn llsubcarry(r: []Limb, a: []const Limb, b: []const Limb) Limb {
3727    assert(a.len != 0 and b.len != 0);
3728    assert(a.len >= b.len);
3729    assert(r.len >= a.len);
3730
3731    var i: usize = 0;
3732    var borrow: Limb = 0;
3733
3734    while (i < b.len) : (i += 1) {
3735        const ov1 = @subWithOverflow(a[i], b[i]);
3736        r[i] = ov1[0];
3737        const ov2 = @subWithOverflow(r[i], borrow);
3738        r[i] = ov2[0];
3739        borrow = @as(Limb, ov1[1]) + ov2[1];
3740    }
3741
3742    while (i < a.len) : (i += 1) {
3743        const ov = @subWithOverflow(a[i], borrow);
3744        r[i] = ov[0];
3745        borrow = ov[1];
3746    }
3747
3748    return borrow;
3749}
3750
3751fn llsub(r: []Limb, a: []const Limb, b: []const Limb) void {
3752    assert(a.len > b.len or (a.len == b.len and a[a.len - 1] >= b[b.len - 1]));
3753    assert(llsubcarry(r, a, b) == 0);
3754}
3755
3756/// Knuth 4.3.1, Algorithm A.
3757fn lladdcarry(r: []Limb, a: []const Limb, b: []const Limb) Limb {
3758    assert(a.len != 0 and b.len != 0);
3759    assert(a.len >= b.len);
3760    assert(r.len >= a.len);
3761
3762    var i: usize = 0;
3763    var carry: Limb = 0;
3764
3765    while (i < b.len) : (i += 1) {
3766        const ov1 = @addWithOverflow(a[i], b[i]);
3767        r[i] = ov1[0];
3768        const ov2 = @addWithOverflow(r[i], carry);
3769        r[i] = ov2[0];
3770        carry = @as(Limb, ov1[1]) + ov2[1];
3771    }
3772
3773    while (i < a.len) : (i += 1) {
3774        const ov = @addWithOverflow(a[i], carry);
3775        r[i] = ov[0];
3776        carry = ov[1];
3777    }
3778
3779    return carry;
3780}
3781
3782fn lladd(r: []Limb, a: []const Limb, b: []const Limb) void {
3783    assert(r.len >= a.len + 1);
3784    r[a.len] = lladdcarry(r, a, b);
3785}
3786
3787/// Knuth 4.3.1, Exercise 16.
3788fn lldiv1(quo: []Limb, rem: *Limb, a: []const Limb, b: Limb) void {
3789    assert(a.len > 1 or a[0] >= b);
3790    assert(quo.len >= a.len);
3791
3792    rem.* = 0;
3793    for (a, 0..) |_, ri| {
3794        const i = a.len - ri - 1;
3795        const pdiv = ((@as(DoubleLimb, rem.*) << limb_bits) | a[i]);
3796
3797        if (pdiv == 0) {
3798            quo[i] = 0;
3799            rem.* = 0;
3800        } else if (pdiv < b) {
3801            quo[i] = 0;
3802            rem.* = @as(Limb, @truncate(pdiv));
3803        } else if (pdiv == b) {
3804            quo[i] = 1;
3805            rem.* = 0;
3806        } else {
3807            quo[i] = @as(Limb, @truncate(@divTrunc(pdiv, b)));
3808            rem.* = @as(Limb, @truncate(pdiv - (quo[i] *% b)));
3809        }
3810    }
3811}
3812
3813fn lldiv0p5(quo: []Limb, rem: *Limb, a: []const Limb, b: HalfLimb) void {
3814    assert(a.len > 1 or a[0] >= b);
3815    assert(quo.len >= a.len);
3816
3817    rem.* = 0;
3818    for (a, 0..) |_, ri| {
3819        const i = a.len - ri - 1;
3820        const ai_high = a[i] >> half_limb_bits;
3821        const ai_low = a[i] & ((1 << half_limb_bits) - 1);
3822
3823        // Split the division into two divisions acting on half a limb each. Carry remainder.
3824        const ai_high_with_carry = (rem.* << half_limb_bits) | ai_high;
3825        const ai_high_quo = ai_high_with_carry / b;
3826        rem.* = ai_high_with_carry % b;
3827
3828        const ai_low_with_carry = (rem.* << half_limb_bits) | ai_low;
3829        const ai_low_quo = ai_low_with_carry / b;
3830        rem.* = ai_low_with_carry % b;
3831
3832        quo[i] = (ai_high_quo << half_limb_bits) | ai_low_quo;
3833    }
3834}
3835
3836/// Performs r = a << shift and returns the number of limbs written to r
3837///
3838/// if a and r overlap, then r.ptr >= a.ptr is asserted
3839/// r must have the capacity to store a << shift
3840fn llshl(r: []Limb, a: []const Limb, shift: usize) usize {
3841    std.debug.assert(a.len >= 1);
3842    if (slicesOverlap(a, r))
3843        std.debug.assert(@intFromPtr(r.ptr) >= @intFromPtr(a.ptr));
3844
3845    if (shift == 0) {
3846        if (a.ptr != r.ptr) @memmove(r[0..a.len], a);
3847        return a.len;
3848    }
3849    if (shift >= limb_bits) {
3850        const limb_shift = shift / limb_bits;
3851
3852        const affected = llshl(r[limb_shift..], a, shift % limb_bits);
3853        @memset(r[0..limb_shift], 0);
3854
3855        return limb_shift + affected;
3856    }
3857
3858    // shift is guaranteed to be < limb_bits
3859    const bit_shift: Log2Limb = @truncate(shift);
3860    const opposite_bit_shift: Log2Limb = @truncate(limb_bits - bit_shift);
3861
3862    // We only need the extra limb if the shift of the last element overflows.
3863    // This is useful for the implementation of `shiftLeftSat`.
3864    const overflows = a[a.len - 1] >> opposite_bit_shift != 0;
3865    if (overflows) {
3866        std.debug.assert(r.len >= a.len + 1);
3867    } else {
3868        std.debug.assert(r.len >= a.len);
3869    }
3870
3871    var i: usize = a.len;
3872    if (overflows) {
3873        // r is asserted to be large enough above
3874        r[a.len] = a[a.len - 1] >> opposite_bit_shift;
3875    }
3876    while (i > 1) {
3877        i -= 1;
3878        r[i] = (a[i - 1] >> opposite_bit_shift) | (a[i] << bit_shift);
3879    }
3880    r[0] = a[0] << bit_shift;
3881
3882    return a.len + @intFromBool(overflows);
3883}
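
// Illustrative example added for exposition; it is not part of the upstream test suite.
// Shifting by a whole number of limbs moves the input up and zero-fills the low limbs;
// the return value is the number of limbs written to r.
test "llshl by one whole limb (illustrative)" {
    var r: [2]Limb = undefined;
    const a = [_]Limb{1};
    try std.testing.expectEqual(@as(usize, 2), llshl(&r, &a, limb_bits));
    try std.testing.expectEqual(@as(Limb, 0), r[0]);
    try std.testing.expectEqual(@as(Limb, 1), r[1]);
}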
3884
3885/// Performs r = a >> shift and returns the number of limbs written to r
3886///
3887/// if a and r overlap, then r.ptr <= a.ptr is asserted
3888/// r must have the capacity to store a >> shift
3889///
3890/// See tests below for examples of behaviour
3891fn llshr(r: []Limb, a: []const Limb, shift: usize) usize {
3892    if (slicesOverlap(a, r))
3893        std.debug.assert(@intFromPtr(r.ptr) <= @intFromPtr(a.ptr));
3894
3895    if (a.len == 0) return 0;
3896
3897    if (shift == 0) {
3898        std.debug.assert(r.len >= a.len);
3899
3900        if (a.ptr != r.ptr) @memmove(r[0..a.len], a);
3901        return a.len;
3902    }
3903    if (shift >= limb_bits) {
3904        if (shift / limb_bits >= a.len) {
3905            r[0] = 0;
3906            return 1;
3907        }
3908        return llshr(r, a[shift / limb_bits ..], shift % limb_bits);
3909    }
3910
3911    // shift is guaranteed to be < limb_bits
3912    const bit_shift: Log2Limb = @truncate(shift);
3913    const opposite_bit_shift: Log2Limb = @truncate(limb_bits - bit_shift);
3914
3915    // Special case: ensure at least one limb of r is written so the result never has zero limbs
3916    if (a.len == 1) {
3917        r[0] = a[0] >> bit_shift;
3918        return 1;
3919    }
3920    if (a.len == 0) {
3921        r[0] = 0;
3922        return 1;
3923    }
3924
3925    // if the most significant limb becomes 0 after the shift
3926    const shrink = a[a.len - 1] >> bit_shift == 0;
3927    std.debug.assert(r.len >= a.len - @intFromBool(shrink));
3928
3929    var i: usize = 0;
3930    while (i < a.len - 1) : (i += 1) {
3931        r[i] = (a[i] >> bit_shift) | (a[i + 1] << opposite_bit_shift);
3932    }
3933
3934    if (!shrink)
3935        r[i] = a[i] >> bit_shift;
3936
3937    return a.len - @intFromBool(shrink);
3938}
3939
3940// r = ~r
3941fn llnot(r: []Limb) void {
3942    for (r) |*elem| {
3943        elem.* = ~elem.*;
3944    }
3945}
3946
3947// r = a | b with 2s complement semantics.
3948// r may alias.
3949// a and b must not be 0.
3950// Returns `true` when the result is positive.
3951// When b is positive, r requires at least `a.len` limbs of storage.
3952// When b is negative, r requires at least `b.len` limbs of storage.
3953fn llsignedor(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_positive: bool) bool {
3954    assert(r.len >= a.len);
3955    assert(a.len >= b.len);
3956
3957    if (a_positive and b_positive) {
3958        // Trivial case, result is positive.
3959        var i: usize = 0;
3960        while (i < b.len) : (i += 1) {
3961            r[i] = a[i] | b[i];
3962        }
3963        while (i < a.len) : (i += 1) {
3964            r[i] = a[i];
3965        }
3966
3967        return true;
3968    } else if (!a_positive and b_positive) {
3969        // Result is negative.
3970        // r = (--a) | b
3971        //   = ~(-a - 1) | b
3972        //   = ~(-a - 1) | ~~b
3973        //   = ~((-a - 1) & ~b)
3974        //   = -(((-a - 1) & ~b) + 1)
3975
3976        var i: usize = 0;
3977        var a_borrow: u1 = 1;
3978        var r_carry: u1 = 1;
3979
3980        while (i < b.len) : (i += 1) {
3981            const ov1 = @subWithOverflow(a[i], a_borrow);
3982            a_borrow = ov1[1];
3983            const ov2 = @addWithOverflow(ov1[0] & ~b[i], r_carry);
3984            r[i] = ov2[0];
3985            r_carry = ov2[1];
3986        }
3987
3988        // In order for r_carry to be nonzero at this point, ~b[i] would need to be
3989        // all ones, which would require b[i] to be zero. This cannot be when
3990        // b is normalized, so there cannot be a carry here.
3991        // Also, x & ~b can only clear bits, so (x & ~b) <= x, meaning (-a - 1) + 1 never overflows.
3992        assert(r_carry == 0);
3993
3994        // With b = 0, we get (-a - 1) & ~0 = -a - 1.
3995        // Note, if a_borrow is zero we do not need to compute anything for
3996        // the higher limbs so we can early return here.
3997        while (i < a.len and a_borrow == 1) : (i += 1) {
3998            const ov = @subWithOverflow(a[i], a_borrow);
3999            r[i] = ov[0];
4000            a_borrow = ov[1];
4001        }
4002
4003        assert(a_borrow == 0); // a was 0.
4004
4005        return false;
4006    } else if (a_positive and !b_positive) {
4007        // Result is negative.
4008        // r = a | (--b)
4009        //   = a | ~(-b - 1)
4010        //   = ~~a | ~(-b - 1)
4011        //   = ~(~a & (-b - 1))
4012        //   = -((~a & (-b - 1)) + 1)
4013
4014        var i: usize = 0;
4015        var b_borrow: u1 = 1;
4016        var r_carry: u1 = 1;
4017
4018        while (i < b.len) : (i += 1) {
4019            const ov1 = @subWithOverflow(b[i], b_borrow);
4020            b_borrow = ov1[1];
4021            const ov2 = @addWithOverflow(~a[i] & ov1[0], r_carry);
4022            r[i] = ov2[0];
4023            r_carry = ov2[1];
4024        }
4025
4026        // b is at least 1, so this should never underflow.
4027        assert(b_borrow == 0); // b was 0
4028
4029        // x & ~a can only clear bits, so (x & ~a) <= x, meaning (-b - 1) + 1 never overflows.
4030        assert(r_carry == 0);
4031
4032        // With b = 0 and b_borrow = 0, we get ~a & (0 - 0) = ~a & 0 = 0.
4033        // Omit setting the upper limbs, just deal with those when calling llsignedor.
4034
4035        return false;
4036    } else {
4037        // Result is negative.
4038        // r = (--a) | (--b)
4039        //   = ~(-a - 1) | ~(-b - 1)
4040        //   = ~((-a - 1) & (-b - 1))
4041        //   = -(~(~((-a - 1) & (-b - 1))) + 1)
4042        //   = -((-a - 1) & (-b - 1) + 1)
4043
4044        var i: usize = 0;
4045        var a_borrow: u1 = 1;
4046        var b_borrow: u1 = 1;
4047        var r_carry: u1 = 1;
4048
4049        while (i < b.len) : (i += 1) {
4050            const ov1 = @subWithOverflow(a[i], a_borrow);
4051            a_borrow = ov1[1];
4052            const ov2 = @subWithOverflow(b[i], b_borrow);
4053            b_borrow = ov2[1];
4054            const ov3 = @addWithOverflow(ov1[0] & ov2[0], r_carry);
4055            r[i] = ov3[0];
4056            r_carry = ov3[1];
4057        }
4058
4059        // b is at least 1, so this should never underflow.
4060        assert(b_borrow == 0); // b was 0
4061
4062        // Can never overflow because in order for b_limb to be maxInt(Limb),
4063        // b_borrow would need to equal 1.
4064
4065        // x & y can only clear bits, meaning x & y <= x and x & y <= y. This implies that
4066        // for x = a - 1 and y = b - 1, the +1 term would never cause an overflow.
4067        assert(r_carry == 0);
4068
4069        // With b = 0 and b_borrow = 0 we get (-a - 1) & (0 - 0) = (-a - 1) & 0 = 0.
4070        // Omit setting the upper limbs, just deal with those when calling llsignedor.
4071        return false;
4072    }
4073}
4074
4075// r = a & b with 2s complement semantics.
4076// r may alias.
4077// a and b must not be 0.
4078// Returns `true` when the result is positive.
4079// We assume `a.len >= b.len` here, so:
4080// 1. when b is positive, r requires at least `b.len` limbs of storage,
4081// 2. when b is negative but a is positive, r requires at least `a.len` limbs of storage,
4082// 3. when both a and b are negative, r requires at least `a.len + 1` limbs of storage.
4083fn llsignedand(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_positive: bool) bool {
4084    assert(a.len != 0 and b.len != 0);
4085    assert(a.len >= b.len);
4086    assert(r.len >= if (b_positive) b.len else if (a_positive) a.len else a.len + 1);
4087
4088    if (a_positive and b_positive) {
4089        // Trivial case, result is positive.
4090        var i: usize = 0;
4091        while (i < b.len) : (i += 1) {
4092            r[i] = a[i] & b[i];
4093        }
4094
4095        // With b = 0 we have a & 0 = 0, so the upper limbs are zero.
4096        // Omit setting them here and simply discard them whenever
4097        // llsignedand is called.
4098
4099        return true;
4100    } else if (!a_positive and b_positive) {
4101        // Result is positive.
4102        // r = (--a) & b
4103        //   = ~(-a - 1) & b
4104
4105        var i: usize = 0;
4106        var a_borrow: u1 = 1;
4107
4108        while (i < b.len) : (i += 1) {
4109            const ov = @subWithOverflow(a[i], a_borrow);
4110            a_borrow = ov[1];
4111            r[i] = ~ov[0] & b[i];
4112        }
4113
4114        // With b = 0 we have ~(a - 1) & 0 = 0, so the upper limbs are zero.
4115        // Omit setting them here and simply discard them whenever
4116        // llsignedand is called.
4117
4118        return true;
4119    } else if (a_positive and !b_positive) {
4120        // Result is positive.
4121        // r = a & (--b)
4122        //   = a & ~(-b - 1)
4123
4124        var i: usize = 0;
4125        var b_borrow: u1 = 1;
4126
4127        while (i < b.len) : (i += 1) {
4128            const ov = @subWithOverflow(b[i], b_borrow);
4129            b_borrow = ov[1];
4130            r[i] = a[i] & ~ov[0];
4131        }
4132
4133        assert(b_borrow == 0); // b was 0
4134
4135        // With b = 0 and b_borrow = 0 we have a & ~(0 - 0) = a & ~0 = a, so
4136        // the upper limbs are the same as those of a.
4137
4138        while (i < a.len) : (i += 1) {
4139            r[i] = a[i];
4140        }
4141
4142        return true;
4143    } else {
4144        // Result is negative.
4145        // r = (--a) & (--b)
4146        //   = ~(-a - 1) & ~(-b - 1)
4147        //   = ~((-a - 1) | (-b - 1))
4148        //   = -(((-a - 1) | (-b - 1)) + 1)
4149
4150        var i: usize = 0;
4151        var a_borrow: u1 = 1;
4152        var b_borrow: u1 = 1;
4153        var r_carry: u1 = 1;
4154
4155        while (i < b.len) : (i += 1) {
4156            const ov1 = @subWithOverflow(a[i], a_borrow);
4157            a_borrow = ov1[1];
4158            const ov2 = @subWithOverflow(b[i], b_borrow);
4159            b_borrow = ov2[1];
4160            const ov3 = @addWithOverflow(ov1[0] | ov2[0], r_carry);
4161            r[i] = ov3[0];
4162            r_carry = ov3[1];
4163        }
4164
4165        // b is at least 1, so this should never underflow.
4166        assert(b_borrow == 0); // b was 0
4167
4168        // With b = 0 and b_borrow = 0 we get (-a - 1) | (0 - 0) = (-a - 1) | 0 = -a - 1.
4169        while (i < a.len) : (i += 1) {
4170            const ov1 = @subWithOverflow(a[i], a_borrow);
4171            a_borrow = ov1[1];
4172            const ov2 = @addWithOverflow(ov1[0], r_carry);
4173            r[i] = ov2[0];
4174            r_carry = ov2[1];
4175        }
4176
4177        assert(a_borrow == 0); // a was 0.
4178
4179        // The final addition can overflow here, so we need to keep that in mind.
4180        r[i] = r_carry;
4181
4182        return false;
4183    }
4184}
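
// Minimal sketches of the branches above: 12 & -10 == 4 (a positive, b negative,
// computed as a & ~(-b - 1)), and -6 & -4 == -8 (both negative, computed as
// -(((-a - 1) | (-b - 1)) + 1), which needs the extra carry limb).
test "llsignedand sketch" {
    var r: [2]Limb = @splat(0);

    try std.testing.expect(llsignedand(&r, &.{12}, true, &.{10}, false));
    try std.testing.expectEqual(4, r[0]);

    try std.testing.expect(!llsignedand(&r, &.{6}, false, &.{4}, false));
    try std.testing.expectEqualSlices(Limb, &.{ 8, 0 }, &r);
}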
4185
4186// r = a ^ b with 2s complement semantics.
4187// r may alias.
4188// a and b must not be -0.
4189// Returns `true` when the result is positive.
4190// If the signs of a and b are equal, then r requires at least `@max(a.len, b.len)` limbs.
4191// Otherwise, r requires at least `@max(a.len, b.len) + 1` limbs.
4192fn llsignedxor(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_positive: bool) bool {
4193    assert(a.len != 0 and b.len != 0);
4194    assert(r.len >= a.len);
4195    assert(a.len >= b.len);
4196
4197    // If a and b are positive, the result is positive and r = a ^ b.
4198    // If a is negative and b is positive, the result is negative and we have
4199    // r = --(--a ^ b)
4200    //   = --(~(-a - 1) ^ b)
4201    //   = -(~(~(-a - 1) ^ b) + 1)
4202    //   = -(((-a - 1) ^ b) + 1)
4203    // Same if a is positive and b is negative, sides switched.
4204    // If both a and b are negative, the result is positive and we have
4205    // r = (--a) ^ (--b)
4206    //   = ~(-a - 1) ^ ~(-b - 1)
4207    //   = (-a - 1) ^ (-b - 1)
4208    // These operations can be made more generic as follows:
4209    // - If a is negative, subtract 1 from |a| before the xor.
4210    // - If b is negative, subtract 1 from |b| before the xor.
4211    // - If the result is supposed to be negative, add 1.
4212
4213    var i: usize = 0;
4214    var a_borrow = @intFromBool(!a_positive);
4215    var b_borrow = @intFromBool(!b_positive);
4216    var r_carry = @intFromBool(a_positive != b_positive);
4217
4218    while (i < b.len) : (i += 1) {
4219        const ov1 = @subWithOverflow(a[i], a_borrow);
4220        a_borrow = ov1[1];
4221        const ov2 = @subWithOverflow(b[i], b_borrow);
4222        b_borrow = ov2[1];
4223        const ov3 = @addWithOverflow(ov1[0] ^ ov2[0], r_carry);
4224        r[i] = ov3[0];
4225        r_carry = ov3[1];
4226    }
4227
4228    while (i < a.len) : (i += 1) {
4229        const ov1 = @subWithOverflow(a[i], a_borrow);
4230        a_borrow = ov1[1];
4231        const ov2 = @addWithOverflow(ov1[0], r_carry);
4232        r[i] = ov2[0];
4233        r_carry = ov2[1];
4234    }
4235
4236    // If the inputs don't share the same sign, an extra limb is required.
4237    if (a_positive != b_positive) {
4238        r[i] = r_carry;
4239    } else {
4240        assert(r_carry == 0);
4241    }
4242
4243    assert(a_borrow == 0);
4244    assert(b_borrow == 0);
4245
4246    return a_positive == b_positive;
4247}
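
// Minimal sketches of the identities above: (-5) ^ 3 == -8 (signs differ, so the
// result gains a carry limb), and (-5) ^ (-3) == 6 (equal signs, result positive,
// computed as (-a - 1) ^ (-b - 1) on the magnitudes).
test "llsignedxor sketch" {
    var r: [2]Limb = @splat(0);

    try std.testing.expect(!llsignedxor(&r, &.{5}, false, &.{3}, true));
    try std.testing.expectEqualSlices(Limb, &.{ 8, 0 }, &r);

    try std.testing.expect(llsignedxor(&r, &.{5}, false, &.{3}, false));
    try std.testing.expectEqual(6, r[0]);
}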
4248
4249/// r MUST NOT alias x.
4250fn llsquareBasecase(r: []Limb, x: []const Limb) void {
4251    const x_norm = x;
4252    assert(r.len >= 2 * x_norm.len + 1);
4253    assert(!slicesOverlap(r, x));
4254
4255    // Compute the square of an N-limb bigint with only (N^2 + N)/2
4256    // multiplications by exploiting the symmetry of the coefficients around the
4257    // diagonal:
4258    //
4259    //           a   b   c *
4260    //           a   b   c =
4261    // -------------------
4262    //          ca  cb  cc +
4263    //      ba  bb  bc     +
4264    //  aa  ab  ac
4265    //
4266    // Note that:
4267    //  - Each mixed-product term appears twice for each column,
4268    //  - Squares are always in the 2k (0 <= k < N) column
4269
4270    for (x_norm, 0..) |v, i| {
4271        // Accumulate all the x[i]*x[j] (with i != j) products
4272        const overflow = llmulLimb(.add, r[2 * i + 1 ..], x_norm[i + 1 ..], v);
4273        assert(!overflow);
4274    }
4275
4276    // Each product appears twice, multiply by 2
4277    _ = llshl(r, r[0 .. 2 * x_norm.len], 1);
4278
4279    for (x_norm, 0..) |v, i| {
4280        // Compute and add the squares
4281        const overflow = llmulLimb(.add, r[2 * i ..], x[i..][0..1], v);
4282        assert(!overflow);
4283    }
4284}
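
// A minimal sketch of the scheme above, assuming r is pre-zeroed as the caller in
// llpow does: squaring the two-limb value B + 2 (with B = 1 << limb_bits) yields
// B^2 + 4*B + 4, i.e. the limbs { 4, 4, 1 }.
test "llsquareBasecase sketch" {
    var r: [5]Limb = @splat(0);
    llsquareBasecase(&r, &.{ 2, 1 });
    try std.testing.expectEqualSlices(Limb, &.{ 4, 4, 1, 0, 0 }, &r);
}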
4285
4286/// Knuth 4.6.3
4287fn llpow(r: []Limb, a: []const Limb, b: u32, tmp_limbs: []Limb) void {
4288    var tmp1: []Limb = undefined;
4289    var tmp2: []Limb = undefined;
4290
4291    // Multiplication requires that the operands do not alias the result, so the
4292    // output limbs and a second temporary buffer are alternated to work around
4293    // this limitation.
4294    // The initial assignment is chosen so that the result ends up in `r`, saving
4295    // an extra memory copy: each 1 bit of the exponent flips the buffer index
4296    // twice, so only the number of 0 bits determines where the result lands.
4297    const b_leading_zeros = @clz(b);
4298    const exp_zeros = @popCount(~b) - b_leading_zeros;
4299    if (exp_zeros & 1 != 0) {
4300        tmp1 = tmp_limbs;
4301        tmp2 = r;
4302    } else {
4303        tmp1 = r;
4304        tmp2 = tmp_limbs;
4305    }
4306
4307    @memcpy(tmp1[0..a.len], a);
4308    @memset(tmp1[a.len..], 0);
4309
4310    // Scan the exponent as a binary number, from left to right, dropping the
4311    // most significant bit set.
4312    // Square the result if the current bit is zero, square and multiply by a if
4313    // it is one.
4314    const exp_bits = 32 - 1 - b_leading_zeros;
4315    var exp = b << @as(u5, @intCast(1 + b_leading_zeros));
4316
4317    var i: usize = 0;
4318    while (i < exp_bits) : (i += 1) {
4319        // Square
4320        @memset(tmp2, 0);
4321        llsquareBasecase(tmp2, tmp1[0..llnormalize(tmp1)]);
4322        mem.swap([]Limb, &tmp1, &tmp2);
4323        // Multiply by a
4324        const ov = @shlWithOverflow(exp, 1);
4325        exp = ov[0];
4326        if (ov[1] != 0) {
4327            @memset(tmp2, 0);
4328            llmulacc(.add, null, tmp2, tmp1[0..llnormalize(tmp1)], a);
4329            mem.swap([]Limb, &tmp1, &tmp2);
4330        }
4331    }
4332}
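
// A minimal sketch of the square-and-multiply scan above; buffer sizes are chosen
// generously here rather than via calcPowLimbsBufferLen. The initial buffer
// choice guarantees the result lands in `r`: 3^4 == 81.
test "llpow sketch" {
    var r: [8]Limb = @splat(0);
    var tmp: [8]Limb = @splat(0);
    llpow(&r, &.{3}, 4, &tmp);
    try std.testing.expectEqual(81, r[0]);
    try std.testing.expect(mem.allEqual(Limb, r[1..], 0));
}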
4333
4334// Storage must live for the lifetime of the returned value
4335fn fixedIntFromSignedDoubleLimb(A: SignedDoubleLimb, storage: []Limb) Mutable {
4336    assert(storage.len >= 2);
4337
4338    const A_is_positive = A >= 0;
4339    const Au = @as(DoubleLimb, @intCast(if (A < 0) -A else A));
4340    storage[0] = @as(Limb, @truncate(Au));
4341    storage[1] = @as(Limb, @truncate(Au >> limb_bits));
4342    return .{
4343        .limbs = storage[0..2],
4344        .positive = A_is_positive,
4345        .len = 2,
4346    };
4347}
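
// A minimal sketch: -5 becomes a two-limb, non-normalized Mutable with magnitude
// limbs { 5, 0 } backed by the caller-provided storage.
test "fixedIntFromSignedDoubleLimb sketch" {
    var storage: [2]Limb = undefined;
    const x = fixedIntFromSignedDoubleLimb(-5, &storage);
    try std.testing.expect(!x.positive);
    try std.testing.expectEqual(2, x.len);
    try std.testing.expectEqualSlices(Limb, &.{ 5, 0 }, x.limbs);
}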
4348
4349fn slicesOverlap(a: []const Limb, b: []const Limb) bool {
4350    // there is no overlap if a.ptr + a.len <= b.ptr or b.ptr + b.len <= a.ptr
4351    return @intFromPtr(a.ptr + a.len) > @intFromPtr(b.ptr) and @intFromPtr(b.ptr + b.len) > @intFromPtr(a.ptr);
4352}
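
// A minimal sketch of the half-open overlap rule described above: adjacent slices
// do not overlap, partially overlapping slices do.
test "slicesOverlap sketch" {
    const buf: [4]Limb = @splat(0);
    try std.testing.expect(slicesOverlap(buf[0..2], buf[1..3]));
    try std.testing.expect(!slicesOverlap(buf[0..2], buf[2..4]));
}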
4353
4354test {
4355    _ = @import("int_test.zig");
4356}
4357
4358const testing_allocator = std.testing.allocator;
4359test "llshl shift by whole number of limb" {
4360    const padding = maxInt(Limb);
4361
4362    var r: [10]Limb = @splat(padding);
4363
4364    const A: Limb = @truncate(0xCCCCCCCCCCCCCCCCCCCCCCC);
4365    const B: Limb = @truncate(0x22222222222222222222222);
4366
4367    const data = [2]Limb{ A, B };
4368    for (0..9) |i| {
4369        @memset(&r, padding);
4370        const len = llshl(&r, &data, i * @bitSizeOf(Limb));
4371
4372        try std.testing.expectEqual(i + 2, len);
4373        try std.testing.expectEqualSlices(Limb, &data, r[i .. i + 2]);
4374        for (r[0..i]) |x|
4375            try std.testing.expectEqual(0, x);
4376        for (r[i + 2 ..]) |x|
4377            try std.testing.expectEqual(padding, x);
4378    }
4379}
4380
4381test llshl {
4382    if (limb_bits != 64) return error.SkipZigTest;
4383
4384    // 1 << 63
4385    const left_one = 0x8000000000000000;
4386    const maxint: Limb = 0xFFFFFFFFFFFFFFFF;
4387
4388    // zig fmt: off
4389    try testOneShiftCase(.llshl, .{0,  &.{0},                               &.{0}});
4390    try testOneShiftCase(.llshl, .{0,  &.{1},                               &.{1}});
4391    try testOneShiftCase(.llshl, .{0,  &.{125484842448},                    &.{125484842448}});
4392    try testOneShiftCase(.llshl, .{0,  &.{0xdeadbeef},                      &.{0xdeadbeef}});
4393    try testOneShiftCase(.llshl, .{0,  &.{maxint},                          &.{maxint}});
4394    try testOneShiftCase(.llshl, .{0,  &.{left_one},                        &.{left_one}});
4395    try testOneShiftCase(.llshl, .{0,  &.{0, 1},                            &.{0, 1}});
4396    try testOneShiftCase(.llshl, .{0,  &.{1, 2},                            &.{1, 2}});
4397    try testOneShiftCase(.llshl, .{0,  &.{left_one, 1},                     &.{left_one, 1}});
4398    try testOneShiftCase(.llshl, .{1,  &.{0},                               &.{0}});
4399    try testOneShiftCase(.llshl, .{1,  &.{2},                               &.{1}});
4400    try testOneShiftCase(.llshl, .{1,  &.{250969684896},                    &.{125484842448}});
4401    try testOneShiftCase(.llshl, .{1,  &.{0x1bd5b7dde},                     &.{0xdeadbeef}});
4402    try testOneShiftCase(.llshl, .{1,  &.{0xfffffffffffffffe, 1},           &.{maxint}});
4403    try testOneShiftCase(.llshl, .{1,  &.{0, 1},                            &.{left_one}});
4404    try testOneShiftCase(.llshl, .{1,  &.{0, 2},                            &.{0, 1}});
4405    try testOneShiftCase(.llshl, .{1,  &.{2, 4},                            &.{1, 2}});
4406    try testOneShiftCase(.llshl, .{1,  &.{0, 3},                            &.{left_one, 1}});
4407    try testOneShiftCase(.llshl, .{5,  &.{32},                              &.{1}});
4408    try testOneShiftCase(.llshl, .{5,  &.{4015514958336},                   &.{125484842448}});
4409    try testOneShiftCase(.llshl, .{5,  &.{0x1bd5b7dde0},                    &.{0xdeadbeef}});
4410    try testOneShiftCase(.llshl, .{5,  &.{0xffffffffffffffe0, 0x1f},        &.{maxint}});
4411    try testOneShiftCase(.llshl, .{5,  &.{0, 16},                           &.{left_one}});
4412    try testOneShiftCase(.llshl, .{5,  &.{0, 32},                           &.{0, 1}});
4413    try testOneShiftCase(.llshl, .{5,  &.{32, 64},                          &.{1, 2}});
4414    try testOneShiftCase(.llshl, .{5,  &.{0, 48},                           &.{left_one, 1}});
4415    try testOneShiftCase(.llshl, .{64, &.{0, 1},                            &.{1}});
4416    try testOneShiftCase(.llshl, .{64, &.{0, 125484842448},                 &.{125484842448}});
4417    try testOneShiftCase(.llshl, .{64, &.{0, 0xdeadbeef},                   &.{0xdeadbeef}});
4418    try testOneShiftCase(.llshl, .{64, &.{0, maxint},                       &.{maxint}});
4419    try testOneShiftCase(.llshl, .{64, &.{0, left_one},                     &.{left_one}});
4420    try testOneShiftCase(.llshl, .{64, &.{0, 0, 1},                         &.{0, 1}});
4421    try testOneShiftCase(.llshl, .{64, &.{0, 1, 2},                         &.{1, 2}});
4422    try testOneShiftCase(.llshl, .{64, &.{0, left_one, 1},                  &.{left_one, 1}});
4423    try testOneShiftCase(.llshl, .{35, &.{0x800000000},                     &.{1}});
4424    try testOneShiftCase(.llshl, .{35, &.{13534986488655118336, 233},       &.{125484842448}});
4425    try testOneShiftCase(.llshl, .{35, &.{0xf56df77800000000, 6},           &.{0xdeadbeef}});
4426    try testOneShiftCase(.llshl, .{35, &.{0xfffffff800000000, 0x7ffffffff}, &.{maxint}});
4427    try testOneShiftCase(.llshl, .{35, &.{0, 17179869184},                  &.{left_one}});
4428    try testOneShiftCase(.llshl, .{35, &.{0, 0x800000000},                  &.{0, 1}});
4429    try testOneShiftCase(.llshl, .{35, &.{0x800000000, 0x1000000000},       &.{1, 2}});
4430    try testOneShiftCase(.llshl, .{35, &.{0, 0xc00000000},                  &.{left_one, 1}});
4431    try testOneShiftCase(.llshl, .{70, &.{0, 64},                           &.{1}});
4432    try testOneShiftCase(.llshl, .{70, &.{0, 8031029916672},                &.{125484842448}});
4433    try testOneShiftCase(.llshl, .{70, &.{0, 0x37ab6fbbc0},                 &.{0xdeadbeef}});
4434    try testOneShiftCase(.llshl, .{70, &.{0, 0xffffffffffffffc0, 63},       &.{maxint}});
4435    try testOneShiftCase(.llshl, .{70, &.{0, 0, 32},                        &.{left_one}});
4436    try testOneShiftCase(.llshl, .{70, &.{0, 0, 64},                        &.{0, 1}});
4437    try testOneShiftCase(.llshl, .{70, &.{0, 64, 128},                      &.{1, 2}});
4438    try testOneShiftCase(.llshl, .{70, &.{0, 0, 0x60},                      &.{left_one, 1}});
4439    // zig fmt: on
4440}
4441
4442test "llshl shift 0" {
4443    const n = @bitSizeOf(Limb);
4444    if (n <= 20) return error.SkipZigTest;
4445
4446    // zig fmt: off
4447    try testOneShiftCase(.llshl, .{0,   &.{0},    &.{0}});
4448    try testOneShiftCase(.llshl, .{1,   &.{0},    &.{0}});
4449    try testOneShiftCase(.llshl, .{5,   &.{0},    &.{0}});
4450    try testOneShiftCase(.llshl, .{13,  &.{0},    &.{0}});
4451    try testOneShiftCase(.llshl, .{20,  &.{0},    &.{0}});
4452    try testOneShiftCase(.llshl, .{0,   &.{0, 0}, &.{0, 0}});
4453    try testOneShiftCase(.llshl, .{2,   &.{0, 0}, &.{0, 0}});
4454    try testOneShiftCase(.llshl, .{7,   &.{0, 0}, &.{0, 0}});
4455    try testOneShiftCase(.llshl, .{11,  &.{0, 0}, &.{0, 0}});
4456    try testOneShiftCase(.llshl, .{19,  &.{0, 0}, &.{0, 0}});
4457
4458    try testOneShiftCase(.llshl, .{0,   &.{0},                &.{0}});
4459    try testOneShiftCase(.llshl, .{n,   &.{0, 0},             &.{0}});
4460    try testOneShiftCase(.llshl, .{2*n, &.{0, 0, 0},          &.{0}});
4461    try testOneShiftCase(.llshl, .{3*n, &.{0, 0, 0, 0},       &.{0}});
4462    try testOneShiftCase(.llshl, .{4*n, &.{0, 0, 0, 0, 0},    &.{0}});
4463    try testOneShiftCase(.llshl, .{0,   &.{0, 0},             &.{0, 0}});
4464    try testOneShiftCase(.llshl, .{n,   &.{0, 0, 0},          &.{0, 0}});
4465    try testOneShiftCase(.llshl, .{2*n, &.{0, 0, 0, 0},       &.{0, 0}});
4466    try testOneShiftCase(.llshl, .{3*n, &.{0, 0, 0, 0, 0},    &.{0, 0}});
4467    try testOneShiftCase(.llshl, .{4*n, &.{0, 0, 0, 0, 0, 0}, &.{0, 0}});
4468    // zig fmt: on
4469}
4470
4471test "llshr shift 0" {
4472    const n = @bitSizeOf(Limb);
4473
4474    // zig fmt: off
4475    try testOneShiftCase(.llshr, .{0,   &.{0},    &.{0}});
4476    try testOneShiftCase(.llshr, .{1,   &.{0},    &.{0}});
4477    try testOneShiftCase(.llshr, .{5,   &.{0},    &.{0}});
4478    try testOneShiftCase(.llshr, .{13,  &.{0},    &.{0}});
4479    try testOneShiftCase(.llshr, .{20,  &.{0},    &.{0}});
4480    try testOneShiftCase(.llshr, .{0,   &.{0, 0}, &.{0, 0}});
4481    try testOneShiftCase(.llshr, .{2,   &.{0},    &.{0, 0}});
4482    try testOneShiftCase(.llshr, .{7,   &.{0},    &.{0, 0}});
4483    try testOneShiftCase(.llshr, .{11,  &.{0},    &.{0, 0}});
4484    try testOneShiftCase(.llshr, .{19,  &.{0},    &.{0, 0}});
4485
4486    try testOneShiftCase(.llshr, .{n,   &.{0}, &.{0}});
4487    try testOneShiftCase(.llshr, .{2*n, &.{0}, &.{0}});
4488    try testOneShiftCase(.llshr, .{3*n, &.{0}, &.{0}});
4489    try testOneShiftCase(.llshr, .{4*n, &.{0}, &.{0}});
4490    try testOneShiftCase(.llshr, .{n,   &.{0}, &.{0, 0}});
4491    try testOneShiftCase(.llshr, .{2*n, &.{0}, &.{0, 0}});
4492    try testOneShiftCase(.llshr, .{3*n, &.{0}, &.{0, 0}});
4493    try testOneShiftCase(.llshr, .{4*n, &.{0}, &.{0, 0}});
4494
4495    try testOneShiftCase(.llshr, .{1,  &.{}, &.{}});
4496    try testOneShiftCase(.llshr, .{2,  &.{}, &.{}});
4497    try testOneShiftCase(.llshr, .{64, &.{}, &.{}});
4498    // zig fmt: on
4499}
4500
4501test "llshr to 0" {
4502    const n = @bitSizeOf(Limb);
4503    if (n != 64 and n != 32) return error.SkipZigTest;
4504
4505    // zig fmt: off
4506    try testOneShiftCase(.llshr, .{1,   &.{0}, &.{0}});
4507    try testOneShiftCase(.llshr, .{1,   &.{0}, &.{1}});
4508    try testOneShiftCase(.llshr, .{5,   &.{0}, &.{1}});
4509    try testOneShiftCase(.llshr, .{65,  &.{0}, &.{0, 1}});
4510    try testOneShiftCase(.llshr, .{193, &.{0}, &.{0, 0, maxInt(Limb)}});
4511    try testOneShiftCase(.llshr, .{193, &.{0}, &.{maxInt(Limb), 1, maxInt(Limb)}});
4512    try testOneShiftCase(.llshr, .{193, &.{0}, &.{0xdeadbeef, 0xabcdefab, 0x1234}});
4513    // zig fmt: on
4514}
4515
4516test "llshr single" {
4517    if (limb_bits != 64) return error.SkipZigTest;
4518
4519    // 1 << 63
4520    const left_one = 0x8000000000000000;
4521    const maxint: Limb = 0xFFFFFFFFFFFFFFFF;
4522
4523    // zig fmt: off
4524    try testOneShiftCase(.llshr, .{0,  &.{0},                  &.{0}});
4525    try testOneShiftCase(.llshr, .{0,  &.{1},                  &.{1}});
4526    try testOneShiftCase(.llshr, .{0,  &.{125484842448},       &.{125484842448}});
4527    try testOneShiftCase(.llshr, .{0,  &.{0xdeadbeef},         &.{0xdeadbeef}});
4528    try testOneShiftCase(.llshr, .{0,  &.{maxint},             &.{maxint}});
4529    try testOneShiftCase(.llshr, .{0,  &.{left_one},           &.{left_one}});
4530    try testOneShiftCase(.llshr, .{1,  &.{0},                  &.{0}});
4531    try testOneShiftCase(.llshr, .{1,  &.{1},                  &.{2}});
4532    try testOneShiftCase(.llshr, .{1,  &.{62742421224},        &.{125484842448}});
4533    try testOneShiftCase(.llshr, .{1,  &.{62742421223},        &.{125484842447}});
4534    try testOneShiftCase(.llshr, .{1,  &.{0x6f56df77},         &.{0xdeadbeef}});
4535    try testOneShiftCase(.llshr, .{1,  &.{0x7fffffffffffffff}, &.{maxint}});
4536    try testOneShiftCase(.llshr, .{1,  &.{0x4000000000000000}, &.{left_one}});
4537    try testOneShiftCase(.llshr, .{8,  &.{1},                  &.{256}});
4538    try testOneShiftCase(.llshr, .{8,  &.{490175165},          &.{125484842448}});
4539    try testOneShiftCase(.llshr, .{8,  &.{0xdeadbe},           &.{0xdeadbeef}});
4540    try testOneShiftCase(.llshr, .{8,  &.{0xffffffffffffff},   &.{maxint}});
4541    try testOneShiftCase(.llshr, .{8,  &.{0x80000000000000},   &.{left_one}});
4542    // zig fmt: on
4543}
4544
4545test llshr {
4546    if (limb_bits != 64) return error.SkipZigTest;
4547
4548    // 1 << 63
4549    const left_one = 0x8000000000000000;
4550    const maxint: Limb = 0xFFFFFFFFFFFFFFFF;
4551
4552    // zig fmt: off
4553    try testOneShiftCase(.llshr, .{0,  &.{0, 0},                           &.{0, 0}});
4554    try testOneShiftCase(.llshr, .{0,  &.{0, 1},                           &.{0, 1}});
4555    try testOneShiftCase(.llshr, .{0,  &.{15, 1},                          &.{15, 1}});
4556    try testOneShiftCase(.llshr, .{0,  &.{987656565, 123456789456},        &.{987656565, 123456789456}});
4557    try testOneShiftCase(.llshr, .{0,  &.{0xfeebdaed, 0xdeadbeef},         &.{0xfeebdaed, 0xdeadbeef}});
4558    try testOneShiftCase(.llshr, .{0,  &.{1, maxint},                      &.{1, maxint}});
4559    try testOneShiftCase(.llshr, .{0,  &.{0, left_one},                    &.{0, left_one}});
4560    try testOneShiftCase(.llshr, .{1,  &.{0},                              &.{0, 0}});
4561    try testOneShiftCase(.llshr, .{1,  &.{left_one},                       &.{0, 1}});
4562    try testOneShiftCase(.llshr, .{1,  &.{0x8000000000000007},             &.{15, 1}});
4563    try testOneShiftCase(.llshr, .{1,  &.{493828282, 61728394728},         &.{987656565, 123456789456}});
4564    try testOneShiftCase(.llshr, .{1,  &.{0x800000007f75ed76, 0x6f56df77}, &.{0xfeebdaed, 0xdeadbeef}});
4565    try testOneShiftCase(.llshr, .{1,  &.{left_one, 0x7fffffffffffffff},   &.{1, maxint}});
4566    try testOneShiftCase(.llshr, .{1,  &.{0, 0x4000000000000000},          &.{0, left_one}});
4567    try testOneShiftCase(.llshr, .{64, &.{0},                              &.{0, 0}});
4568    try testOneShiftCase(.llshr, .{64, &.{1},                              &.{0, 1}});
4569    try testOneShiftCase(.llshr, .{64, &.{1},                              &.{15, 1}});
4570    try testOneShiftCase(.llshr, .{64, &.{123456789456},                   &.{987656565, 123456789456}});
4571    try testOneShiftCase(.llshr, .{64, &.{0xdeadbeef},                     &.{0xfeebdaed, 0xdeadbeef}});
4572    try testOneShiftCase(.llshr, .{64, &.{maxint},                         &.{1, maxint}});
4573    try testOneShiftCase(.llshr, .{64, &.{left_one},                       &.{0, left_one}});
4574    try testOneShiftCase(.llshr, .{72, &.{0},                              &.{0, 0}});
4575    try testOneShiftCase(.llshr, .{72, &.{0},                              &.{0, 1}});
4576    try testOneShiftCase(.llshr, .{72, &.{0},                              &.{15, 1}});
4577    try testOneShiftCase(.llshr, .{72, &.{482253083},                      &.{987656565, 123456789456}});
4578    try testOneShiftCase(.llshr, .{72, &.{0xdeadbe},                       &.{0xfeebdaed, 0xdeadbeef}});
4579    try testOneShiftCase(.llshr, .{72, &.{0xffffffffffffff},               &.{1, maxint}});
4580    try testOneShiftCase(.llshr, .{72, &.{0x80000000000000},               &.{0, left_one}});
4581    // zig fmt: on
4582}
4583
4584const Case = struct { usize, []const Limb, []const Limb };
4585
4586fn testOneShiftCase(comptime function: enum { llshr, llshl }, case: Case) !void {
4587    const func = if (function == .llshl) llshl else llshr;
4588    const shift_direction = if (function == .llshl) -1 else 1;
4589
4590    try testOneShiftCaseNoAliasing(func, case);
4591    try testOneShiftCaseAliasing(func, case, shift_direction);
4592}
4593
4594fn testOneShiftCaseNoAliasing(func: fn ([]Limb, []const Limb, usize) usize, case: Case) !void {
4595    const padding = maxInt(Limb);
4596    var r: [20]Limb = @splat(padding);
4597
4598    const shift = case[0];
4599    const expected = case[1];
4600    const data = case[2];
4601
4602    std.debug.assert(expected.len <= 20);
4603
4604    const len = func(&r, data, shift);
4605
4606    try std.testing.expectEqual(expected.len, len);
4607    try std.testing.expectEqualSlices(Limb, expected, r[0..len]);
4608    try std.testing.expect(mem.allEqual(Limb, r[len..], padding));
4609}
4610
4611fn testOneShiftCaseAliasing(func: fn ([]Limb, []const Limb, usize) usize, case: Case, shift_direction: isize) !void {
4612    const padding = maxInt(Limb);
4613    var r: [60]Limb = @splat(padding);
4614    const base = 20;
4615
4616    assert(shift_direction == 1 or shift_direction == -1);
4617
4618    for (0..10) |limb_shift| {
4619        const shift = case[0];
4620        const expected = case[1];
4621        const data = case[2];
4622
4623        std.debug.assert(expected.len <= 20);
4624
4625        @memset(&r, padding);
4626        const final_limb_base: usize = @intCast(base + shift_direction * @as(isize, @intCast(limb_shift)));
4627        const written_data = r[final_limb_base..][0..data.len];
4628        @memcpy(written_data, data);
4629
4630        const len = func(r[base..], written_data, shift);
4631
4632        try std.testing.expectEqual(expected.len, len);
4633        try std.testing.expectEqualSlices(Limb, expected, r[base .. base + len]);
4634    }
4635}
4636
4637test "format" {
4638    var a: Managed = try .init(std.testing.allocator);
4639    defer a.deinit();
4640
4641    try a.set(123);
4642    try testFormat(a, "123");
4643
4644    try a.set(-123);
4645    try testFormat(a, "-123");
4646
4647    try a.set(20000000000000000000); // > maxInt(u64)
4648    try testFormat(a, "20000000000000000000");
4649
4650    try a.set(1 << 64 * @sizeOf(usize) * 8);
4651    try testFormat(a, "(BigInt)");
4652
4653    try a.set(-(1 << 64 * @sizeOf(usize) * 8));
4654    try testFormat(a, "(BigInt)");
4655}
4656
4657fn testFormat(a: Managed, expected: []const u8) !void {
4658    try std.testing.expectFmt(expected, "{f}", .{a});
4659    try std.testing.expectFmt(expected, "{f}", .{a.toMutable()});
4660    try std.testing.expectFmt(expected, "{f}", .{a.toConst()});
4661}