master
1const std = @import("../../std.zig");
2const builtin = @import("builtin");
3const math = std.math;
4const Limb = std.math.big.Limb;
5const limb_bits = @typeInfo(Limb).int.bits;
6const HalfLimb = std.math.big.HalfLimb;
7const half_limb_bits = @typeInfo(HalfLimb).int.bits;
8const DoubleLimb = std.math.big.DoubleLimb;
9const SignedDoubleLimb = std.math.big.SignedDoubleLimb;
10const Log2Limb = std.math.big.Log2Limb;
11const Allocator = std.mem.Allocator;
12const mem = std.mem;
13const maxInt = std.math.maxInt;
14const minInt = std.math.minInt;
15const assert = std.debug.assert;
16const Endian = std.builtin.Endian;
17const Signedness = std.builtin.Signedness;
18const native_endian = builtin.cpu.arch.endian();
19
/// Returns the number of limbs needed to store `scalar`, which must be a
/// primitive integer or float value.
/// Note: A comptime-known upper bound of this value that may be used
/// instead if `scalar` is not already comptime-known is
/// `calcTwosCompLimbCount(@typeInfo(@TypeOf(scalar)).int.bits)`
pub fn calcLimbLen(scalar: anytype) usize {
    switch (@typeInfo(@TypeOf(scalar))) {
        .int, .comptime_int => {
            if (scalar == 0) return 1;
            const w_value = @abs(scalar);
            // floor(log2(|scalar|) / limb_bits) + 1 limbs hold the magnitude.
            return @as(usize, @intCast(@divFloor(@as(Limb, @intCast(math.log2(w_value))), limb_bits) + 1));
        },
        .float => {
            const repr: std.math.FloatRepr(@TypeOf(scalar)) = @bitCast(scalar);
            return switch (repr.exponent) {
                // Denormals have magnitude < 1, so one limb suffices for the truncated value.
                .denormal => 1,
                // NOTE(review): the +2 presumably covers the implicit integer bit plus
                // rounding slack used by `setFloat` -- confirm against its requirements.
                else => return calcNonZeroTwosCompLimbCount(@as(usize, 2) + @max(repr.exponent.unbias(), 0)),
                // NOTE(review): 0 limbs for infinity breaks the usual `limbs.len >= 1`
                // invariant; callers passing infinities must handle this -- confirm.
                .infinite => 0,
            };
        },
        .comptime_float => return calcLimbLen(@as(f128, scalar)),
        else => @compileError("expected float or int, got " ++ @typeName(@TypeOf(scalar))),
    }
}
44
/// Scratch limbs needed to convert an `a_len`-limb number to a string in `base`.
/// Power-of-two bases are extracted by shifting and need no scratch space.
pub fn calcToStringLimbsBufferLen(a_len: usize, base: u8) usize {
    if (math.isPowerOfTwo(base)) return 0;
    const div_scratch = calcDivLimbsBufferLen(a_len, 1);
    return a_len + 2 + a_len + div_scratch;
}
50
/// Scratch limbs required by division for operands of `a_len` and `b_len` limbs.
pub fn calcDivLimbsBufferLen(a_len: usize, b_len: usize) usize {
    // 4 extra limbs of slack on top of the two operand sizes.
    const slack = 4;
    return a_len + b_len + slack;
}
54
/// Scratch limbs `mul` may need in order to copy aliased operands:
/// one `@max(a_len, b_len)`-sized region per possible alias.
pub fn calcMulLimbsBufferLen(a_len: usize, b_len: usize, aliases: usize) usize {
    const widest = @max(a_len, b_len);
    return widest * aliases;
}
58
/// Scratch limbs `mulWrap` may need to copy aliased operands; like
/// `calcMulLimbsBufferLen` but capped at the truncated result width.
pub fn calcMulWrapLimbsBufferLen(bit_count: usize, a_len: usize, b_len: usize, aliases: usize) usize {
    const widest = @max(a_len, b_len);
    const req_limbs = calcTwosCompLimbCount(bit_count);
    return @min(req_limbs, widest) * aliases;
}
63
/// Scratch limbs `setString` needs for a `string_len`-digit input in `base`.
pub fn calcSetStringLimbsBufferLen(base: u8, string_len: usize) usize {
    const result_limbs = calcSetStringLimbCount(base, string_len);
    // `setString` multiplies the accumulator in place, so both multiplication
    // operands may alias the destination.
    return calcMulLimbsBufferLen(result_limbs, result_limbs, 2);
}
68
/// Assumes `string_len` doesn't account for minus signs if the number is negative.
pub fn calcSetStringLimbCount(base: u8, string_len: usize) usize {
    // Each digit contributes log2(base) bits; round the total up to whole
    // limbs and add one limb of slack.
    const bits_per_digit = std.math.log2(@as(f32, @floatFromInt(base)));
    const digits: f32 = @floatFromInt(string_len);
    return 1 + @as(usize, @intFromFloat(@ceil(digits * bits_per_digit / limb_bits)));
}
75
/// Scratch limbs needed to raise an `a_bit_count`-bit number to the power `y`.
pub fn calcPowLimbsBufferLen(a_bit_count: usize, y: usize) usize {
    // The result has at most a_bit_count * y bits; round up to whole limbs.
    // The 2 accounts for the minimum space requirement for llmulacc.
    const result_bits = a_bit_count * y;
    return 2 + (result_bits + (limb_bits - 1)) / limb_bits;
}
80
/// Scratch limbs required by `sqrt` for an operand of `a_bit_count` bits.
pub fn calcSqrtLimbsBufferLen(a_bit_count: usize) usize {
    // Limbs needed to hold the operand itself (ceiling division).
    const a_limb_count = (a_bit_count - 1) / limb_bits + 1;
    // The square root has roughly half as many bits as the operand.
    const shift = (a_bit_count + 1) / 2;
    const u_s_rem_limb_count = 1 + ((shift / limb_bits) + 1);
    // Three working values of root size, plus the division scratch buffer.
    return a_limb_count + 3 * u_s_rem_limb_count + calcDivLimbsBufferLen(a_limb_count, u_s_rem_limb_count);
}
87
/// Compute the number of limbs required to store a 2s-complement number of `bit_count` bits.
/// Unlike `calcTwosCompLimbCount`, asserts that `bit_count` is nonzero.
pub fn calcNonZeroTwosCompLimbCount(bit_count: usize) usize {
    assert(bit_count != 0);
    return calcTwosCompLimbCount(bit_count);
}
93
/// Compute the number of limbs required to store a 2s-complement number of `bit_count` bits.
///
/// Special cases `bit_count == 0` to return 1. Zero-bit integers can only store the value zero
/// and this big integer implementation stores zero using one limb.
pub fn calcTwosCompLimbCount(bit_count: usize) usize {
    const full_limbs = std.math.divCeil(usize, bit_count, @bitSizeOf(Limb)) catch unreachable;
    return @max(full_limbs, 1);
}
101
/// Returns `a + b * c + *carry` truncated to one limb; `carry` is updated to
/// the bits that overflowed out of the low limb.
pub fn addMulLimbWithCarry(a: Limb, b: Limb, c: Limb, carry: *Limb) Limb {
    // ov1[0] = a + *carry
    const ov1 = @addWithOverflow(a, carry.*);

    // r2 = b * c
    const bc = @as(DoubleLimb, math.mulWide(Limb, b, c));
    const r2 = @as(Limb, @truncate(bc)); // low limb of the product
    const c2 = @as(Limb, @truncate(bc >> limb_bits)); // high limb of the product

    // ov2[0] = ov1[0] + r2
    const ov2 = @addWithOverflow(ov1[0], r2);

    // This never overflows, c1, c3 are either 0 or 1 and if both are 1 then
    // c2 is at least <= maxInt(Limb) - 2.
    carry.* = ov1[1] + c2 + ov2[1];

    return ov2[0];
}
121
/// Returns `a - b * c - *carry` truncated to one limb; `carry` is updated to
/// the borrow out of the low limb.
fn subMulLimbWithBorrow(a: Limb, b: Limb, c: Limb, carry: *Limb) Limb {
    // ov1[0] = a - *carry
    const ov1 = @subWithOverflow(a, carry.*);

    // r2 = b * c
    // Use the file-wide `math` alias for consistency with `addMulLimbWithCarry`.
    const bc = @as(DoubleLimb, math.mulWide(Limb, b, c));
    const r2 = @as(Limb, @truncate(bc)); // low limb of the product
    const c2 = @as(Limb, @truncate(bc >> limb_bits)); // high limb of the product

    // ov2[0] = ov1[0] - r2
    const ov2 = @subWithOverflow(ov1[0], r2);
    // The borrow sum never overflows: ov1[1] and ov2[1] are 0 or 1, and when
    // both are 1 the high product word c2 is at most maxInt(Limb) - 2.
    carry.* = ov1[1] + c2 + ov2[1];

    return ov2[0];
}
138
/// Used to indicate either limit of a 2s-complement integer.
pub const TwosCompIntLimit = enum {
    /// The low limit, either 0x00 (unsigned) or (-)0x80 (signed) for an 8-bit integer.
    min,

    /// The high limit, either 0xFF (unsigned) or 0x7F (signed) for an 8-bit integer.
    max,
};
147
/// Rounding mode used when converting between big integers and floats.
pub const Round = enum {
    /// Round to the nearest representable value, with ties broken by the representation
    /// that ends with a 0 bit.
    nearest_even,
    /// Round away from zero.
    away,
    /// Round towards zero.
    trunc,
    /// Round towards negative infinity.
    floor,
    /// Round towards positive infinity.
    ceil,
};
161
/// Reports whether a conversion preserved the value exactly or required rounding.
pub const Exactness = enum { inexact, exact };
163
/// An arbitrary-precision big integer, with a fixed set of mutable limbs.
165pub const Mutable = struct {
166 /// Raw digits. These are:
167 ///
168 /// * Little-endian ordered
169 /// * limbs.len >= 1
170 /// * Zero is represented as limbs.len == 1 with limbs[0] == 0.
171 ///
172 /// Accessing limbs directly should be avoided.
173 /// These are allocated limbs; the `len` field tells the valid range.
174 limbs: []Limb,
175 len: usize,
176 positive: bool,
177
    /// Returns a read-only view over the first `len` limbs of `self`.
    /// The view shares memory with `self`; it is invalidated by mutation.
    pub fn toConst(self: Mutable) Const {
        return .{
            .limbs = self.limbs[0..self.len],
            .positive = self.positive,
        };
    }

    pub const ConvertError = Const.ConvertError;

    /// Convert `self` to `Int`.
    ///
    /// Returns an error if self cannot be narrowed into the requested type without truncation.
    pub fn toInt(self: Mutable, comptime Int: type) ConvertError!Int {
        return self.toConst().toInt(Int);
    }

    /// Convert `self` to `Float`, rounding according to `round`.
    /// Also returns whether the conversion was exact.
    pub fn toFloat(self: Mutable, comptime Float: type, round: Round) struct { Float, Exactness } {
        return self.toConst().toFloat(Float, round);
    }

    /// Returns true if `a == 0`.
    pub fn eqlZero(self: Mutable) bool {
        return self.toConst().eqlZero();
    }
203
    /// Asserts that the allocator owns the limbs memory. If this is not the case,
    /// use `toConst().toManaged()`.
    pub fn toManaged(self: Mutable, allocator: Allocator) Managed {
        return .{
            .allocator = allocator,
            .limbs = self.limbs,
            // `Managed` packs the sign into the high bit of `metadata`,
            // alongside the length.
            .metadata = if (self.positive)
                self.len & ~Managed.sign_bit
            else
                self.len | Managed.sign_bit,
        };
    }

    /// `value` is a primitive integer type.
    /// Asserts the value fits within the provided `limbs_buffer`.
    /// Note: `calcLimbLen` can be used to figure out how big an array to allocate for `limbs_buffer`.
    pub fn init(limbs_buffer: []Limb, value: anytype) Mutable {
        // Establish a canonical zero before delegating to `set`.
        limbs_buffer[0] = 0;
        var self: Mutable = .{
            .limbs = limbs_buffer,
            .len = 1,
            .positive = true,
        };
        self.set(value);
        return self;
    }
230
    /// Copies the value of a Const to an existing Mutable so that they both have the same value.
    /// Asserts the value fits in the limbs buffer.
    pub fn copy(self: *Mutable, other: Const) void {
        // Skip the copy when both views share the same backing memory.
        if (self.limbs.ptr != other.limbs.ptr) {
            @memcpy(self.limbs[0..other.limbs.len], other.limbs[0..other.limbs.len]);
        }
        // Normalize before setting `positive` so the `eqlZero` doesn't need to iterate
        // over the extra zero limbs.
        self.normalize(other.limbs.len);
        // Zero is canonically positive.
        self.positive = other.positive or other.eqlZero();
    }

    /// Efficiently swap a Mutable with another. This swaps the limb pointers and a full copy is not
    /// performed. The address of the limbs field will not be the same after this function.
    pub fn swap(self: *Mutable, other: *Mutable) void {
        mem.swap(Mutable, self, other);
    }

    /// Prints the limbs (hex, least significant first) and metadata to stderr, for debugging.
    pub fn dump(self: Mutable) void {
        for (self.limbs[0..self.len]) |limb| {
            std.debug.print("{x} ", .{limb});
        }
        std.debug.print("len={} capacity={} positive={}\n", .{ self.len, self.limbs.len, self.positive });
    }

    /// Clones a Mutable and returns a new Mutable with the same value. The new Mutable is a deep copy and
    /// can be modified separately from the original.
    /// Asserts that limbs is big enough to store the value.
    pub fn clone(other: Mutable, limbs: []Limb) Mutable {
        @memcpy(limbs[0..other.len], other.limbs[0..other.len]);
        return .{
            .limbs = limbs,
            .len = other.len,
            .positive = other.positive,
        };
    }

    /// Flip the sign of `self` in place.
    pub fn negate(self: *Mutable) void {
        self.positive = !self.positive;
    }

    /// Modify to become the absolute value
    pub fn abs(self: *Mutable) void {
        self.positive = true;
    }
276
    /// Sets the Mutable to value. Value must be a primitive integer type.
    /// Asserts the value fits within the limbs buffer.
    /// Note: `calcLimbLen` can be used to figure out how big the limbs buffer
    /// needs to be to store a specific value.
    pub fn set(self: *Mutable, value: anytype) void {
        const T = @TypeOf(value);
        const needed_limbs = calcLimbLen(value);
        assert(needed_limbs <= self.limbs.len); // value too big

        self.len = needed_limbs;
        self.positive = value >= 0;

        switch (@typeInfo(T)) {
            .int => |info| {
                // Work with the magnitude; the sign was recorded above.
                var w_value = @abs(value);

                if (info.bits <= limb_bits) {
                    // Fits in a single limb.
                    self.limbs[0] = w_value;
                } else {
                    // Store limb_bits chunks, least significant first.
                    var i: usize = 0;
                    while (true) : (i += 1) {
                        self.limbs[i] = @as(Limb, @truncate(w_value));
                        w_value >>= limb_bits;

                        if (w_value == 0) break;
                    }
                }
            },
            .comptime_int => {
                comptime var w_value = @abs(value);

                if (w_value <= maxInt(Limb)) {
                    self.limbs[0] = w_value;
                } else {
                    const mask = (1 << limb_bits) - 1;

                    // comptime_int has no fixed bit width, so the chunking
                    // loop must be unrolled at comptime.
                    comptime var i = 0;
                    inline while (true) : (i += 1) {
                        self.limbs[i] = w_value & mask;
                        w_value >>= limb_bits;

                        if (w_value == 0) break;
                    }
                }
            },
            else => @compileError("cannot set Mutable using type " ++ @typeName(T)),
        }
    }
325
    /// Set self from the string representation `value`.
    ///
    /// `value` must contain only digits <= `base` and is case insensitive. Base prefixes are
    /// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are
    /// ignored and can be used as digit separators.
    ///
    /// Asserts there is enough memory for the value in `self.limbs`. An upper bound on number of limbs can
    /// be determined with `calcSetStringLimbCount`.
    /// Asserts the base is in the range [2, 36].
    ///
    /// Returns an error if the value has invalid digits for the requested base.
    ///
    /// `limbs_buffer` is used for temporary storage. The size required can be found with
    /// `calcSetStringLimbsBufferLen`.
    ///
    /// If `allocator` is provided, it will be used for temporary storage to improve
    /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
    pub fn setString(
        self: *Mutable,
        base: u8,
        value: []const u8,
        limbs_buffer: []Limb,
        allocator: ?Allocator,
    ) error{InvalidCharacter}!void {
        assert(base >= 2);
        assert(base <= 36);

        // Consume an optional leading minus sign; the sign is applied at the end.
        var i: usize = 0;
        var positive = true;
        if (value.len > 0 and value[0] == '-') {
            positive = false;
            i += 1;
        }

        const ap_base: Const = .{ .limbs = &[_]Limb{base}, .positive = true };
        self.set(0);

        for (value[i..]) |ch| {
            if (ch == '_') {
                continue;
            }
            const d = try std.fmt.charToDigit(ch, base);
            const ap_d: Const = .{ .limbs = &[_]Limb{d}, .positive = true };

            // accumulator = accumulator * base + digit
            self.mul(self.toConst(), ap_base, limbs_buffer, allocator);
            self.add(self.toConst(), ap_d);
        }
        self.positive = positive;
    }
375
    /// Set self to either bound of a 2s-complement integer.
    /// Note: The result is still sign-magnitude, not twos complement! In order to convert the
    /// result to twos complement, it is sufficient to take the absolute value.
    ///
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn setTwosCompIntLimit(
        r: *Mutable,
        limit: TwosCompIntLimit,
        signedness: Signedness,
        bit_count: usize,
    ) void {
        // Handle zero-bit types.
        if (bit_count == 0) {
            r.set(0);
            return;
        }

        const req_limbs = calcTwosCompLimbCount(bit_count);
        // Position of the sign bit within the most significant limb.
        const bit: Log2Limb = @truncate(bit_count - 1);
        const signmask = @as(Limb, 1) << bit; // 0b0..010..0 where 1 is the sign bit.
        const mask = (signmask << 1) -% 1; // 0b0..011..1 where the leftmost 1 is the sign bit.

        r.positive = true;

        switch (signedness) {
            .signed => switch (limit) {
                .min => {
                    // Negative bound, signed = -0x80.
                    r.len = req_limbs;
                    @memset(r.limbs[0 .. r.len - 1], 0);
                    r.limbs[r.len - 1] = signmask;
                    r.positive = false;
                },
                .max => {
                    // Positive bound, signed = 0x7F
                    // Note, in this branch we need to normalize because the first bit is
                    // supposed to be 0.

                    // Special case for 1-bit integers: max is 0.
                    if (bit_count == 1) {
                        r.set(0);
                    } else {
                        const new_req_limbs = calcTwosCompLimbCount(bit_count - 1);
                        const msb = @as(Log2Limb, @truncate(bit_count - 2));
                        const new_signmask = @as(Limb, 1) << msb; // 0b0..010..0 where 1 is the sign bit.
                        const new_mask = (new_signmask << 1) -% 1; // 0b0..001..1 where the rightmost 0 is the sign bit.

                        r.len = new_req_limbs;
                        @memset(r.limbs[0 .. r.len - 1], maxInt(Limb));
                        r.limbs[r.len - 1] = new_mask;
                    }
                },
            },
            .unsigned => switch (limit) {
                .min => {
                    // Min bound, unsigned = 0x00
                    r.set(0);
                },
                .max => {
                    // Max bound, unsigned = 0xFF
                    r.len = req_limbs;
                    @memset(r.limbs[0 .. r.len - 1], maxInt(Limb));
                    r.limbs[r.len - 1] = mask;
                },
            },
        }
    }
444
    /// Sets the Mutable to a float value rounded according to `round`.
    /// Returns whether the conversion was exact (`round` had no effect on the result).
    pub fn setFloat(self: *Mutable, value: anytype, round: Round) Exactness {
        const Float = @TypeOf(value);
        if (Float == comptime_float) return self.setFloat(@as(f128, value), round);
        const abs_value = @abs(value);
        if (abs_value < 1.0) {
            if (abs_value == 0.0) {
                self.set(0);
                return .exact;
            }
            // 0 < |value| < 1: the result is -1, 0 or 1 depending on the rounding mode.
            self.set(@as(i2, round: switch (round) {
                .nearest_even => if (abs_value <= 0.5) 0 else continue :round .away,
                .away => if (value < 0.0) -1 else 1,
                .trunc => 0,
                .floor => -@as(i2, @intFromBool(value < 0.0)),
                .ceil => @intFromBool(value > 0.0),
            }));
            return .inexact;
        }
        const Repr = std.math.FloatRepr(Float);
        const repr: Repr = @bitCast(value);
        const exponent = repr.exponent.unbias();
        assert(exponent >= 0); // |value| >= 1.0 was established above
        // Restore the implicit leading integer bit of the mantissa.
        const int_bit: Repr.Mantissa = 1 << (@bitSizeOf(Repr.Mantissa) - 1);
        const mantissa = int_bit | repr.mantissa;
        if (exponent >= @bitSizeOf(Repr.Normalized.Fraction)) {
            // No fractional bits are discarded; a left shift is exact.
            self.set(mantissa);
            self.shiftLeft(self.toConst(), @intCast(exponent - @bitSizeOf(Repr.Normalized.Fraction)));
            self.positive = repr.sign == .positive;
            return .exact;
        }
        self.set(mantissa >> @intCast(@bitSizeOf(Repr.Normalized.Fraction) - exponent));
        // Bits shifted out of the integer part; they decide whether and how to round.
        const round_bits: Repr.Normalized.Fraction = @truncate(mantissa << @intCast(exponent));
        if (round_bits == 0) {
            self.positive = repr.sign == .positive;
            return .exact;
        }
        round: switch (round) {
            .nearest_even => {
                const half: Repr.Normalized.Fraction = 1 << (@bitSizeOf(Repr.Normalized.Fraction) - 1);
                if (round_bits >= half) self.addScalar(self.toConst(), 1);
                // Exactly halfway: clear the low bit to break the tie towards even.
                if (round_bits == half) self.limbs[0] &= ~@as(Limb, 1);
            },
            .away => self.addScalar(self.toConst(), 1),
            .trunc => {},
            .floor => switch (repr.sign) {
                .positive => {},
                .negative => continue :round .away,
            },
            .ceil => switch (repr.sign) {
                .positive => continue :round .away,
                .negative => {},
            },
        }
        self.positive = repr.sign == .positive;
        return .inexact;
    }
503
    /// r = a + scalar
    ///
    /// r and a may be aliases.
    /// scalar is a primitive integer type.
    ///
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `@max(a.limbs.len, calcLimbLen(scalar)) + 1`.
    pub fn addScalar(r: *Mutable, a: Const, scalar: anytype) void {
        // Normally we could just determine the number of limbs needed with calcLimbLen,
        // but that is not comptime-known when scalar is not a comptime_int. Instead, we
        // use calcTwosCompLimbCount for a non-comptime_int scalar, which can be pessimistic
        // in the case that scalar happens to be small in magnitude within its type, but it
        // is well worth being able to use the stack and not needing an allocator passed in.
        // Note that Mutable.init still sets len to calcLimbLen(scalar) in any case.
        const limbs_len = comptime switch (@typeInfo(@TypeOf(scalar))) {
            .comptime_int => calcLimbLen(scalar),
            .int => |info| calcTwosCompLimbCount(info.bits),
            else => @compileError("expected scalar to be an int"),
        };
        // Stack storage for the scalar's limb representation.
        var limbs: [limbs_len]Limb = undefined;
        const operand = init(&limbs, scalar).toConst();
        return add(r, a, operand);
    }
527
    /// Base implementation for addition. Adds `@max(a.limbs.len, b.limbs.len)` elements from a and b,
    /// and returns whether any overflow occurred.
    /// r, a and b may be aliases.
    ///
    /// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`.
    fn addCarry(r: *Mutable, a: Const, b: Const) bool {
        if (a.eqlZero()) {
            r.copy(b);
            return false;
        } else if (b.eqlZero()) {
            r.copy(a);
            return false;
        } else if (a.positive != b.positive) {
            // Mixed signs: rewrite as a subtraction of magnitudes.
            if (a.positive) {
                // (a) + (-b) => a - b
                return r.subCarry(a, b.abs());
            } else {
                // (-a) + (b) => b - a
                return r.subCarry(b, a.abs());
            }
        } else {
            r.positive = a.positive;
            // Pass the longer operand first.
            if (a.limbs.len >= b.limbs.len) {
                const c = lladdcarry(r.limbs, a.limbs, b.limbs);
                r.normalize(a.limbs.len);
                return c != 0;
            } else {
                const c = lladdcarry(r.limbs, b.limbs, a.limbs);
                r.normalize(b.limbs.len);
                return c != 0;
            }
        }
    }

    /// r = a + b
    ///
    /// r, a and b may be aliases.
    ///
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `@max(a.limbs.len, b.limbs.len) + 1`.
    pub fn add(r: *Mutable, a: Const, b: Const) void {
        if (r.addCarry(a, b)) {
            // Fix up the result. Note that addCarry normalizes by a.limbs.len or b.limbs.len,
            // so we need to set the length here.
            const msl = @max(a.limbs.len, b.limbs.len);
            // `[add|sub]Carry` normalizes by `msl`, so we need to fix up the result manually here.
            // Note, the fact that it normalized means that the intermediary limbs are zero here.
            r.len = msl + 1;
            r.limbs[msl] = 1; // If this panics, there wasn't enough space in `r`.
        }
    }
579
    /// r = a + b with 2s-complement wrapping semantics. Returns whether overflow occurred.
    /// r, a and b may be aliases
    ///
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn addWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool {
        const req_limbs = calcTwosCompLimbCount(bit_count);

        // Slice of the upper bits if they exist, these will be ignored and allows us to use addCarry to determine
        // if an overflow occurred.
        const x = Const{
            .positive = a.positive,
            .limbs = a.limbs[0..@min(req_limbs, a.limbs.len)],
        };

        const y = Const{
            .positive = b.positive,
            .limbs = b.limbs[0..@min(req_limbs, b.limbs.len)],
        };

        var carry_truncated = false;
        if (r.addCarry(x, y)) {
            // There are two possibilities here:
            // - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by
            //   truncate anyway.
            // - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
            //   Note: after this we still might need to wrap.
            const msl = @max(a.limbs.len, b.limbs.len);
            if (msl < req_limbs) {
                r.limbs[msl] = 1;
                r.len = req_limbs;
                // The limbs above the carry limb were normalized away; re-zero them.
                @memset(r.limbs[msl + 1 .. req_limbs], 0);
            } else {
                carry_truncated = true;
            }
        }

        if (!r.toConst().fitsInTwosComp(signedness, bit_count)) {
            r.truncate(r.toConst(), signedness, bit_count);
            return true;
        }

        return carry_truncated;
    }

    /// r = a + b with 2s-complement saturating semantics.
    /// r, a and b may be aliases.
    ///
    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn addSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void {
        const req_limbs = calcTwosCompLimbCount(bit_count);

        // Slice of the upper bits if they exist, these will be ignored and allows us to use addCarry to determine
        // if an overflow occurred.
        const x = Const{
            .positive = a.positive,
            .limbs = a.limbs[0..@min(req_limbs, a.limbs.len)],
        };

        const y = Const{
            .positive = b.positive,
            .limbs = b.limbs[0..@min(req_limbs, b.limbs.len)],
        };

        if (r.addCarry(x, y)) {
            // There are two possibilities here:
            // - We overflowed req_limbs, in which case we need to saturate.
            // - a and b had less elements than req_limbs, and those were overflowed.
            //   Note: In this case, might _also_ need to saturate.
            const msl = @max(a.limbs.len, b.limbs.len);
            if (msl < req_limbs) {
                r.limbs[msl] = 1;
                r.len = req_limbs;
                // Note: Saturation may still be required if msl == req_limbs - 1
            } else {
                // Overflowed req_limbs, definitely saturate.
                r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
            }
        }

        // Saturate if the result didn't fit.
        r.saturate(r.toConst(), signedness, bit_count);
    }
664
    /// Base implementation for subtraction. Subtracts `@max(a.limbs.len, b.limbs.len)` elements from a and b,
    /// and returns whether any overflow occurred.
    /// r, a and b may be aliases.
    ///
    /// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`.
    fn subCarry(r: *Mutable, a: Const, b: Const) bool {
        if (a.eqlZero()) {
            // 0 - b => -b
            r.copy(b);
            r.positive = !b.positive;
            return false;
        } else if (b.eqlZero()) {
            r.copy(a);
            return false;
        } else if (a.positive != b.positive) {
            // Mixed signs: rewrite as an addition of magnitudes.
            if (a.positive) {
                // (a) - (-b) => a + b
                return r.addCarry(a, b.abs());
            } else {
                // (-a) - (b) => -a + -b
                return r.addCarry(a, b.negate());
            }
        } else if (a.positive) {
            // Same signs: subtract the smaller magnitude from the larger so the
            // low-level routine never underflows, and fix the sign accordingly.
            if (a.order(b) != .lt) {
                // (a) - (b) => a - b
                const c = llsubcarry(r.limbs, a.limbs, b.limbs);
                r.normalize(a.limbs.len);
                r.positive = true;
                return c != 0;
            } else {
                // (a) - (b) => -b + a => -(b - a)
                const c = llsubcarry(r.limbs, b.limbs, a.limbs);
                r.normalize(b.limbs.len);
                r.positive = false;
                return c != 0;
            }
        } else {
            if (a.order(b) == .lt) {
                // (-a) - (-b) => -(a - b)
                const c = llsubcarry(r.limbs, a.limbs, b.limbs);
                r.normalize(a.limbs.len);
                r.positive = false;
                return c != 0;
            } else {
                // (-a) - (-b) => --b + -a => b - a
                const c = llsubcarry(r.limbs, b.limbs, a.limbs);
                r.normalize(b.limbs.len);
                r.positive = true;
                return c != 0;
            }
        }
    }

    /// r = a - b
    ///
    /// r, a and b may be aliases.
    ///
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `@max(a.limbs.len, b.limbs.len) + 1`. The +1 is not needed if both operands are positive.
    pub fn sub(r: *Mutable, a: Const, b: Const) void {
        r.add(a, b.negate());
    }

    /// r = a - b with 2s-complement wrapping semantics. Returns whether any overflow occurred.
    ///
    /// r, a and b may be aliases
    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn subWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool {
        return r.addWrap(a, b.negate(), signedness, bit_count);
    }

    /// r = a - b with 2s-complement saturating semantics.
    /// r, a and b may be aliases.
    ///
    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn subSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void {
        r.addSat(a, b.negate(), signedness, bit_count);
    }
744
    /// rma = a * b
    ///
    /// `rma` may alias with `a` or `b`.
    /// `a` and `b` may alias with each other.
    ///
    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
    /// rma is given by `a.limbs.len + b.limbs.len`.
    ///
    /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulLimbsBufferLen`.
    pub fn mul(rma: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, allocator: ?Allocator) void {
        var buf_index: usize = 0;

        // Operands that alias the destination are first copied into `limbs_buffer`
        // so that `mulNoAlias` reads stable memory.
        const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: {
            const start = buf_index;
            @memcpy(limbs_buffer[buf_index..][0..a.limbs.len], a.limbs);
            buf_index += a.limbs.len;
            break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
        } else a;

        const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: {
            const start = buf_index;
            @memcpy(limbs_buffer[buf_index..][0..b.limbs.len], b.limbs);
            buf_index += b.limbs.len;
            break :blk b.toMutable(limbs_buffer[start..buf_index]).toConst();
        } else b;

        return rma.mulNoAlias(a_copy, b_copy, allocator);
    }

    /// rma = a * b
    ///
    /// `rma` may not alias with `a` or `b`.
    /// `a` and `b` may alias with each other.
    ///
    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
    /// rma is given by `a.limbs.len + b.limbs.len`.
    ///
    /// If `allocator` is provided, it will be used for temporary storage to improve
    /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
    pub fn mulNoAlias(rma: *Mutable, a: Const, b: Const, allocator: ?Allocator) void {
        assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
        assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing

        // Fast path: single-limb operands whose product fits in one limb.
        if (a.limbs.len == 1 and b.limbs.len == 1) {
            rma.limbs[0], const overflow_bit = @mulWithOverflow(a.limbs[0], b.limbs[0]);
            if (overflow_bit == 0) {
                rma.len = 1;
                // Zero is canonically positive.
                rma.positive = (a.positive == b.positive) or rma.limbs[0] == 0;
                return;
            }
        }

        // llmulacc accumulates into the destination, so it must start zeroed.
        @memset(rma.limbs[0 .. a.limbs.len + b.limbs.len], 0);

        llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs);

        rma.normalize(a.limbs.len + b.limbs.len);
        rma.positive = (a.positive == b.positive);
    }
804
805 /// rma = a * b with 2s-complement wrapping semantics.
806 ///
807 /// `rma` may alias with `a` or `b`.
808 /// `a` and `b` may alias with each other.
809 ///
810 /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
811 /// rma is given by `a.limbs.len + b.limbs.len`.
812 ///
813 /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulWrapLimbsBufferLen`.
814 pub fn mulWrap(
815 rma: *Mutable,
816 a: Const,
817 b: Const,
818 signedness: Signedness,
819 bit_count: usize,
820 limbs_buffer: []Limb,
821 allocator: ?Allocator,
822 ) void {
823 var buf_index: usize = 0;
824 const req_limbs = calcTwosCompLimbCount(bit_count);
825
826 const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: {
827 const start = buf_index;
828 const a_len = @min(req_limbs, a.limbs.len);
829 @memcpy(limbs_buffer[buf_index..][0..a_len], a.limbs[0..a_len]);
830 buf_index += a_len;
831 break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
832 } else a;
833
834 const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: {
835 const start = buf_index;
836 const b_len = @min(req_limbs, b.limbs.len);
837 @memcpy(limbs_buffer[buf_index..][0..b_len], b.limbs[0..b_len]);
838 buf_index += b_len;
839 break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
840 } else b;
841
842 return rma.mulWrapNoAlias(a_copy, b_copy, signedness, bit_count, allocator);
843 }
844
    /// rma = a * b with 2s-complement wrapping semantics.
    ///
    /// `rma` may not alias with `a` or `b`.
    /// `a` and `b` may alias with each other.
    ///
    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
    /// rma is given by `a.limbs.len + b.limbs.len`.
    ///
    /// If `allocator` is provided, it will be used for temporary storage to improve
    /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
    pub fn mulWrapNoAlias(
        rma: *Mutable,
        a: Const,
        b: Const,
        signedness: Signedness,
        bit_count: usize,
        allocator: ?Allocator,
    ) void {
        assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
        assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing

        const req_limbs = calcTwosCompLimbCount(bit_count);

        // We can ignore the upper bits here, those results will be discarded anyway.
        const a_limbs = a.limbs[0..@min(req_limbs, a.limbs.len)];
        const b_limbs = b.limbs[0..@min(req_limbs, b.limbs.len)];

        // llmulacc accumulates into the destination, so it must start zeroed.
        @memset(rma.limbs[0..req_limbs], 0);

        llmulacc(.add, allocator, rma.limbs, a_limbs, b_limbs);
        rma.normalize(@min(req_limbs, a.limbs.len + b.limbs.len));
        rma.positive = (a.positive == b.positive);
        // Wrap the product into the requested two's-complement range.
        rma.truncate(rma.toConst(), signedness, bit_count);
    }
879
    /// r = @bitReverse(a) with 2s-complement semantics.
    /// r and a may be aliases.
    ///
    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn bitReverse(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
        if (bit_count == 0) return;

        r.copy(a);

        const limbs_required = calcTwosCompLimbCount(bit_count);

        if (!a.positive) {
            // Convert from sign-magnitude to two's complement before the bit swap.
            r.positive = true; // Negate.
            r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
            r.addScalar(r.toConst(), 1); // Add one.
        } else if (limbs_required > a.limbs.len) {
            // Zero-extend to our output length
            for (r.limbs[a.limbs.len..limbs_required]) |*limb| {
                limb.* = 0;
            }
            r.len = limbs_required;
        }

        // 0b0..01..1000 with @log2(@sizeOf(Limb)) consecutive ones
        const endian_mask: usize = (@sizeOf(Limb) - 1) << 3;

        const bytes = std.mem.sliceAsBytes(r.limbs);

        // Swap bit k with its mirror position from both ends of the bit string.
        var k: usize = 0;
        while (k < ((bit_count + 1) / 2)) : (k += 1) {
            var i = k;
            var rev_i = bit_count - i - 1;

            // This "endian mask" remaps a low (LE) byte to the corresponding high
            // (BE) byte in the Limb, without changing which limbs we are indexing
            if (native_endian == .big) {
                i ^= endian_mask;
                rev_i ^= endian_mask;
            }

            const bit_i = std.mem.readPackedInt(u1, bytes, i, .little);
            const bit_rev_i = std.mem.readPackedInt(u1, bytes, rev_i, .little);
            std.mem.writePackedInt(u1, bytes, i, bit_rev_i, .little);
            std.mem.writePackedInt(u1, bytes, rev_i, bit_i, .little);
        }

        // Calculate signed-magnitude representation for output
        if (signedness == .signed) {
            const last_bit = switch (native_endian) {
                .little => std.mem.readPackedInt(u1, bytes, bit_count - 1, .little),
                .big => std.mem.readPackedInt(u1, bytes, (bit_count - 1) ^ endian_mask, .little),
            };
            if (last_bit == 1) {
                // Result is negative in two's complement; convert back to sign-magnitude.
                r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
                r.addScalar(r.toConst(), 1); // Add one.
                r.positive = false; // Negate.
            }
        }
        r.normalize(r.len);
    }
941
    /// r = @byteSwap(a) with 2s-complement semantics.
    /// r and a may be aliases.
    ///
    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(8*byte_count)`.
    pub fn byteSwap(r: *Mutable, a: Const, signedness: Signedness, byte_count: usize) void {
        if (byte_count == 0) return;

        r.copy(a);
        const limbs_required = calcTwosCompLimbCount(8 * byte_count);

        // Work on the unsigned 2s-complement image of the value: negative inputs
        // are converted via ~a + 1, positive inputs are zero-extended so that all
        // `byte_count` bytes are backed by storage.
        if (!a.positive) {
            r.positive = true; // Negate.
            r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT.
            r.addScalar(r.toConst(), 1); // Add one.
        } else if (limbs_required > a.limbs.len) {
            // Zero-extend to our output length
            for (r.limbs[a.limbs.len..limbs_required]) |*limb| {
                limb.* = 0;
            }
            r.len = limbs_required;
        }

        // 0b0..01..1 with @log2(@sizeOf(Limb)) trailing ones
        const endian_mask: usize = @sizeOf(Limb) - 1;

        var bytes = std.mem.sliceAsBytes(r.limbs);
        assert(bytes.len >= byte_count);

        // Swap byte k with byte (byte_count - 1 - k). The rounded-up midpoint makes
        // the middle byte of an odd count swap with itself, which is a no-op.
        var k: usize = 0;
        while (k < (byte_count + 1) / 2) : (k += 1) {
            var i = k;
            var rev_i = byte_count - k - 1;

            // This "endian mask" remaps a low (LE) byte to the corresponding high
            // (BE) byte in the Limb, without changing which limbs we are indexing
            if (native_endian == .big) {
                i ^= endian_mask;
                rev_i ^= endian_mask;
            }

            const byte_i = bytes[i];
            const byte_rev_i = bytes[rev_i];
            bytes[rev_i] = byte_i;
            bytes[i] = byte_rev_i;
        }

        // Calculate signed-magnitude representation for output
        if (signedness == .signed) {
            // The most significant byte of the swapped value carries the sign bit.
            const last_byte = switch (native_endian) {
                .little => bytes[byte_count - 1],
                .big => bytes[(byte_count - 1) ^ endian_mask],
            };

            if (last_byte & (1 << 7) != 0) { // Check sign bit of last byte
                r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT.
                r.addScalar(r.toConst(), 1); // Add one.
                r.positive = false; // Negate.
            }
        }
        r.normalize(r.len);
    }
1004
    /// r = @popCount(a) with 2s-complement semantics.
    /// r and a may be aliases.
    ///
    /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
    /// r is `calcTwosCompLimbCount(bit_count)`.
    pub fn popCount(r: *Mutable, a: Const, bit_count: usize) void {
        r.copy(a);

        // Negative values are counted in their 2s-complement form (~a + 1)
        // at a width of `bit_count` bits.
        if (!a.positive) {
            r.positive = true; // Negate.
            r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
            r.addScalar(r.toConst(), 1); // Add one.
        }

        // Accumulate the set bits of every limb, then store the count as the result.
        var sum: Limb = 0;
        for (r.limbs[0..r.len]) |limb| {
            sum += @popCount(limb);
        }
        r.set(sum);
    }
1025
    /// rma = a * a
    ///
    /// `rma` may not alias with `a`.
    ///
    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
    /// rma is given by `2 * a.limbs.len + 1`.
    ///
    /// NOTE(review): `opt_allocator` is accepted for API symmetry with the other
    /// multiplication routines but is currently unused — the basecase squaring
    /// algorithm is always used.
    pub fn sqrNoAlias(rma: *Mutable, a: Const, opt_allocator: ?Allocator) void {
        _ = opt_allocator;
        assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing

        // llsquareBasecase accumulates into its destination, so it must start zeroed.
        @memset(rma.limbs, 0);

        llsquareBasecase(rma.limbs, a.limbs);

        rma.normalize(2 * a.limbs.len + 1);
        // A square is never negative.
        rma.positive = true;
    }
1046
    /// q = a / b (rem r)
    ///
    /// a / b are floored (rounded towards negative infinity).
    /// q may alias with a or b.
    ///
    /// Asserts there is enough memory to store q and r.
    /// The upper bound for r limb count is `b.limbs.len`.
    /// The upper bound for q limb count is given by `a.limbs.len`.
    ///
    /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`.
    pub fn divFloor(
        q: *Mutable,
        r: *Mutable,
        a: Const,
        b: Const,
        limbs_buffer: []Limb,
    ) void {
        // Copy the operands into scratch storage: `div` mutates them in place.
        const sep = a.limbs.len + 2;
        var x = a.toMutable(limbs_buffer[0..sep]);
        var y = b.toMutable(limbs_buffer[sep..]);

        div(q, r, &x, &y);

        // Note, `div` performs truncating division, which satisfies
        // @divTrunc(a, b) * b + @rem(a, b) = a
        // so r = a - @divTrunc(a, b) * b
        // Note, @rem(a, -b) = @rem(-b, a) = -@rem(a, b) = -@rem(-a, -b)
        // For divTrunc, we want to perform
        // @divFloor(a, b) * b + @mod(a, b) = a
        // Note:
        // @divFloor(-a, b)
        // = @divFloor(a, -b)
        // = -@divCeil(a, b)
        // = -@divFloor(a + b - 1, b)
        // = -@divTrunc(a + b - 1, b)

        // Note (1):
        // @divTrunc(a + b - 1, b) * b + @rem(a + b - 1, b) = a + b - 1
        // = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) = a + b - 1
        // = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = a

        if (a.positive and b.positive) {
            // Positive-positive case, don't need to do anything.
        } else if (a.positive and !b.positive) {
            // a/-b -> q is negative, and so we need to fix flooring.
            // Subtract one to make the division flooring.

            // @divFloor(a, -b) * -b + @mod(a, -b) = a
            // If b divides a exactly, we have @divFloor(a, -b) * -b = a
            // Else, we have @divFloor(a, -b) * -b > a, so @mod(a, -b) becomes negative

            // We have:
            // @divFloor(a, -b) * -b + @mod(a, -b) = a
            // = -@divTrunc(a + b - 1, b) * -b + @mod(a, -b) = a
            // = @divTrunc(a + b - 1, b) * b + @mod(a, -b) = a

            // Substitute a for (1):
            // @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b + @mod(a, -b)
            // Yields:
            // @mod(a, -b) = @rem(a - 1, b) - b + 1
            // Note that `r` holds @rem(a, b) at this point.
            //
            // If @rem(a, b) is not 0:
            //   @rem(a - 1, b) = @rem(a, b) - 1
            //   => @mod(a, -b) = @rem(a, b) - 1 - b + 1 = @rem(a, b) - b
            // Else:
            //   @rem(a - 1, b) = @rem(a + b - 1, b) = @rem(b - 1, b) = b - 1
            //   => @mod(a, -b) = b - 1 - b + 1 = 0
            if (!r.eqlZero()) {
                q.addScalar(q.toConst(), -1);
                r.positive = true;
                r.sub(r.toConst(), y.toConst().abs());
            }
        } else if (!a.positive and b.positive) {
            // -a/b -> q is negative, and so we need to fix flooring.
            // Subtract one to make the division flooring.

            // @divFloor(-a, b) * b + @mod(-a, b) = a
            // If b divides a exactly, we have @divFloor(-a, b) * b = -a
            // Else, we have @divFloor(-a, b) * b < -a, so @mod(-a, b) becomes positive

            // We have:
            // @divFloor(-a, b) * b + @mod(-a, b) = -a
            // = -@divTrunc(a + b - 1, b) * b + @mod(-a, b) = -a
            // = @divTrunc(a + b - 1, b) * b - @mod(-a, b) = a

            // Substitute a for (1):
            // @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b - @mod(-a, b)
            // Yields:
            // @rem(a - 1, b) - b + 1 = -@mod(-a, b)
            // => -@mod(-a, b) = @rem(a - 1, b) - b + 1
            // => @mod(-a, b) = -(@rem(a - 1, b) - b + 1) = -@rem(a - 1, b) + b - 1
            //
            // If @rem(a, b) is not 0:
            //   @rem(a - 1, b) = @rem(a, b) - 1
            //   => @mod(-a, b) = -(@rem(a, b) - 1) + b - 1 = -@rem(a, b) + 1 + b - 1 = -@rem(a, b) + b
            // Else :
            //   @rem(a - 1, b) = b - 1
            //   => @mod(-a, b) = -(b - 1) + b - 1 = 0
            if (!r.eqlZero()) {
                q.addScalar(q.toConst(), -1);
                r.positive = false;
                r.add(r.toConst(), y.toConst().abs());
            }
        } else if (!a.positive and !b.positive) {
            // a/b -> q is positive, don't need to do anything to fix flooring.

            // @divFloor(-a, -b) * -b + @mod(-a, -b) = -a
            // If b divides a exactly, we have @divFloor(-a, -b) * -b = -a
            // Else, we have @divFloor(-a, -b) * -b > -a, so @mod(-a, -b) becomes negative

            // We have:
            // @divFloor(-a, -b) * -b + @mod(-a, -b) = -a
            // = @divTrunc(a, b) * -b + @mod(-a, -b) = -a
            // = @divTrunc(a, b) * b - @mod(-a, -b) = a

            // We also have:
            // @divTrunc(a, b) * b + @rem(a, b) = a

            // Substitute a:
            // @divTrunc(a, b) * b + @rem(a, b) = @divTrunc(a, b) * b - @mod(-a, -b)
            // => @rem(a, b) = -@mod(-a, -b)
            // => @mod(-a, -b) = -@rem(a, b)
            r.positive = false;
        }
    }
1173
    /// q = a / b (rem r)
    ///
    /// a / b are truncated (rounded towards zero).
    /// q may alias with a or b.
    ///
    /// Asserts there is enough memory to store q and r.
    /// The upper bound for r limb count is `b.limbs.len`.
    /// The upper bound for q limb count is given by `a.limbs.len`.
    ///
    /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`.
    pub fn divTrunc(
        q: *Mutable,
        r: *Mutable,
        a: Const,
        b: Const,
        limbs_buffer: []Limb,
    ) void {
        // Copy the operands into scratch storage: `div` mutates them in place.
        // `div` already truncates, so no sign fix-up is needed afterwards.
        const sep = a.limbs.len + 2;
        var x = a.toMutable(limbs_buffer[0..sep]);
        var y = b.toMutable(limbs_buffer[sep..]);

        div(q, r, &x, &y);
    }
1197
1198 /// r = a << shift, in other words, r = a * 2^shift
1199 ///
1200 /// r and a may alias.
1201 ///
1202 /// Asserts there is enough memory to fit the result. The upper bound Limb count is
1203 /// `a.limbs.len + (shift / (@sizeOf(Limb) * 8))`.
1204 pub fn shiftLeft(r: *Mutable, a: Const, shift: usize) void {
1205 const new_len = llshl(r.limbs, a.limbs, shift);
1206 r.normalize(new_len);
1207 r.positive = a.positive;
1208 }
1209
    /// r = a <<| shift with 2s-complement saturating semantics.
    ///
    /// r and a may alias.
    ///
    /// Asserts there is enough memory to fit the result. The upper bound on the number
    /// of limbs needed by r is `calcTwosCompLimbCount(bit_count)`.
    pub fn shiftLeftSat(r: *Mutable, a: Const, shift: usize, signedness: Signedness, bit_count: usize) void {
        // Special case: When the argument is negative, but the result is supposed to be unsigned,
        // return 0 in all cases.
        if (!a.positive and signedness == .unsigned) {
            r.set(0);
            return;
        }

        // Check whether the shift is going to overflow. This is the case
        // when (in 2s complement) any bit above `bit_count - shift` is set in the unshifted value.
        // Note, the sign bit is not counted here.

        // Handle shifts larger than the target type. This also deals with
        // 0-bit integers.
        if (bit_count <= shift) {
            // In this case, there is only no overflow if `a` is zero.
            if (a.eqlZero()) {
                r.set(0);
            } else {
                r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
            }
            return;
        }

        const checkbit = bit_count - shift - @intFromBool(signedness == .signed);
        // If `checkbit` and more significant bits are zero, no overflow will take place.

        if (checkbit >= a.limbs.len * limb_bits) {
            // `checkbit` is outside the range of a, so definitely no overflow will take place. We
            // can defer to a normal shift.
            // Note that if `a` is normalized (which we assume), this checks for set bits in the upper limbs.

            // Note, in this case r should already have enough limbs required to perform the normal shift.
            // In this case the shift of the most significant limb may still overflow.
            r.shiftLeft(a, shift);
            return;
        } else if (checkbit < (a.limbs.len - 1) * limb_bits) {
            // `checkbit` is not in the most significant limb. If `a` is normalized the most significant
            // limb will not be zero, so in this case we need to saturate. Note that `a.limbs.len` must be
            // at least one according to normalization rules.

            r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
            return;
        }

        // Generate a mask with the bits to check in the most significant limb. We'll need to check
        // all bits with equal or more significance than checkbit.
        // const msb = @truncate(Log2Limb, checkbit);
        // const checkmask = (@as(Limb, 1) << msb) -% 1;

        // `checkbit` lies inside the most significant limb; any set bit at or above
        // it means the shifted value cannot be represented, so saturate.
        if (a.limbs[a.limbs.len - 1] >> @as(Log2Limb, @truncate(checkbit)) != 0) {
            // Need to saturate.
            r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
            return;
        }

        // This shift should not be able to overflow, so invoke llshl and normalize manually
        // to avoid the extra required limb.
        const new_len = llshl(r.limbs, a.limbs, shift);
        r.normalize(new_len);
        r.positive = a.positive;
    }
1278
    /// r = a >> shift
    /// r and a may alias.
    ///
    /// Asserts there is enough memory to fit the result. The upper bound Limb count is
    /// `a.limbs.len - (shift / (@bitSizeOf(Limb)))`.
    pub fn shiftRight(r: *Mutable, a: Const, shift: usize) void {
        const full_limbs_shifted_out = shift / limb_bits;
        const remaining_bits_shifted_out = shift % limb_bits;
        if (a.limbs.len <= full_limbs_shifted_out) {
            // Shifting negative numbers converges to -1 instead of 0
            if (a.positive) {
                r.len = 1;
                r.positive = true;
                r.limbs[0] = 0;
            } else {
                r.len = 1;
                r.positive = false;
                r.limbs[0] = 1;
            }
            return;
        }
        // An arithmetic (flooring) right shift of a negative value must round towards
        // negative infinity. Shifting the magnitude truncates towards zero, so if any
        // one-bit is shifted out of a negative value we must subtract one afterwards.
        const nonzero_negative_shiftout = if (a.positive) false else nonzero: {
            // Any set bit in the fully discarded limbs?
            for (a.limbs[0..full_limbs_shifted_out]) |x| {
                if (x != 0)
                    break :nonzero true;
            }
            if (remaining_bits_shifted_out == 0)
                break :nonzero false;
            // Any set bit in the partially discarded low bits of the boundary limb?
            const not_covered: Log2Limb = @intCast(limb_bits - remaining_bits_shifted_out);
            break :nonzero a.limbs[full_limbs_shifted_out] << not_covered != 0;
        };

        const new_len = llshr(r.limbs, a.limbs, shift);

        r.len = new_len;
        r.positive = a.positive;
        if (nonzero_negative_shiftout) r.addScalar(r.toConst(), -1);
        r.normalize(r.len);
    }
1318
1319 /// r = ~a under 2s complement wrapping semantics.
1320 /// r may alias with a.
1321 ///
1322 /// Assets that r has enough limbs to store the result. The upper bound Limb count is
1323 /// r is `calcTwosCompLimbCount(bit_count)`.
1324 pub fn bitNotWrap(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
1325 r.copy(a.negate());
1326 const negative_one = Const{ .limbs = &.{1}, .positive = false };
1327 _ = r.addWrap(r.toConst(), negative_one, signedness, bit_count);
1328 }
1329
    /// r = a | b under 2s complement semantics.
    /// r may alias with a or b.
    ///
    /// a and b are zero-extended to the longer of a or b.
    ///
    /// Asserts that r has enough limbs to store the result. Upper bound is `@max(a.limbs.len, b.limbs.len)`.
    pub fn bitOr(r: *Mutable, a: Const, b: Const) void {
        // Trivial cases, llsignedor does not support zero.
        if (a.eqlZero()) {
            r.copy(b);
            return;
        } else if (b.eqlZero()) {
            r.copy(a);
            return;
        }

        // Pass the longer operand first (the order llsignedor expects); the
        // normalize length mirrors the sign of the shorter operand.
        // NOTE(review): when the shorter operand is negative, the result magnitude
        // appears bounded by that operand's limb count — confirm against llsignedor.
        if (a.limbs.len >= b.limbs.len) {
            r.positive = llsignedor(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
            r.normalize(if (b.positive) a.limbs.len else b.limbs.len);
        } else {
            r.positive = llsignedor(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
            r.normalize(if (a.positive) b.limbs.len else a.limbs.len);
        }
    }
1354
    /// r = a & b under 2s complement semantics.
    /// r may alias with a or b.
    ///
    /// Asserts that r has enough limbs to store the result.
    /// If only a is positive, the upper bound is `a.limbs.len`.
    /// If only b is positive, the upper bound is `b.limbs.len`.
    /// If a and b are positive, the upper bound is `@min(a.limbs.len, b.limbs.len)`.
    /// If a and b are negative, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`.
    pub fn bitAnd(r: *Mutable, a: Const, b: Const) void {
        // Trivial cases, llsignedand does not support zero.
        if (a.eqlZero()) {
            r.copy(a);
            return;
        } else if (b.eqlZero()) {
            r.copy(b);
            return;
        }

        // Pass the longer operand first (the order llsignedand expects). The
        // normalize length follows the doc-comment bounds above: a positive operand
        // bounds the result, while two negative operands may need one extra limb.
        if (a.limbs.len >= b.limbs.len) {
            r.positive = llsignedand(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
            r.normalize(if (b.positive) b.limbs.len else if (a.positive) a.limbs.len else a.limbs.len + 1);
        } else {
            r.positive = llsignedand(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
            r.normalize(if (a.positive) a.limbs.len else if (b.positive) b.limbs.len else b.limbs.len + 1);
        }
    }
1381
    /// r = a ^ b under 2s complement semantics.
    /// r may alias with a or b.
    ///
    /// Asserts that r has enough limbs to store the result. If a and b share the same signedness, the
    /// upper bound is `@max(a.limbs.len, b.limbs.len)`. Otherwise, if either a or b is negative
    /// but not both, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`.
    pub fn bitXor(r: *Mutable, a: Const, b: Const) void {
        // Trivial cases, because llsignedxor does not support negative zero.
        if (a.eqlZero()) {
            r.copy(b);
            return;
        } else if (b.eqlZero()) {
            r.copy(a);
            return;
        }

        // Pass the longer operand first (the order llsignedxor expects). Mixed
        // signs may carry into one extra limb, hence the @intFromBool term.
        if (a.limbs.len > b.limbs.len) {
            r.positive = llsignedxor(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
            r.normalize(a.limbs.len + @intFromBool(a.positive != b.positive));
        } else {
            r.positive = llsignedxor(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
            r.normalize(b.limbs.len + @intFromBool(a.positive != b.positive));
        }
    }
1406
    /// rma may alias x or y.
    /// x and y may alias each other.
    /// Asserts that `rma` has enough limbs to store the result. Upper bound is
    /// `@min(x.limbs.len, y.limbs.len)`.
    ///
    /// `limbs_buffer` is used for temporary storage during the operation. When this function returns,
    /// it will have the same length as it had when the function was called.
    pub fn gcd(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.array_list.Managed(Limb)) !void {
        const prev_len = limbs_buffer.items.len;
        // Restore the buffer to its original length on every return path
        // (part of the documented contract above).
        defer limbs_buffer.shrinkRetainingCapacity(prev_len);
        // If `rma` aliases an input, copy that input into `limbs_buffer` first so
        // that writing the result does not clobber the operand mid-computation.
        const x_copy = if (rma.limbs.ptr == x.limbs.ptr) blk: {
            const start = limbs_buffer.items.len;
            try limbs_buffer.appendSlice(x.limbs);
            break :blk x.toMutable(limbs_buffer.items[start..]).toConst();
        } else x;
        const y_copy = if (rma.limbs.ptr == y.limbs.ptr) blk: {
            const start = limbs_buffer.items.len;
            try limbs_buffer.appendSlice(y.limbs);
            break :blk y.toMutable(limbs_buffer.items[start..]).toConst();
        } else y;

        return gcdLehmer(rma, x_copy, y_copy, limbs_buffer);
    }
1430
    /// r = a ^ b
    ///
    /// r may not alias a.
    ///
    /// Asserts that `r` has enough limbs to store the result. Upper bound is
    /// `calcPowLimbsBufferLen(a.bitCountAbs(), b)`.
    ///
    /// `limbs_buffer` is used for temporary storage.
    /// The amount required is given by `calcPowLimbsBufferLen`.
    pub fn pow(r: *Mutable, a: Const, b: u32, limbs_buffer: []Limb) void {
        assert(r.limbs.ptr != a.limbs.ptr); // illegal aliasing

        // Handle all the trivial cases first
        switch (b) {
            0 => {
                // a^0 = 1
                return r.set(1);
            },
            1 => {
                // a^1 = a
                return r.copy(a);
            },
            else => {},
        }

        if (a.eqlZero()) {
            // 0^b = 0
            return r.set(0);
        } else if (a.limbs.len == 1 and a.limbs[0] == 1) {
            // 1^b = 1 and -1^b = ±1
            r.set(1);
            // An even exponent makes the result positive regardless of a's sign.
            r.positive = a.positive or (b & 1) == 0;
            return;
        }

        // Here a>1 and b>1
        const needed_limbs = calcPowLimbsBufferLen(a.bitCountAbs(), b);
        assert(r.limbs.len >= needed_limbs);
        assert(limbs_buffer.len >= needed_limbs);

        llpow(r.limbs, a.limbs, b, limbs_buffer);

        r.normalize(needed_limbs);
        // Sign rule: negative base with odd exponent yields a negative result.
        r.positive = a.positive or (b & 1) == 0;
    }
1476
    /// r = ⌊√a⌋
    ///
    /// r may alias a.
    ///
    /// Asserts that `r` has enough limbs to store the result. Upper bound is
    /// `(a.limbs.len - 1) / 2 + 1`.
    ///
    /// `limbs_buffer` is used for temporary storage.
    /// The amount required is given by `calcSqrtLimbsBufferLen`.
    pub fn sqrt(
        r: *Mutable,
        a: Const,
        limbs_buffer: []Limb,
    ) void {
        // Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 SqrtInt
        // https://members.loria.fr/PZimmermann/mca/pub226.html
        // The temporaries t, u, s, rem are carved sequentially out of limbs_buffer;
        // whatever remains past buf_index is scratch space for divFloor below.
        var buf_index: usize = 0;
        var t = b: {
            const start = buf_index;
            buf_index += a.limbs.len;
            break :b Mutable.init(limbs_buffer[start..buf_index], 0);
        };
        var u = b: {
            const start = buf_index;
            const shift = (a.bitCountAbs() + 1) / 2;
            buf_index += 1 + ((shift / limb_bits) + 1);
            var m = Mutable.init(limbs_buffer[start..buf_index], 1);
            m.shiftLeft(m.toConst(), shift); // u must be >= ⌊√a⌋, and should be as small as possible for efficiency
            break :b m;
        };
        var s = b: {
            const start = buf_index;
            buf_index += u.limbs.len;
            break :b u.toConst().toMutable(limbs_buffer[start..buf_index]);
        };
        var rem = b: {
            const start = buf_index;
            buf_index += s.limbs.len;
            break :b Mutable.init(limbs_buffer[start..buf_index], 0);
        };

        // Newton-style iteration: s' = (s + a/s) / 2, monotonically decreasing
        // towards ⌊√a⌋; stop once it no longer decreases.
        while (true) {
            t.divFloor(&rem, a, s.toConst(), limbs_buffer[buf_index..]);
            t.add(t.toConst(), s.toConst());
            u.shiftRight(t.toConst(), 1);

            if (u.toConst().order(s.toConst()).compare(.gte)) {
                r.copy(s.toConst());
                return;
            }

            // Avoid copying u to s by swapping u and s
            const tmp_s = s;
            s = u;
            u = tmp_s;
        }
    }
1534
    /// rma may not alias x or y.
    /// x and y may alias each other.
    /// Asserts that `rma` has enough limbs to store the result. Upper bound is given by `calcGcdNoAliasLimbLen`.
    ///
    /// `limbs_buffer` is used for temporary storage during the operation.
    pub fn gcdNoAlias(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.array_list.Managed(Limb)) !void {
        assert(rma.limbs.ptr != x.limbs.ptr); // illegal aliasing
        assert(rma.limbs.ptr != y.limbs.ptr); // illegal aliasing
        // No aliasing, so the operands can be forwarded directly without copies.
        return gcdLehmer(rma, x, y, limbs_buffer);
    }
1545
    /// Lehmer's GCD algorithm: most reduction steps are performed with
    /// single-(double-)limb arithmetic on the leading limbs, falling back to
    /// full-precision operations only when the approximation breaks down.
    /// Operates on |xa| and |ya|; the result is non-negative.
    fn gcdLehmer(result: *Mutable, xa: Const, ya: Const, limbs_buffer: *std.array_list.Managed(Limb)) !void {
        var x = try xa.toManaged(limbs_buffer.allocator);
        defer x.deinit();
        x.abs();

        var y = try ya.toManaged(limbs_buffer.allocator);
        defer y.deinit();
        y.abs();

        // Maintain the invariant x >= y.
        if (x.toConst().order(y.toConst()) == .lt) {
            x.swap(&y);
        }

        var t_big = try Managed.init(limbs_buffer.allocator);
        defer t_big.deinit();

        var r = try Managed.init(limbs_buffer.allocator);
        defer r.deinit();

        var tmp_x = try Managed.init(limbs_buffer.allocator);
        defer tmp_x.deinit();

        while (y.len() > 1 and !y.eqlZero()) {
            assert(x.isPositive() and y.isPositive());
            assert(x.len() >= y.len());

            // Approximate x and y by their most significant limbs.
            var xh: SignedDoubleLimb = x.limbs[x.len() - 1];
            var yh: SignedDoubleLimb = if (x.len() > y.len()) 0 else y.limbs[x.len() - 1];

            // 2x2 cofactor matrix [A B; C D], updated while the quotient
            // computed from the leading limbs is provably exact.
            var A: SignedDoubleLimb = 1;
            var B: SignedDoubleLimb = 0;
            var C: SignedDoubleLimb = 0;
            var D: SignedDoubleLimb = 1;

            while (yh + C != 0 and yh + D != 0) {
                const q = @divFloor(xh + A, yh + C);
                const qp = @divFloor(xh + B, yh + D);
                // The two quotient bounds disagree: the single-limb
                // approximation is no longer trustworthy, stop.
                if (q != qp) {
                    break;
                }

                var t = A - q * C;
                A = C;
                C = t;
                t = B - q * D;
                B = D;
                D = t;

                t = xh - q * yh;
                xh = yh;
                yh = t;
            }

            if (B == 0) {
                // No single-limb progress was made; perform one full-precision
                // Euclidean step instead.
                // t_big = x % y, r is unused
                try r.divTrunc(&t_big, &x, &y);
                assert(t_big.isPositive());

                x.swap(&y);
                y.swap(&t_big);
            } else {
                // Apply the accumulated cofactor matrix to (x, y) in full precision:
                // (x, y) <- (Ax + By, Cx + Dy).
                var storage: [8]Limb = undefined;
                const Ap = fixedIntFromSignedDoubleLimb(A, storage[0..2]).toManaged(limbs_buffer.allocator);
                const Bp = fixedIntFromSignedDoubleLimb(B, storage[2..4]).toManaged(limbs_buffer.allocator);
                const Cp = fixedIntFromSignedDoubleLimb(C, storage[4..6]).toManaged(limbs_buffer.allocator);
                const Dp = fixedIntFromSignedDoubleLimb(D, storage[6..8]).toManaged(limbs_buffer.allocator);

                // t_big = Ax + By
                try r.mul(&x, &Ap);
                try t_big.mul(&y, &Bp);
                try t_big.add(&r, &t_big);

                // u = Cx + Dy, r as u
                try tmp_x.copy(x.toConst());
                try x.mul(&tmp_x, &Cp);
                try r.mul(&y, &Dp);
                try r.add(&x, &r);

                x.swap(&t_big);
                y.swap(&r);
            }
        }

        // euclidean algorithm
        assert(x.toConst().order(y.toConst()) != .lt);

        while (!y.toConst().eqlZero()) {
            try t_big.divTrunc(&r, &x, &y);
            x.swap(&y);
            y.swap(&r);
        }

        result.copy(x.toConst());
    }
1640
    /// q = x / y (rem r), truncated towards zero.
    /// x and y are mutated during the computation; q may not alias r.
    fn div(q: *Mutable, r: *Mutable, x: *Mutable, y: *Mutable) void {
        assert(!y.eqlZero()); // division by zero
        assert(q != r); // illegal aliasing

        // Record the output signs up front; the core algorithms below operate
        // on magnitudes only.
        const q_positive = (x.positive == y.positive);
        const r_positive = x.positive;

        if (x.toConst().orderAbs(y.toConst()) == .lt) {
            // |x| < |y|: quotient is zero and the remainder is x itself.
            // q may alias x so handle r first.
            r.copy(x.toConst());
            r.positive = r_positive;

            q.set(0);
            return;
        }

        // Handle trailing zero-words of divisor/dividend. These are not handled in the following
        // algorithms.
        // Note, there must be a non-zero limb for either.
        // const x_trailing = std.mem.indexOfScalar(Limb, x.limbs[0..x.len], 0).?;
        // const y_trailing = std.mem.indexOfScalar(Limb, y.limbs[0..y.len], 0).?;

        const x_trailing = for (x.limbs[0..x.len], 0..) |xi, i| {
            if (xi != 0) break i;
        } else unreachable;

        const y_trailing = for (y.limbs[0..y.len], 0..) |yi, i| {
            if (yi != 0) break i;
        } else unreachable;

        const xy_trailing = @min(x_trailing, y_trailing);

        if (y.len - xy_trailing == 1) {
            const divisor = y.limbs[y.len - 1];

            // Optimization for small divisor. By using a half limb we can avoid requiring DoubleLimb
            // divisions in the hot code path. This may often require compiler_rt software-emulation.
            if (divisor < maxInt(HalfLimb)) {
                lldiv0p5(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], @as(HalfLimb, @intCast(divisor)));
            } else {
                lldiv1(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], divisor);
            }

            q.normalize(x.len - xy_trailing);
            q.positive = q_positive;

            r.len = 1;
            r.positive = r_positive;
        } else {
            // Shrink x, y such that the trailing zero limbs shared between are removed.
            var x0 = Mutable{
                .limbs = x.limbs[xy_trailing..],
                .len = x.len - xy_trailing,
                .positive = true,
            };

            var y0 = Mutable{
                .limbs = y.limbs[xy_trailing..],
                .len = y.len - xy_trailing,
                .positive = true,
            };

            divmod(q, r, &x0, &y0);
            q.positive = q_positive;

            r.positive = r_positive;
        }

        // Re-attach the shared trailing zero limbs to the remainder
        // (r = r * b^xy_trailing), unless the remainder is zero.
        if (xy_trailing != 0 and r.limbs[r.len - 1] != 0) {
            // Manually shift here since we know its limb aligned.
            @memmove(r.limbs[xy_trailing..][0..r.len], r.limbs[0..r.len]);
            @memset(r.limbs[0..xy_trailing], 0);
            r.len += xy_trailing;
        }
    }
1717
1718 /// Handbook of Applied Cryptography, 14.20
1719 ///
1720 /// x = qy + r where 0 <= r < y
1721 /// y is modified but returned intact.
1722 fn divmod(
1723 q: *Mutable,
1724 r: *Mutable,
1725 x: *Mutable,
1726 y: *Mutable,
1727 ) void {
1728 // 0.
1729 // Normalize so that y[t] > b/2
1730 const lz = @clz(y.limbs[y.len - 1]);
1731 const norm_shift = if (lz == 0 and y.toConst().isOdd())
1732 limb_bits // Force an extra limb so that y is even.
1733 else
1734 lz;
1735
1736 x.shiftLeft(x.toConst(), norm_shift);
1737 y.shiftLeft(y.toConst(), norm_shift);
1738
1739 const n = x.len - 1;
1740 const t = y.len - 1;
1741 const shift = n - t;
1742
1743 // 1.
1744 // for 0 <= j <= n - t, set q[j] to 0
1745 q.len = shift + 1;
1746 q.positive = true;
1747 @memset(q.limbs[0..q.len], 0);
1748
1749 // 2.
1750 // while x >= y * b^(n - t):
1751 // x -= y * b^(n - t)
1752 // q[n - t] += 1
1753 // Note, this algorithm is performed only once if y[t] > base/2 and y is even, which we
1754 // enforced in step 0. This means we can replace the while with an if.
1755 // Note, multiplication by b^(n - t) comes down to shifting to the right by n - t limbs.
1756 // We can also replace x >= y * b^(n - t) by x/b^(n - t) >= y, and use shifts for that.
1757 {
1758 // x >= y * b^(n - t) can be replaced by x/b^(n - t) >= y.
1759
1760 // 'divide' x by b^(n - t)
1761 var tmp = Mutable{
1762 .limbs = x.limbs[shift..],
1763 .len = x.len - shift,
1764 .positive = true,
1765 };
1766
1767 if (tmp.toConst().order(y.toConst()) != .lt) {
1768 // Perform x -= y * b^(n - t)
1769 // Note, we can subtract y from x[n - t..] and get the result without shifting.
1770 // We can also re-use tmp which already contains the relevant part of x. Note that
1771 // this also edits x.
1772 // Due to the check above, this cannot underflow.
1773 tmp.sub(tmp.toConst(), y.toConst());
1774
1775 // tmp.sub normalized tmp, but we need to normalize x now.
1776 x.limbs.len = tmp.limbs.len + shift;
1777
1778 q.limbs[shift] += 1;
1779 }
1780 }
1781
1782 // 3.
1783 // for i from n down to t + 1, do
1784 var i = n;
1785 while (i >= t + 1) : (i -= 1) {
1786 const k = i - t - 1;
1787 // 3.1.
1788 // if x_i == y_t:
1789 // q[i - t - 1] = b - 1
1790 // else:
1791 // q[i - t - 1] = (x[i] * b + x[i - 1]) / y[t]
1792 if (x.limbs[i] == y.limbs[t]) {
1793 q.limbs[k] = maxInt(Limb);
1794 } else {
1795 const q0 = (@as(DoubleLimb, x.limbs[i]) << limb_bits) | @as(DoubleLimb, x.limbs[i - 1]);
1796 const n0 = @as(DoubleLimb, y.limbs[t]);
1797 q.limbs[k] = @as(Limb, @intCast(q0 / n0));
1798 }
1799
1800 // 3.2
1801 // while q[i - t - 1] * (y[t] * b + y[t - 1] > x[i] * b * b + x[i - 1] + x[i - 2]:
1802 // q[i - t - 1] -= 1
1803 // Note, if y[t] > b / 2 this part is repeated no more than twice.
1804
1805 // Extract from y.
1806 const y0 = if (t > 0) y.limbs[t - 1] else 0;
1807 const y1 = y.limbs[t];
1808
1809 // Extract from x.
1810 // Note, big endian.
1811 const tmp0 = [_]Limb{
1812 x.limbs[i],
1813 if (i >= 1) x.limbs[i - 1] else 0,
1814 if (i >= 2) x.limbs[i - 2] else 0,
1815 };
1816
1817 while (true) {
1818 // Ad-hoc 2x1 multiplication with q[i - t - 1].
1819 // Note, big endian.
1820 var tmp1 = [_]Limb{ 0, undefined, undefined };
1821 tmp1[2] = addMulLimbWithCarry(0, y0, q.limbs[k], &tmp1[0]);
1822 tmp1[1] = addMulLimbWithCarry(0, y1, q.limbs[k], &tmp1[0]);
1823
1824 // Big-endian compare
1825 if (mem.order(Limb, &tmp1, &tmp0) != .gt)
1826 break;
1827
1828 q.limbs[k] -= 1;
1829 }
1830
1831 // 3.3.
1832 // x -= q[i - t - 1] * y * b^(i - t - 1)
1833 // Note, we multiply by a single limb here.
1834 // The shift doesn't need to be performed if we add the result of the first multiplication
1835 // to x[i - t - 1].
1836 const underflow = llmulLimb(.sub, x.limbs[k..x.len], y.limbs[0..y.len], q.limbs[k]);
1837
1838 // 3.4.
1839 // if x < 0:
1840 // x += y * b^(i - t - 1)
1841 // q[i - t - 1] -= 1
1842 // Note, we check for x < 0 using the underflow flag from the previous operation.
1843 if (underflow) {
1844 // While we didn't properly set the signedness of x, this operation should 'flow' it back to positive.
1845 llaccum(.add, x.limbs[k..x.len], y.limbs[0..y.len]);
1846 q.limbs[k] -= 1;
1847 }
1848 }
1849
1850 x.normalize(x.len);
1851 q.normalize(q.len);
1852
1853 // De-normalize r and y.
1854 r.shiftRight(x.toConst(), norm_shift);
1855 y.shiftRight(y.toConst(), norm_shift);
1856 }
1857
    /// Truncate an integer to a number of bits, following 2s-complement semantics.
    /// `r` may alias `a`.
    ///
    /// Asserts `r` has enough storage to compute the result.
    /// The upper bound is `calcTwosCompLimbCount(a.len)`.
    pub fn truncate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
        // Handle 0-bit integers.
        if (bit_count == 0) {
            @branchHint(.unlikely);
            r.set(0);
            return;
        }

        const max_limbs = calcTwosCompLimbCount(bit_count);
        // Bit marking the sign position within the most significant retained limb.
        const sign_bit = @as(Limb, 1) << @truncate(bit_count - 1);
        // Mask covering the valid bits of the most significant retained limb.
        // `@truncate(-%bit_count)` equals `-bit_count mod limb_bits`, so the mask is
        // all-ones when `bit_count` is a multiple of `limb_bits`.
        const mask = @as(Limb, maxInt(Limb)) >> @truncate(-%bit_count);

        // Guess whether the result will have the same sign as `a`.
        // * If the result will be signed zero, the guess is `true`.
        // * If the result will be the minimum signed integer, the guess is `false`.
        // * If the result will be unsigned zero, the guess is `a.positive`.
        // * Otherwise the guess is correct.
        const same_sign_guess = switch (signedness) {
            .signed => max_limbs > a.limbs.len or a.limbs[max_limbs - 1] & sign_bit == 0,
            .unsigned => a.positive,
        };

        // Magnitude of `a` truncated to at most `max_limbs` limbs, normalized.
        const abs_trunc_a: Const = .{
            .positive = true,
            .limbs = a.limbs[0..llnormalize(a.limbs[0..@min(a.limbs.len, max_limbs)])],
        };
        if (same_sign_guess or abs_trunc_a.eqlZero()) {
            // One of the following is true:
            // * The result is zero.
            // * The result is non-zero and has the same sign as `a`.
            r.copy(abs_trunc_a);
            if (max_limbs <= r.len) r.limbs[max_limbs - 1] &= mask;
            r.normalize(r.len);
            r.positive = a.positive or r.eqlZero();
        } else {
            // One of the following is true:
            // * The result is the minimum signed integer.
            // * The result is unsigned zero.
            // * The result is non-zero and has the opposite sign as `a`.
            // Compute the two's complement: subtract one from the magnitude, then
            // flip all bits within the truncated width.
            r.addScalar(abs_trunc_a, -1);
            llnot(r.limbs[0..r.len]);
            @memset(r.limbs[r.len..max_limbs], maxInt(Limb));
            r.limbs[max_limbs - 1] &= mask;
            r.normalize(max_limbs);
            r.positive = switch (signedness) {
                // The only value with the sign bit still set is the minimum signed integer.
                .signed => !a.positive and r.limbs[max_limbs - 1] & sign_bit == 0,
                .unsigned => !a.positive or r.eqlZero(),
            };
        }
    }
1914
1915 /// Saturate an integer to a number of bits, following 2s-complement semantics.
1916 /// r may alias a.
1917 ///
1918 /// Asserts `r` has enough storage to store the result.
1919 /// The upper bound is `calcTwosCompLimbCount(a.len)`.
1920 pub fn saturate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
1921 if (!a.fitsInTwosComp(signedness, bit_count)) {
1922 r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
1923 }
1924 }
1925
1926 /// Read the value of `x` from `buffer`.
1927 /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`.
1928 ///
1929 /// The contents of `buffer` are interpreted as if they were the contents of
1930 /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`
1931 /// and any required padding bits are expected on the MSB end.
1932 pub fn readTwosComplement(
1933 x: *Mutable,
1934 buffer: []const u8,
1935 bit_count: usize,
1936 endian: Endian,
1937 signedness: Signedness,
1938 ) void {
1939 return readPackedTwosComplement(x, buffer, 0, bit_count, endian, signedness);
1940 }
1941
    /// Read the value of `x` from a packed memory `buffer`.
    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`
    /// at offset `bit_offset`.
    ///
    /// This is equivalent to loading the value of an integer with `bit_count` bits as
    /// if it were a field in packed memory at the provided bit offset.
    pub fn readPackedTwosComplement(
        x: *Mutable,
        buffer: []const u8,
        bit_offset: usize,
        bit_count: usize,
        endian: Endian,
        signedness: Signedness,
    ) void {
        // A 0-bit integer can only hold the value zero.
        if (bit_count == 0) {
            x.limbs[0] = 0;
            x.len = 1;
            x.positive = true;
            return;
        }

        // Check whether the input is negative
        var positive = true;
        if (signedness == .signed) {
            const total_bits = bit_offset + bit_count;
            // Locate the byte containing the field's most significant (sign) bit.
            const last_byte = switch (endian) {
                .little => ((total_bits + 7) / 8) - 1,
                .big => buffer.len - ((total_bits + 7) / 8),
            };

            const sign_bit = @as(u8, 1) << @as(u3, @intCast((total_bits - 1) % 8));
            positive = ((buffer[last_byte] & sign_bit) == 0);
        }

        // Copy all complete limbs
        // `carry` is the "+1" of the two's complement negation (~x + 1), rippled
        // limb by limb as we copy, so negative inputs are stored as magnitudes.
        var carry: u1 = 1;
        var limb_index: usize = 0;
        var bit_index: usize = 0;
        while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) {
            // Read one Limb of bits
            var limb = mem.readPackedInt(Limb, buffer, bit_index + bit_offset, endian);
            bit_index += @bitSizeOf(Limb);

            // 2's complement (bitwise not, then add carry bit)
            if (!positive) {
                const ov = @addWithOverflow(~limb, carry);
                limb = ov[0];
                carry = ov[1];
            }
            x.limbs[limb_index] = limb;
        }

        // Copy the remaining bits
        if (bit_count != bit_index) {
            // Read all remaining bits
            var limb = switch (signedness) {
                .unsigned => mem.readVarPackedInt(Limb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .unsigned),
                .signed => b: {
                    // Sign-extend the partial limb so the negation below operates on
                    // the full-width two's-complement bit pattern.
                    const SLimb = std.meta.Int(.signed, @bitSizeOf(Limb));
                    const limb = mem.readVarPackedInt(SLimb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .signed);
                    break :b @as(Limb, @bitCast(limb));
                },
            };

            // 2's complement (bitwise not, then add carry bit)
            if (!positive) {
                const ov = @addWithOverflow(~limb, carry);
                assert(ov[1] == 0);
                limb = ov[0];
            }
            x.limbs[limb_index] = limb;

            limb_index += 1;
        }

        x.positive = positive;
        x.len = limb_index;
        x.normalize(x.len);
    }
2021
2022 /// Normalize a possible sequence of leading zeros.
2023 ///
2024 /// [1, 2, 3, 4, 0] -> [1, 2, 3, 4]
2025 /// [1, 2, 0, 0, 0] -> [1, 2]
2026 /// [0, 0, 0, 0, 0] -> [0]
2027 pub fn normalize(r: *Mutable, length: usize) void {
2028 r.len = llnormalize(r.limbs[0..length]);
2029 }
2030
2031 pub fn format(self: Mutable, w: *std.Io.Writer) std.Io.Writer.Error!void {
2032 return formatNumber(self, w, .{});
2033 }
2034
2035 /// If the absolute value of integer is greater than or equal to `pow(2, 64 * @sizeOf(usize) * 8)`,
2036 /// this function will fail to print the string, printing "(BigInt)" instead of a number.
2037 /// This is because the rendering algorithm requires reversing a string, which requires O(N) memory.
2038 /// See `Const.toString` and `Const.toStringAlloc` for a way to print big integers without failure.
2039 pub fn formatNumber(self: Mutable, w: *std.Io.Writer, n: std.fmt.Number) std.Io.Writer.Error!void {
2040 return self.toConst().formatNumber(w, n);
2041 }
2042};
2043
2044/// A arbitrary-precision big integer, with a fixed set of immutable limbs.
2045pub const Const = struct {
2046 /// Raw digits. These are:
2047 ///
2048 /// * Little-endian ordered
2049 /// * limbs.len >= 1
2050 /// * Zero is represented as limbs.len == 1 with limbs[0] == 0.
2051 ///
2052 /// Accessing limbs directly should be avoided.
2053 limbs: []const Limb,
2054 positive: bool,
2055
2056 /// The result is an independent resource which is managed by the caller.
2057 pub fn toManaged(self: Const, allocator: Allocator) Allocator.Error!Managed {
2058 const limbs = try allocator.alloc(Limb, @max(Managed.default_capacity, self.limbs.len));
2059 @memcpy(limbs[0..self.limbs.len], self.limbs);
2060 return Managed{
2061 .allocator = allocator,
2062 .limbs = limbs,
2063 .metadata = if (self.positive)
2064 self.limbs.len & ~Managed.sign_bit
2065 else
2066 self.limbs.len | Managed.sign_bit,
2067 };
2068 }
2069
2070 /// Asserts `limbs` is big enough to store the value.
2071 pub fn toMutable(self: Const, limbs: []Limb) Mutable {
2072 @memcpy(limbs[0..self.limbs.len], self.limbs[0..self.limbs.len]);
2073 return .{
2074 .limbs = limbs,
2075 .positive = self.positive,
2076 .len = self.limbs.len,
2077 };
2078 }
2079
2080 pub fn dump(self: Const) void {
2081 for (self.limbs[0..self.limbs.len]) |limb| {
2082 std.debug.print("{x} ", .{limb});
2083 }
2084 std.debug.print("len={} positive={}\n", .{ self.len, self.positive });
2085 }
2086
2087 pub fn abs(self: Const) Const {
2088 return .{
2089 .limbs = self.limbs,
2090 .positive = true,
2091 };
2092 }
2093
2094 pub fn negate(self: Const) Const {
2095 return .{
2096 .limbs = self.limbs,
2097 .positive = !self.positive,
2098 };
2099 }
2100
2101 pub fn isOdd(self: Const) bool {
2102 return self.limbs[0] & 1 != 0;
2103 }
2104
2105 pub fn isEven(self: Const) bool {
2106 return !self.isOdd();
2107 }
2108
2109 /// Returns the number of bits required to represent the absolute value of an integer.
2110 pub fn bitCountAbs(self: Const) usize {
2111 return (self.limbs.len - 1) * limb_bits + (limb_bits - @clz(self.limbs[self.limbs.len - 1]));
2112 }
2113
2114 /// Returns the number of bits required to represent the integer in twos-complement form.
2115 ///
2116 /// If the integer is negative the value returned is the number of bits needed by a signed
2117 /// integer to represent the value. If positive the value is the number of bits for an
2118 /// unsigned integer. Any unsigned integer will fit in the signed integer with bitcount
2119 /// one greater than the returned value.
2120 ///
2121 /// e.g. -127 returns 8 as it will fit in an i8. 127 returns 7 since it fits in a u7.
2122 pub fn bitCountTwosComp(self: Const) usize {
2123 var bits = self.bitCountAbs();
2124
2125 // If the entire value has only one bit set (e.g. 0b100000000) then the negation in twos
2126 // complement requires one less bit.
2127 if (!self.positive) block: {
2128 bits += 1;
2129
2130 if (@popCount(self.limbs[self.limbs.len - 1]) == 1) {
2131 for (self.limbs[0 .. self.limbs.len - 1]) |limb| {
2132 if (@popCount(limb) != 0) {
2133 break :block;
2134 }
2135 }
2136
2137 bits -= 1;
2138 }
2139 }
2140
2141 return bits;
2142 }
2143
2144 /// Returns the number of bits required to represent the integer in twos-complement form
2145 /// with the given signedness.
2146 pub fn bitCountTwosCompForSignedness(self: Const, signedness: std.builtin.Signedness) usize {
2147 return self.bitCountTwosComp() + @intFromBool(self.positive and signedness == .signed);
2148 }
2149
    /// @popCount with two's complement semantics.
    ///
    /// This returns the number of 1 bits set when the value would be represented in
    /// two's complement with the given integer width (bit_count).
    /// This includes the leading sign bit, which will be set for negative values.
    ///
    /// Asserts that bit_count is enough to represent value in two's compliment
    /// and that the final result fits in a usize.
    /// Asserts that there are no trailing empty limbs on the most significant end,
    /// i.e. that limb count matches `calcLimbLen()` and zero is not negative.
    pub fn popCount(self: Const, bit_count: usize) usize {
        var sum: usize = 0;
        if (self.positive) {
            // Positive values: the two's complement representation is the magnitude.
            for (self.limbs) |limb| {
                sum += @popCount(limb);
            }
        } else {
            assert(self.fitsInTwosComp(.signed, bit_count));
            assert(self.limbs[self.limbs.len - 1] != 0);

            var remaining_bits = bit_count;
            // `carry` ripples the "+1" of the two's complement negation (~x + 1).
            var carry: u1 = 1;
            var add_res: Limb = undefined;

            // All but the most significant limb.
            // Negate each limb on the fly and count its set bits.
            for (self.limbs[0 .. self.limbs.len - 1]) |limb| {
                const ov = @addWithOverflow(~limb, carry);
                add_res = ov[0];
                carry = ov[1];
                sum += @popCount(add_res);
                remaining_bits -= limb_bits; // Asserted not to underflow by fitsInTwosComp
            }

            // The most significant limb may have fewer than @bitSizeOf(Limb) meaningful bits,
            // which we can detect with @clz().
            // There may also be fewer limbs than needed to fill bit_count.
            const limb = self.limbs[self.limbs.len - 1];
            const leading_zeroes = @clz(limb);
            // The most significant limb is asserted not to be all 0s (above),
            // so ~limb cannot be all 1s, and ~limb + 1 cannot overflow.
            sum += @popCount(~limb + carry);
            sum -= leading_zeroes; // All leading zeroes were flipped and added to sum, so undo those
            const remaining_ones = remaining_bits - (limb_bits - leading_zeroes); // All bits not covered by limbs
            // Sign extension in two's complement fills every bit above the stored
            // magnitude with ones, so all remaining positions count.
            sum += remaining_ones;
        }
        return sum;
    }
2197
2198 pub fn fitsInTwosComp(self: Const, signedness: Signedness, bit_count: usize) bool {
2199 if (self.eqlZero()) {
2200 return true;
2201 }
2202 if (signedness == .unsigned and !self.positive) {
2203 return false;
2204 }
2205 return bit_count >= self.bitCountTwosCompForSignedness(signedness);
2206 }
2207
2208 /// Returns whether self can fit into an integer of the requested type.
2209 pub fn fits(self: Const, comptime T: type) bool {
2210 const info = @typeInfo(T).int;
2211 return self.fitsInTwosComp(info.signedness, info.bits);
2212 }
2213
2214 /// Returns the approximate size of the integer in the given base. Negative values accommodate for
2215 /// the minus sign. This is used for determining the number of characters needed to print the
2216 /// value. It is inexact and may exceed the given value by ~1-2 bytes.
2217 /// TODO See if we can make this exact.
2218 pub fn sizeInBaseUpperBound(self: Const, base: usize) usize {
2219 const bit_count = @as(usize, @intFromBool(!self.positive)) + self.bitCountAbs();
2220 return (bit_count / math.log2(base)) + 2;
2221 }
2222
    /// Errors that can occur when converting a big integer to a primitive type.
    pub const ConvertError = error{
        /// The value is negative but the target type is unsigned.
        NegativeIntoUnsigned,
        /// The value does not fit in the target type without truncation.
        TargetTooSmall,
    };
2227
    /// Convert `self` to `Int`.
    ///
    /// Returns an error if self cannot be narrowed into the requested type without truncation.
    pub fn toInt(self: Const, comptime Int: type) ConvertError!Int {
        switch (@typeInfo(Int)) {
            .int => |info| {
                // Make sure -0 is handled correctly.
                if (self.eqlZero()) return 0;

                const Unsigned = std.meta.Int(.unsigned, info.bits);

                if (!self.fitsInTwosComp(info.signedness, info.bits)) {
                    return error.TargetTooSmall;
                }

                var r: Unsigned = 0;

                if (@sizeOf(Unsigned) <= @sizeOf(Limb)) {
                    // Single-limb case; the fits check above guarantees the cast
                    // cannot truncate.
                    r = @intCast(self.limbs[0]);
                } else {
                    // Accumulate limbs most-significant first.
                    for (self.limbs[0..self.limbs.len], 0..) |_, ri| {
                        const limb = self.limbs[self.limbs.len - ri - 1];
                        r <<= limb_bits;
                        r |= limb;
                    }
                }

                if (info.signedness == .unsigned) {
                    return if (self.positive) @intCast(r) else error.NegativeIntoUnsigned;
                } else {
                    if (self.positive) {
                        return @intCast(r);
                    } else {
                        if (math.cast(Int, r)) |ok| {
                            return -ok;
                        } else {
                            // The magnitude exceeds maxInt(Int) yet passed the fits
                            // check, so the value must be exactly minInt(Int).
                            return minInt(Int);
                        }
                    }
                }
            },
            else => @compileError("expected int type, found '" ++ @typeName(Int) ++ "'"),
        }
    }
2272
    /// Convert self to `Float`.
    /// Returns the converted value and whether the conversion was exact.
    pub fn toFloat(self: Const, comptime Float: type, round: Round) struct { Float, Exactness } {
        if (Float == comptime_float) return self.toFloat(f128, round);
        // Work on the normalized magnitude; the sign is reapplied at the end.
        const normalized_abs: Const = .{
            .limbs = self.limbs[0..llnormalize(self.limbs)],
            .positive = true,
        };
        // Preserve the sign of zero.
        if (normalized_abs.eqlZero()) return .{ if (self.positive) 0.0 else -0.0, .exact };

        const Repr = std.math.FloatRepr(Float);
        var mantissa_limbs: [calcNonZeroTwosCompLimbCount(1 + @bitSizeOf(Repr.Mantissa))]Limb = undefined;
        var mantissa: Mutable = .{
            .limbs = &mantissa_limbs,
            .positive = undefined,
            .len = undefined,
        };
        var exponent = normalized_abs.bitCountAbs() - 1;
        const exactness: Exactness = exactness: {
            if (exponent <= @bitSizeOf(Repr.Normalized.Fraction)) {
                // The whole magnitude fits in the fraction; shifting left loses no
                // bits, so the conversion is exact.
                mantissa.shiftLeft(normalized_abs, @intCast(@bitSizeOf(Repr.Normalized.Fraction) - exponent));
                break :exactness .exact;
            }
            const shift: usize = @intCast(exponent - @bitSizeOf(Repr.Normalized.Fraction));
            mantissa.shiftRight(normalized_abs, shift);
            const final_limb_index = (shift - 1) / limb_bits;
            // Bits shifted out: the top of the final partial limb, plus a sticky
            // bit for anything set below it.
            const round_bits = normalized_abs.limbs[final_limb_index] << @truncate(-%shift) |
                @intFromBool(!std.mem.allEqual(Limb, normalized_abs.limbs[0..final_limb_index], 0));
            if (round_bits == 0) break :exactness .exact;
            round: switch (round) {
                .nearest_even => {
                    const half: Limb = 1 << (limb_bits - 1);
                    if (round_bits >= half) mantissa.addScalar(mantissa.toConst(), 1);
                    // Exactly halfway: clear the low bit to round to even.
                    if (round_bits == half) mantissa.limbs[0] &= ~@as(Limb, 1);
                },
                .away => mantissa.addScalar(mantissa.toConst(), 1),
                .trunc => {},
                .floor => if (!self.positive) continue :round .away,
                .ceil => if (self.positive) continue :round .away,
            }
            break :exactness .inexact;
        };
        const normalized_res: Repr.Normalized = .{
            .fraction = @truncate(mantissa.toInt(Repr.Mantissa) catch |err| switch (err) {
                error.NegativeIntoUnsigned => unreachable,
                // Rounding carried out of the mantissa (it is exactly 2^bits):
                // bump the exponent and use the corresponding power-of-two fraction.
                error.TargetTooSmall => fraction: {
                    assert(mantissa.toConst().orderAgainstScalar(1 << @bitSizeOf(Repr.Mantissa)).compare(.eq));
                    exponent += 1;
                    break :fraction 1 << (@bitSizeOf(Repr.Mantissa) - 1);
                },
            }),
            .exponent = std.math.lossyCast(Repr.Normalized.Exponent, exponent),
        };
        return .{ normalized_res.reconstruct(if (self.positive) .positive else .negative), exactness };
    }
2327
2328 pub fn format(self: Const, w: *std.Io.Writer) std.Io.Writer.Error!void {
2329 return self.formatNumber(w, .{});
2330 }
2331
2332 /// If the absolute value of integer is greater than or equal to `pow(2, 64 * @sizeOf(usize) * 8)`,
2333 /// this function will fail to print the string, printing "(BigInt)" instead of a number.
2334 /// This is because the rendering algorithm requires reversing a string, which requires O(N) memory.
2335 /// See `toString` and `toStringAlloc` for a way to print big integers without failure.
2336 pub fn formatNumber(self: Const, w: *std.Io.Writer, number: std.fmt.Number) std.Io.Writer.Error!void {
2337 const available_len = 64;
2338 if (self.limbs.len > available_len)
2339 return w.writeAll("(BigInt)");
2340
2341 var limbs: [calcToStringLimbsBufferLen(available_len, 10)]Limb = undefined;
2342
2343 const biggest: Const = .{
2344 .limbs = &([1]Limb{comptime math.maxInt(Limb)} ** available_len),
2345 .positive = false,
2346 };
2347 var buf: [biggest.sizeInBaseUpperBound(2)]u8 = undefined;
2348 const base: u8 = number.mode.base() orelse @panic("TODO print big int in scientific form");
2349 const len = self.toString(&buf, base, number.case, &limbs);
2350 return w.writeAll(buf[0..len]);
2351 }
2352
2353 /// Converts self to a string in the requested base.
2354 /// Caller owns returned memory.
2355 /// Asserts that `base` is in the range [2, 36].
2356 /// See also `toString`, a lower level function than this.
2357 pub fn toStringAlloc(self: Const, allocator: Allocator, base: u8, case: std.fmt.Case) Allocator.Error![]u8 {
2358 assert(base >= 2);
2359 assert(base <= 36);
2360
2361 if (self.eqlZero()) {
2362 return allocator.dupe(u8, "0");
2363 }
2364 const string = try allocator.alloc(u8, self.sizeInBaseUpperBound(base));
2365 errdefer allocator.free(string);
2366
2367 const limbs = try allocator.alloc(Limb, calcToStringLimbsBufferLen(self.limbs.len, base));
2368 defer allocator.free(limbs);
2369
2370 return allocator.realloc(string, self.toString(string, base, case, limbs));
2371 }
2372
    /// Converts self to a string in the requested base.
    /// Asserts that `base` is in the range [2, 36].
    /// `string` is a caller-provided slice of at least `sizeInBaseUpperBound` bytes,
    /// where the result is written to.
    /// Returns the length of the string.
    /// `limbs_buffer` is caller-provided memory for `toString` to use as a working area. It must have
    /// length of at least `calcToStringLimbsBufferLen`.
    /// In the case of power-of-two base, `limbs_buffer` is ignored.
    /// See also `toStringAlloc`, a higher level function than this.
    pub fn toString(self: Const, string: []u8, base: u8, case: std.fmt.Case, limbs_buffer: []Limb) usize {
        assert(base >= 2);
        assert(base <= 36);

        if (self.eqlZero()) {
            string[0] = '0';
            return 1;
        }

        // Digits are produced least-significant first and reversed at the end.
        var digits_len: usize = 0;

        // Power of two: can do a single pass and use masks to extract digits.
        if (math.isPowerOfTwo(base)) {
            const base_shift = math.log2_int(Limb, base);

            outer: for (self.limbs[0..self.limbs.len]) |limb| {
                var shift: usize = 0;
                while (shift < limb_bits) : (shift += base_shift) {
                    const r = @as(u8, @intCast((limb >> @as(Log2Limb, @intCast(shift))) & @as(Limb, base - 1)));
                    const ch = std.fmt.digitToChar(r, case);
                    string[digits_len] = ch;
                    digits_len += 1;
                    // If we hit the end, it must be all zeroes from here.
                    if (digits_len == string.len) break :outer;
                }
            }

            // Always will have a non-zero digit somewhere.
            while (string[digits_len - 1] == '0') {
                digits_len -= 1;
            }
        } else {
            // Non power-of-two: batch divisions per word size.
            // We use a HalfLimb here so the division uses the faster lldiv0p5 over lldiv1 codepath.
            const digits_per_limb = math.log(HalfLimb, base, maxInt(HalfLimb));
            // limb_base = base ** digits_per_limb; dividing by it yields
            // digits_per_limb digits at a time.
            var limb_base: Limb = 1;
            var j: usize = 0;
            while (j < digits_per_limb) : (j += 1) {
                limb_base *= base;
            }
            const b: Const = .{ .limbs = &[_]Limb{limb_base}, .positive = true };

            // Working copy of |self| that is repeatedly divided by limb_base.
            var q: Mutable = .{
                .limbs = limbs_buffer[0 .. self.limbs.len + 2],
                .positive = true, // Make absolute by ignoring self.positive.
                .len = self.limbs.len,
            };
            @memcpy(q.limbs[0..self.limbs.len], self.limbs);

            var r: Mutable = .{
                .limbs = limbs_buffer[q.limbs.len..][0..self.limbs.len],
                .positive = true,
                .len = 1,
            };
            r.limbs[0] = 0;

            const rest_of_the_limbs_buf = limbs_buffer[q.limbs.len + r.limbs.len ..];

            while (q.len >= 2) {
                // Passing an allocator here would not be helpful since this division is destroying
                // information, not creating it. [TODO citation needed]
                q.divTrunc(&r, q.toConst(), b, rest_of_the_limbs_buf);

                // Emit exactly digits_per_limb digits from the remainder
                // (zero-padded; the padding reverses into leading zeros that the
                // final single-limb pass below does not produce).
                var r_word = r.limbs[0];
                var i: usize = 0;
                while (i < digits_per_limb) : (i += 1) {
                    const ch = std.fmt.digitToChar(@as(u8, @intCast(r_word % base)), case);
                    r_word /= base;
                    string[digits_len] = ch;
                    digits_len += 1;
                }
            }

            {
                assert(q.len == 1);

                // Emit the digits of the final single-limb quotient, without padding.
                var r_word = q.limbs[0];
                while (r_word != 0) {
                    const ch = std.fmt.digitToChar(@as(u8, @intCast(r_word % base)), case);
                    r_word /= base;
                    string[digits_len] = ch;
                    digits_len += 1;
                }
            }
        }

        if (!self.positive) {
            string[digits_len] = '-';
            digits_len += 1;
        }

        const s = string[0..digits_len];
        mem.reverse(u8, s);
        return s.len;
    }
2477
2478 /// Write the value of `x` into `buffer`
2479 /// Asserts that `buffer` is large enough to store the value.
2480 ///
2481 /// `buffer` is filled so that its contents match what would be observed via
2482 /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`,
2483 /// and any required padding bits are added on the MSB end.
2484 pub fn writeTwosComplement(x: Const, buffer: []u8, endian: Endian) void {
2485 return writePackedTwosComplement(x, buffer, 0, 8 * buffer.len, endian);
2486 }
2487
    /// Write the value of `x` to a packed memory `buffer`.
    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`
    /// at offset `bit_offset`.
    ///
    /// This is equivalent to storing the value of an integer with `bit_count` bits as
    /// if it were a field in packed memory at the provided bit offset.
    pub fn writePackedTwosComplement(x: Const, buffer: []u8, bit_offset: usize, bit_count: usize, endian: Endian) void {
        assert(x.fitsInTwosComp(if (x.positive) .unsigned else .signed, bit_count));

        // Copy all complete limbs
        // `carry` is the "+1" of the two's complement negation (~x + 1), rippled
        // limb by limb while writing.
        var carry: u1 = 1;
        var limb_index: usize = 0;
        var bit_index: usize = 0;
        while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) {
            // Limbs above x.limbs.len are zero extension (or, for negative x,
            // become all-ones after the negation below).
            var limb: Limb = if (limb_index < x.limbs.len) x.limbs[limb_index] else 0;

            // 2's complement (bitwise not, then add carry bit)
            if (!x.positive) {
                const ov = @addWithOverflow(~limb, carry);
                limb = ov[0];
                carry = ov[1];
            }

            // Write one Limb of bits
            mem.writePackedInt(Limb, buffer, bit_index + bit_offset, limb, endian);
            bit_index += @bitSizeOf(Limb);
        }

        // Copy the remaining bits
        if (bit_count != bit_index) {
            var limb: Limb = if (limb_index < x.limbs.len) x.limbs[limb_index] else 0;

            // 2's complement (bitwise not, then add carry bit)
            if (!x.positive) limb = ~limb +% carry;

            // Write all remaining bits
            mem.writeVarPackedInt(buffer, bit_index + bit_offset, bit_count - bit_index, limb, endian);
        }
    }
2527
2528 /// Returns `math.Order.lt`, `math.Order.eq`, `math.Order.gt` if
2529 /// `|a| < |b|`, `|a| == |b|`, or `|a| > |b|` respectively.
2530 pub fn orderAbs(a: Const, b: Const) math.Order {
2531 if (a.limbs.len < b.limbs.len) {
2532 return .lt;
2533 }
2534 if (a.limbs.len > b.limbs.len) {
2535 return .gt;
2536 }
2537
2538 var i: usize = a.limbs.len - 1;
2539 while (i != 0) : (i -= 1) {
2540 if (a.limbs[i] != b.limbs[i]) {
2541 break;
2542 }
2543 }
2544
2545 if (a.limbs[i] < b.limbs[i]) {
2546 return .lt;
2547 } else if (a.limbs[i] > b.limbs[i]) {
2548 return .gt;
2549 } else {
2550 return .eq;
2551 }
2552 }
2553
2554 /// Returns `math.Order.lt`, `math.Order.eq`, `math.Order.gt` if `a < b`, `a == b` or `a > b` respectively.
2555 pub fn order(a: Const, b: Const) math.Order {
2556 if (a.positive != b.positive) {
2557 if (eqlZero(a) and eqlZero(b)) {
2558 return .eq;
2559 } else {
2560 return if (a.positive) .gt else .lt;
2561 }
2562 } else {
2563 const r = orderAbs(a, b);
2564 return if (a.positive) r else switch (r) {
2565 .lt => math.Order.gt,
2566 .eq => math.Order.eq,
2567 .gt => math.Order.lt,
2568 };
2569 }
2570 }
2571
    /// Same as `order` but the right-hand operand is a primitive integer.
    /// `scalar` may be a runtime integer or a `comptime_int`.
    pub fn orderAgainstScalar(lhs: Const, scalar: anytype) math.Order {
        // Normally we could just determine the number of limbs needed with calcLimbLen,
        // but that is not comptime-known when scalar is not a comptime_int. Instead, we
        // use calcTwosCompLimbCount for a non-comptime_int scalar, which can be pessimistic
        // in the case that scalar happens to be small in magnitude within its type, but it
        // is well worth being able to use the stack and not needing an allocator passed in.
        // Note that Mutable.init still sets len to calcLimbLen(scalar) in any case.
        const limbs_len = comptime switch (@typeInfo(@TypeOf(scalar))) {
            .comptime_int => calcLimbLen(scalar),
            .int => |info| calcTwosCompLimbCount(info.bits),
            else => @compileError("expected scalar to be an int"),
        };
        var limbs: [limbs_len]Limb = undefined;
        const rhs = Mutable.init(&limbs, scalar);
        return order(lhs, rhs.toConst());
    }
2589
2590 /// Returns true if `a == 0`.
2591 pub fn eqlZero(a: Const) bool {
2592 var d: Limb = 0;
2593 for (a.limbs) |limb| d |= limb;
2594 return d == 0;
2595 }
2596
2597 /// Returns true if `|a| == |b|`.
2598 pub fn eqlAbs(a: Const, b: Const) bool {
2599 return orderAbs(a, b) == .eq;
2600 }
2601
2602 /// Returns true if `a == b`.
2603 pub fn eql(a: Const, b: Const) bool {
2604 return order(a, b) == .eq;
2605 }
2606
    /// Returns the number of leading zeros in twos-complement form.
    /// `bits` is the two's-complement width; assumes the value fits in `bits`
    /// (otherwise the final addition would underflow) — caller must ensure this.
    pub fn clz(a: Const, bits: Limb) Limb {
        // Limbs are stored in little-endian order but we need to iterate big-endian.
        // A negative value has its sign bit set, so it has no leading zeros.
        if (!a.positive and !a.eqlZero()) return 0;
        var total_limb_lz: Limb = 0;
        var i: usize = a.limbs.len;
        const bits_per_limb = @bitSizeOf(Limb);
        while (i != 0) {
            i -= 1;
            const this_limb_lz = @clz(a.limbs[i]);
            total_limb_lz += this_limb_lz;
            // Stop at the first limb that contains a set bit.
            if (this_limb_lz != bits_per_limb) break;
        }
        const total_limb_bits = a.limbs.len * bits_per_limb;
        // Account for the zero bits of the width above the stored limbs.
        return total_limb_lz + bits - total_limb_bits;
    }
2623
2624 /// Returns the number of trailing zeros in twos-complement form.
2625 pub fn ctz(a: Const, bits: Limb) Limb {
2626 // Limbs are stored in little-endian order. Converting a negative number to twos-complement
2627 // flips all bits above the lowest set bit, which does not affect the trailing zero count.
2628 if (a.eqlZero()) return bits;
2629 var result: Limb = 0;
2630 for (a.limbs) |limb| {
2631 const limb_tz = @ctz(limb);
2632 result += limb_tz;
2633 if (limb_tz != @bitSizeOf(Limb)) break;
2634 }
2635 return @min(result, bits);
2636 }
2637};
2638
2639/// An arbitrary-precision big integer along with an allocator which manages the memory.
2640///
2641/// Memory is allocated as needed to ensure operations never overflow. The range
2642/// is bounded only by available memory.
2643pub const Managed = struct {
    /// Mask for the sign flag stored in the high bit of `metadata`.
    pub const sign_bit: usize = 1 << (@typeInfo(usize).int.bits - 1);

    /// Default number of limbs to allocate on creation of a `Managed`.
    pub const default_capacity = 4;
2648
2649 /// Allocator used by the Managed when requesting memory.
2650 allocator: Allocator,
2651
2652 /// Raw digits. These are:
2653 ///
2654 /// * Little-endian ordered
2655 /// * limbs.len >= 1
2656 /// * Zero is represent as Managed.len() == 1 with limbs[0] == 0.
2657 ///
2658 /// Accessing limbs directly should be avoided.
2659 limbs: []Limb,
2660
2661 /// High bit is the sign bit. If set, Managed is negative, else Managed is positive.
2662 /// The remaining bits represent the number of limbs used by Managed.
2663 metadata: usize,
2664
2665 /// Creates a new `Managed`. `default_capacity` limbs will be allocated immediately.
2666 /// The integer value after initializing is `0`.
2667 pub fn init(allocator: Allocator) !Managed {
2668 return initCapacity(allocator, default_capacity);
2669 }
2670
2671 pub fn toMutable(self: Managed) Mutable {
2672 return .{
2673 .limbs = self.limbs,
2674 .positive = self.isPositive(),
2675 .len = self.len(),
2676 };
2677 }
2678
2679 pub fn toConst(self: Managed) Const {
2680 return .{
2681 .limbs = self.limbs[0..self.len()],
2682 .positive = self.isPositive(),
2683 };
2684 }
2685
2686 /// Creates a new `Managed` with value `value`.
2687 ///
2688 /// This is identical to an `init`, followed by a `set`.
2689 pub fn initSet(allocator: Allocator, value: anytype) !Managed {
2690 var s = try Managed.init(allocator);
2691 errdefer s.deinit();
2692 try s.set(value);
2693 return s;
2694 }
2695
2696 /// Creates a new Managed with a specific capacity. If capacity < default_capacity then the
2697 /// default capacity will be used instead.
2698 /// The integer value after initializing is `0`.
2699 pub fn initCapacity(allocator: Allocator, capacity: usize) !Managed {
2700 return Managed{
2701 .allocator = allocator,
2702 .metadata = 1,
2703 .limbs = block: {
2704 const limbs = try allocator.alloc(Limb, @max(default_capacity, capacity));
2705 limbs[0] = 0;
2706 break :block limbs;
2707 },
2708 };
2709 }
2710
    /// Returns the number of limbs currently in use.
    pub fn len(self: Managed) usize {
        // The high bit of `metadata` stores the sign; mask it off to get the length.
        return self.metadata & ~sign_bit;
    }
2715
    /// Returns whether a Managed is positive (sign bit of `metadata` clear).
    pub fn isPositive(self: Managed) bool {
        return self.metadata & sign_bit == 0;
    }
2720
2721 /// Sets the sign of an Managed.
2722 pub fn setSign(self: *Managed, positive: bool) void {
2723 if (positive) {
2724 self.metadata &= ~sign_bit;
2725 } else {
2726 self.metadata |= sign_bit;
2727 }
2728 }
2729
    /// Sets the length of a Managed, preserving the sign bit.
    ///
    /// If setLen is used, then the Managed must be normalized to suit.
    pub fn setLen(self: *Managed, new_len: usize) void {
        self.metadata &= sign_bit;
        self.metadata |= new_len;
    }
2737
2738 pub fn setMetadata(self: *Managed, positive: bool, length: usize) void {
2739 self.metadata = if (positive) length & ~sign_bit else length | sign_bit;
2740 }
2741
2742 /// Ensures an Managed has enough space allocated for capacity limbs. If the Managed does not have
2743 /// sufficient capacity, the exact amount will be allocated. This occurs even if the requested
2744 /// capacity is only greater than the current capacity by one limb.
2745 pub fn ensureCapacity(self: *Managed, capacity: usize) !void {
2746 if (capacity <= self.limbs.len) {
2747 return;
2748 }
2749 self.limbs = try self.allocator.realloc(self.limbs, capacity);
2750 }
2751
    /// Frees all associated memory.
    /// The value is set to `undefined` afterwards and must not be used again.
    pub fn deinit(self: *Managed) void {
        self.allocator.free(self.limbs);
        self.* = undefined;
    }
2757
    /// Returns a `Managed` with the same value. The returned `Managed` is a deep copy and
    /// can be modified separately from the original, and its resources are managed
    /// separately from the original. The copy uses the same allocator as `other`.
    pub fn clone(other: Managed) !Managed {
        return other.cloneWithDifferentAllocator(other.allocator);
    }
2764
2765 pub fn cloneWithDifferentAllocator(other: Managed, allocator: Allocator) !Managed {
2766 return Managed{
2767 .allocator = allocator,
2768 .metadata = other.metadata,
2769 .limbs = block: {
2770 const limbs = try allocator.alloc(Limb, other.len());
2771 @memcpy(limbs, other.limbs[0..other.len()]);
2772 break :block limbs;
2773 },
2774 };
2775 }
2776
    /// Copies the value of the integer to an existing `Managed` so that they both have the same value.
    /// Extra memory will be allocated if the receiver does not have enough capacity.
    /// A no-op when `other` aliases `self`'s limbs.
    pub fn copy(self: *Managed, other: Const) !void {
        if (self.limbs.ptr == other.limbs.ptr) return;

        try self.ensureCapacity(other.limbs.len);
        @memcpy(self.limbs[0..other.limbs.len], other.limbs[0..other.limbs.len]);
        self.setMetadata(other.positive, other.limbs.len);
    }
2786
    /// Efficiently swap a `Managed` with another. This swaps the limb pointers and a full copy is not
    /// performed. The address of the limbs field will not be the same after this function.
    /// The allocators are swapped along with the rest of the fields.
    pub fn swap(self: *Managed, other: *Managed) void {
        mem.swap(Managed, self, other);
    }
2792
    /// Debugging tool: prints the state to stderr.
    /// Limbs are printed in hex, least-significant first, followed by the metadata.
    pub fn dump(self: Managed) void {
        for (self.limbs[0..self.len()]) |limb| {
            std.debug.print("{x} ", .{limb});
        }
        std.debug.print("len={} capacity={} positive={}\n", .{ self.len(), self.limbs.len, self.isPositive() });
    }
2800
    /// Negate the sign by toggling the sign bit.
    /// Note: this can produce "negative zero"; callers relying on a canonical
    /// zero should normalize separately.
    pub fn negate(self: *Managed) void {
        self.metadata ^= sign_bit;
    }

    /// Make positive by clearing the sign bit.
    pub fn abs(self: *Managed) void {
        self.metadata &= ~sign_bit;
    }
2810
    /// Returns true if the value is odd (lowest bit of the least significant limb set).
    pub fn isOdd(self: Managed) bool {
        return self.limbs[0] & 1 != 0;
    }

    /// Returns true if the value is even.
    pub fn isEven(self: Managed) bool {
        return !self.isOdd();
    }
2818
    /// Returns the number of bits required to represent the absolute value of an integer.
    /// Delegates to `Const.bitCountAbs`.
    pub fn bitCountAbs(self: Managed) usize {
        return self.toConst().bitCountAbs();
    }

    /// Returns the number of bits required to represent the integer in twos-complement form.
    ///
    /// If the integer is negative the value returned is the number of bits needed by a signed
    /// integer to represent the value. If positive the value is the number of bits for an
    /// unsigned integer. Any unsigned integer will fit in the signed integer with bitcount
    /// one greater than the returned value.
    ///
    /// e.g. -127 returns 8 as it will fit in an i8. 127 returns 7 since it fits in a u7.
    pub fn bitCountTwosComp(self: Managed) usize {
        return self.toConst().bitCountTwosComp();
    }

    /// Returns whether the value fits in a twos-complement integer with the given
    /// signedness and bit count. Delegates to `Const.fitsInTwosComp`.
    pub fn fitsInTwosComp(self: Managed, signedness: Signedness, bit_count: usize) bool {
        return self.toConst().fitsInTwosComp(signedness, bit_count);
    }

    /// Returns whether self can fit into an integer of the requested type.
    pub fn fits(self: Managed, comptime T: type) bool {
        return self.toConst().fits(T);
    }

    /// Returns the approximate size of the integer in the given base. Negative values accommodate for
    /// the minus sign. This is used for determining the number of characters needed to print the
    /// value. It is inexact and may exceed the given value by ~1-2 bytes.
    pub fn sizeInBaseUpperBound(self: Managed, base: usize) usize {
        return self.toConst().sizeInBaseUpperBound(base);
    }
2851
    /// Sets a Managed to value. Value must be a primitive integer or float type
    /// (see `calcLimbLen`). Grows the limbs allocation if needed; that is the
    /// only possible error.
    pub fn set(self: *Managed, value: anytype) Allocator.Error!void {
        try self.ensureCapacity(calcLimbLen(value));
        var m = self.toMutable();
        m.set(value);
        self.setMetadata(m.positive, m.len);
    }
2859
    /// Errors that can occur when narrowing to a primitive type.
    pub const ConvertError = Const.ConvertError;

    /// Convert `self` to `Int`.
    ///
    /// Returns an error if self cannot be narrowed into the requested type without truncation.
    pub fn toInt(self: Managed, comptime Int: type) ConvertError!Int {
        return self.toConst().toInt(Int);
    }

    /// Convert `self` to `Float`, rounding per `round`.
    /// The returned `Exactness` reports whether the conversion was exact.
    pub fn toFloat(self: Managed, comptime Float: type, round: Round) struct { Float, Exactness } {
        return self.toConst().toFloat(Float, round);
    }
2873
    /// Set self from the string representation `value`.
    ///
    /// `value` must contain only digits <= `base` and is case insensitive. Base prefixes are
    /// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are
    /// ignored and can be used as digit separators.
    ///
    /// Returns an error if memory could not be allocated or `value` has invalid digits for the
    /// requested base. `base` must be in the range [2, 36].
    ///
    /// self's allocator is used for temporary storage to boost multiplication performance.
    pub fn setString(self: *Managed, base: u8, value: []const u8) !void {
        if (base < 2 or base > 36) return error.InvalidBase;
        try self.ensureCapacity(calcSetStringLimbCount(base, value.len));
        const limbs_buffer = try self.allocator.alloc(Limb, calcSetStringLimbsBufferLen(base, value.len));
        defer self.allocator.free(limbs_buffer);
        var m = self.toMutable();
        try m.setString(base, value, limbs_buffer, self.allocator);
        self.setMetadata(m.positive, m.len);
    }
2893
    /// Set self to either bound of a 2s-complement integer.
    /// Note: The result is still sign-magnitude, not twos complement! In order to convert the
    /// result to twos complement, it is sufficient to take the absolute value.
    ///
    /// Returns an error if memory could not be allocated.
    pub fn setTwosCompIntLimit(
        r: *Managed,
        limit: TwosCompIntLimit,
        signedness: Signedness,
        bit_count: usize,
    ) !void {
        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
        var m = r.toMutable();
        m.setTwosCompIntLimit(limit, signedness, bit_count);
        r.setMetadata(m.positive, m.len);
    }
2908
    /// Converts self to a string in the requested base. Memory is allocated from the provided
    /// allocator and not the one present in self; the caller is responsible for
    /// freeing the returned slice with that allocator. `base` must be in [2, 36].
    pub fn toString(self: Managed, allocator: Allocator, base: u8, case: std.fmt.Case) ![]u8 {
        if (base < 2 or base > 36) return error.InvalidBase;
        return self.toConst().toStringAlloc(allocator, base, case);
    }
2915
    /// To allow `std.fmt.format` to work with `Managed`.
    /// Uses default number-formatting options; see `formatNumber`.
    pub fn format(self: Managed, w: *std.Io.Writer) std.Io.Writer.Error!void {
        return formatNumber(self, w, .{});
    }

    /// If the absolute value of integer is greater than or equal to `pow(2, 64 * @sizeOf(usize) * 8)`,
    /// this function will fail to print the string, printing "(BigInt)" instead of a number.
    /// This is because the rendering algorithm requires reversing a string, which requires O(N) memory.
    /// See `toString` and `toStringAlloc` for a way to print big integers without failure.
    pub fn formatNumber(self: Managed, w: *std.Io.Writer, n: std.fmt.Number) std.Io.Writer.Error!void {
        return self.toConst().formatNumber(w, n);
    }
2928
    /// Returns math.Order.lt, math.Order.eq, math.Order.gt if |a| < |b|, |a| ==
    /// |b| or |a| > |b| respectively. Signs are ignored.
    pub fn orderAbs(a: Managed, b: Managed) math.Order {
        return a.toConst().orderAbs(b.toConst());
    }

    /// Returns math.Order.lt, math.Order.eq, math.Order.gt if a < b, a == b or a > b
    /// respectively. Signs are taken into account.
    pub fn order(a: Managed, b: Managed) math.Order {
        return a.toConst().order(b.toConst());
    }

    /// Returns true if a == 0.
    pub fn eqlZero(a: Managed) bool {
        return a.toConst().eqlZero();
    }

    /// Returns true if |a| == |b|, ignoring signs.
    pub fn eqlAbs(a: Managed, b: Managed) bool {
        return a.toConst().eqlAbs(b.toConst());
    }

    /// Returns true if a == b, including signs.
    pub fn eql(a: Managed, b: Managed) bool {
        return a.toConst().eql(b.toConst());
    }
2955
2956 /// Normalize a possible sequence of leading zeros.
2957 ///
2958 /// [1, 2, 3, 4, 0] -> [1, 2, 3, 4]
2959 /// [1, 2, 0, 0, 0] -> [1, 2]
2960 /// [0, 0, 0, 0, 0] -> [0]
2961 pub fn normalize(r: *Managed, length: usize) void {
2962 assert(length > 0);
2963 assert(length <= r.limbs.len);
2964
2965 var j = length;
2966 while (j > 0) : (j -= 1) {
2967 if (r.limbs[j - 1] != 0) {
2968 break;
2969 }
2970 }
2971
2972 // Handle zero
2973 r.setLen(if (j != 0) j else 1);
2974 }
2975
    /// r = a + scalar
    ///
    /// r and a may be aliases. `scalar` must be a primitive integer or float
    /// (see `calcLimbLen`).
    ///
    /// Returns an error if memory could not be allocated.
    pub fn addScalar(r: *Managed, a: *const Managed, scalar: anytype) Allocator.Error!void {
        try r.ensureAddScalarCapacity(a.toConst(), scalar);
        var m = r.toMutable();
        m.addScalar(a.toConst(), scalar);
        r.setMetadata(m.positive, m.len);
    }
2987
    /// r = a + b
    ///
    /// r, a and b may be aliases.
    ///
    /// Returns an error if memory could not be allocated.
    pub fn add(r: *Managed, a: *const Managed, b: *const Managed) Allocator.Error!void {
        // One extra limb covers the possible carry out of the top limb.
        try r.ensureAddCapacity(a.toConst(), b.toConst());
        var m = r.toMutable();
        m.add(a.toConst(), b.toConst());
        r.setMetadata(m.positive, m.len);
    }
2999
    /// r = a + b with 2s-complement wrapping semantics. Returns whether any overflow occurred.
    ///
    /// r, a and b may be aliases.
    ///
    /// Returns an error if memory could not be allocated.
    pub fn addWrap(
        r: *Managed,
        a: *const Managed,
        b: *const Managed,
        signedness: Signedness,
        bit_count: usize,
    ) Allocator.Error!bool {
        try r.ensureTwosCompCapacity(bit_count);
        var m = r.toMutable();
        const wrapped = m.addWrap(a.toConst(), b.toConst(), signedness, bit_count);
        r.setMetadata(m.positive, m.len);
        return wrapped;
    }
3018
    /// r = a + b with 2s-complement saturating semantics: on overflow the result
    /// is clamped to the representable bound instead of wrapping.
    ///
    /// r, a and b may be aliases.
    ///
    /// Returns an error if memory could not be allocated.
    pub fn addSat(r: *Managed, a: *const Managed, b: *const Managed, signedness: Signedness, bit_count: usize) Allocator.Error!void {
        try r.ensureTwosCompCapacity(bit_count);
        var m = r.toMutable();
        m.addSat(a.toConst(), b.toConst(), signedness, bit_count);
        r.setMetadata(m.positive, m.len);
    }
3030
3031 /// r = a - b
3032 ///
3033 /// r, a and b may be aliases.
3034 ///
3035 /// Returns an error if memory could not be allocated.
3036 pub fn sub(r: *Managed, a: *const Managed, b: *const Managed) !void {
3037 try r.ensureCapacity(@max(a.len(), b.len()) + 1);
3038 var m = r.toMutable();
3039 m.sub(a.toConst(), b.toConst());
3040 r.setMetadata(m.positive, m.len);
3041 }
3042
    /// r = a - b with 2s-complement wrapping semantics. Returns whether any overflow occurred.
    ///
    /// r, a and b may be aliases.
    ///
    /// Returns an error if memory could not be allocated.
    pub fn subWrap(
        r: *Managed,
        a: *const Managed,
        b: *const Managed,
        signedness: Signedness,
        bit_count: usize,
    ) Allocator.Error!bool {
        try r.ensureTwosCompCapacity(bit_count);
        var m = r.toMutable();
        const wrapped = m.subWrap(a.toConst(), b.toConst(), signedness, bit_count);
        r.setMetadata(m.positive, m.len);
        return wrapped;
    }
3061
    /// r = a - b with 2s-complement saturating semantics: on overflow the result
    /// is clamped to the representable bound instead of wrapping.
    ///
    /// r, a and b may be aliases.
    ///
    /// Returns an error if memory could not be allocated.
    pub fn subSat(
        r: *Managed,
        a: *const Managed,
        b: *const Managed,
        signedness: Signedness,
        bit_count: usize,
    ) Allocator.Error!void {
        try r.ensureTwosCompCapacity(bit_count);
        var m = r.toMutable();
        m.subSat(a.toConst(), b.toConst(), signedness, bit_count);
        r.setMetadata(m.positive, m.len);
    }
3079
    /// rma = a * b
    ///
    /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
    ///
    /// Returns an error if memory could not be allocated.
    ///
    /// rma's allocator is used for temporary storage to speed up the multiplication.
    pub fn mul(rma: *Managed, a: *const Managed, b: *const Managed) !void {
        // Count how many operands share storage with the destination; the
        // aliased path needs a scratch buffer sized accordingly.
        var alias_count: usize = 0;
        if (rma.limbs.ptr == a.limbs.ptr)
            alias_count += 1;
        if (rma.limbs.ptr == b.limbs.ptr)
            alias_count += 1;
        try rma.ensureMulCapacity(a.toConst(), b.toConst());
        var m = rma.toMutable();
        if (alias_count == 0) {
            // No aliasing: multiply directly into the destination.
            m.mulNoAlias(a.toConst(), b.toConst(), rma.allocator);
        } else {
            const limb_count = calcMulLimbsBufferLen(a.len(), b.len(), alias_count);
            const limbs_buffer = try rma.allocator.alloc(Limb, limb_count);
            defer rma.allocator.free(limbs_buffer);
            m.mul(a.toConst(), b.toConst(), limbs_buffer, rma.allocator);
        }
        rma.setMetadata(m.positive, m.len);
    }
3105
    /// rma = a * b with 2s-complement wrapping semantics.
    ///
    /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
    ///
    /// Returns an error if memory could not be allocated.
    ///
    /// rma's allocator is used for temporary storage to speed up the multiplication.
    pub fn mulWrap(
        rma: *Managed,
        a: *const Managed,
        b: *const Managed,
        signedness: Signedness,
        bit_count: usize,
    ) !void {
        // Count how many operands share storage with the destination; the
        // aliased path needs a scratch buffer sized accordingly.
        var alias_count: usize = 0;
        if (rma.limbs.ptr == a.limbs.ptr)
            alias_count += 1;
        if (rma.limbs.ptr == b.limbs.ptr)
            alias_count += 1;

        try rma.ensureTwosCompCapacity(bit_count);
        var m = rma.toMutable();
        if (alias_count == 0) {
            // No aliasing: multiply directly into the destination.
            m.mulWrapNoAlias(a.toConst(), b.toConst(), signedness, bit_count, rma.allocator);
        } else {
            const limb_count = calcMulWrapLimbsBufferLen(bit_count, a.len(), b.len(), alias_count);
            const limbs_buffer = try rma.allocator.alloc(Limb, limb_count);
            defer rma.allocator.free(limbs_buffer);
            m.mulWrap(a.toConst(), b.toConst(), signedness, bit_count, limbs_buffer, rma.allocator);
        }
        rma.setMetadata(m.positive, m.len);
    }
3138
    /// Ensures capacity for a twos-complement value of `bit_count` bits.
    pub fn ensureTwosCompCapacity(r: *Managed, bit_count: usize) !void {
        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
    }

    /// Ensures capacity for `a + scalar` (max operand length plus one carry limb).
    pub fn ensureAddScalarCapacity(r: *Managed, a: Const, scalar: anytype) !void {
        try r.ensureCapacity(@max(a.limbs.len, calcLimbLen(scalar)) + 1);
    }

    /// Ensures capacity for `a + b` (max operand length plus one carry limb).
    pub fn ensureAddCapacity(r: *Managed, a: Const, b: Const) !void {
        try r.ensureCapacity(@max(a.limbs.len, b.limbs.len) + 1);
    }

    /// Ensures capacity for `a * b` (sum of operand lengths plus one limb).
    pub fn ensureMulCapacity(rma: *Managed, a: Const, b: Const) !void {
        try rma.ensureCapacity(a.limbs.len + b.limbs.len + 1);
    }
3154
    /// q = a / b (rem r)
    ///
    /// a / b is floored (rounded towards negative infinity).
    ///
    /// Returns an error if memory could not be allocated.
    /// q's allocator is used for the temporary division buffer.
    pub fn divFloor(q: *Managed, r: *Managed, a: *const Managed, b: *const Managed) !void {
        try q.ensureCapacity(a.len());
        try r.ensureCapacity(b.len());
        var mq = q.toMutable();
        var mr = r.toMutable();
        const limbs_buffer = try q.allocator.alloc(Limb, calcDivLimbsBufferLen(a.len(), b.len()));
        defer q.allocator.free(limbs_buffer);
        mq.divFloor(&mr, a.toConst(), b.toConst(), limbs_buffer);
        q.setMetadata(mq.positive, mq.len);
        r.setMetadata(mr.positive, mr.len);
    }
3171
    /// q = a / b (rem r)
    ///
    /// a / b is truncated (rounded towards zero).
    ///
    /// Returns an error if memory could not be allocated.
    /// q's allocator is used for the temporary division buffer.
    pub fn divTrunc(q: *Managed, r: *Managed, a: *const Managed, b: *const Managed) !void {
        try q.ensureCapacity(a.len());
        try r.ensureCapacity(b.len());
        var mq = q.toMutable();
        var mr = r.toMutable();
        const limbs_buffer = try q.allocator.alloc(Limb, calcDivLimbsBufferLen(a.len(), b.len()));
        defer q.allocator.free(limbs_buffer);
        mq.divTrunc(&mr, a.toConst(), b.toConst(), limbs_buffer);
        q.setMetadata(mq.positive, mq.len);
        r.setMetadata(mr.positive, mr.len);
    }
3188
    /// r = a << shift, in other words, r = a * 2^shift
    /// r and a may alias.
    /// Returns an error if memory could not be allocated.
    pub fn shiftLeft(r: *Managed, a: *const Managed, shift: usize) !void {
        try r.ensureCapacity(a.len() + (shift / limb_bits) + 1);
        var m = r.toMutable();
        m.shiftLeft(a.toConst(), shift);
        r.setMetadata(m.positive, m.len);
    }
3197
    /// r = a <<| shift with 2s-complement saturating semantics.
    /// r and a may alias.
    /// Returns an error if memory could not be allocated.
    pub fn shiftLeftSat(r: *Managed, a: *const Managed, shift: usize, signedness: Signedness, bit_count: usize) !void {
        try r.ensureTwosCompCapacity(bit_count);
        var m = r.toMutable();
        m.shiftLeftSat(a.toConst(), shift, signedness, bit_count);
        r.setMetadata(m.positive, m.len);
    }
3206
    /// r = a >> shift
    /// r and a may alias.
    /// Returns an error if memory could not be allocated.
    pub fn shiftRight(r: *Managed, a: *const Managed, shift: usize) !void {
        if (a.len() <= shift / limb_bits) {
            // Every limb is shifted out.
            // Shifting negative numbers converges to -1 instead of 0
            if (a.isPositive()) {
                // Result is +0: one limb, value 0, positive metadata.
                r.metadata = 1;
                r.limbs[0] = 0;
            } else {
                // Result is -1: one limb, value 1, sign bit set.
                r.metadata = 1;
                r.setSign(false);
                r.limbs[0] = 1;
            }
            return;
        }

        try r.ensureCapacity(a.len() - (shift / limb_bits));
        var m = r.toMutable();
        m.shiftRight(a.toConst(), shift);
        r.setMetadata(m.positive, m.len);
    }
3228
    /// r = ~a under 2s-complement wrapping semantics.
    /// r and a may alias.
    /// Returns an error if memory could not be allocated.
    pub fn bitNotWrap(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void {
        try r.ensureTwosCompCapacity(bit_count);
        var m = r.toMutable();
        m.bitNotWrap(a.toConst(), signedness, bit_count);
        r.setMetadata(m.positive, m.len);
    }
3237
    /// r = a | b
    ///
    /// a and b are zero-extended to the longer of a or b.
    /// Returns an error if memory could not be allocated.
    pub fn bitOr(r: *Managed, a: *const Managed, b: *const Managed) !void {
        try r.ensureCapacity(@max(a.len(), b.len()));
        var m = r.toMutable();
        m.bitOr(a.toConst(), b.toConst());
        r.setMetadata(m.positive, m.len);
    }
3247
    /// r = a & b
    /// Returns an error if memory could not be allocated.
    pub fn bitAnd(r: *Managed, a: *const Managed, b: *const Managed) !void {
        // Required capacity depends on the signs: a positive operand bounds the
        // result by its own length; two negatives may need one extra limb.
        const cap = if (a.len() >= b.len())
            if (b.isPositive()) b.len() else if (a.isPositive()) a.len() else a.len() + 1
        else if (a.isPositive()) a.len() else if (b.isPositive()) b.len() else b.len() + 1;

        try r.ensureCapacity(cap);
        var m = r.toMutable();
        m.bitAnd(a.toConst(), b.toConst());
        r.setMetadata(m.positive, m.len);
    }
3259
    /// r = a ^ b
    /// Returns an error if memory could not be allocated.
    pub fn bitXor(r: *Managed, a: *const Managed, b: *const Managed) !void {
        // One extra limb is needed when exactly one operand is negative.
        const cap = @max(a.len(), b.len()) + @intFromBool(a.isPositive() != b.isPositive());
        try r.ensureCapacity(cap);

        var m = r.toMutable();
        m.bitXor(a.toConst(), b.toConst());
        r.setMetadata(m.positive, m.len);
    }
3269
    /// rma = gcd(x, y).
    ///
    /// rma may alias x or y.
    /// x and y may alias each other.
    ///
    /// rma's allocator is used for temporary storage to boost multiplication performance.
    pub fn gcd(rma: *Managed, x: *const Managed, y: *const Managed) !void {
        try rma.ensureCapacity(@min(x.len(), y.len()));
        var m = rma.toMutable();
        var limbs_buffer = std.array_list.Managed(Limb).init(rma.allocator);
        defer limbs_buffer.deinit();
        try m.gcd(x.toConst(), y.toConst(), &limbs_buffer);
        rma.setMetadata(m.positive, m.len);
    }
3282
    /// rma = a * a
    /// Returns an error if memory could not be allocated.
    pub fn sqr(rma: *Managed, a: *const Managed) !void {
        const needed_limbs = 2 * a.len() + 1;

        if (rma.limbs.ptr == a.limbs.ptr) {
            // Aliased: square into a fresh Managed, then swap it in so `a` is
            // not clobbered mid-computation.
            var m = try Managed.initCapacity(rma.allocator, needed_limbs);
            errdefer m.deinit();
            var m_mut = m.toMutable();
            m_mut.sqrNoAlias(a.toConst(), rma.allocator);
            m.setMetadata(m_mut.positive, m_mut.len);

            rma.deinit();
            rma.swap(&m);
        } else {
            try rma.ensureCapacity(needed_limbs);
            var rma_mut = rma.toMutable();
            rma_mut.sqrNoAlias(a.toConst(), rma.allocator);
            rma.setMetadata(rma_mut.positive, rma_mut.len);
        }
    }
3303
    /// rma = a ^ b, where b is an unsigned 32-bit exponent.
    /// rma may alias a. Returns an error if memory could not be allocated.
    pub fn pow(rma: *Managed, a: *const Managed, b: u32) !void {
        const needed_limbs = calcPowLimbsBufferLen(a.bitCountAbs(), b);

        const limbs_buffer = try rma.allocator.alloc(Limb, needed_limbs);
        defer rma.allocator.free(limbs_buffer);

        if (rma.limbs.ptr == a.limbs.ptr) {
            // Aliased: compute into a fresh Managed, then swap it in so `a` is
            // not clobbered mid-computation.
            var m = try Managed.initCapacity(rma.allocator, needed_limbs);
            errdefer m.deinit();
            var m_mut = m.toMutable();
            m_mut.pow(a.toConst(), b, limbs_buffer);
            m.setMetadata(m_mut.positive, m_mut.len);

            rma.deinit();
            rma.swap(&m);
        } else {
            try rma.ensureCapacity(needed_limbs);
            var rma_mut = rma.toMutable();
            rma_mut.pow(a.toConst(), b, limbs_buffer);
            rma.setMetadata(rma_mut.positive, rma_mut.len);
        }
    }
3326
    /// rma = ⌊√a⌋
    /// Returns `error.SqrtOfNegativeNumber` for negative nonzero `a`.
    pub fn sqrt(rma: *Managed, a: *const Managed) !void {
        const bit_count = a.bitCountAbs();

        if (bit_count == 0) {
            // sqrt(0) == 0; preserve a's sign on the zero result.
            try rma.set(0);
            rma.setMetadata(a.isPositive(), rma.len());
            return;
        }

        if (!a.isPositive()) {
            return error.SqrtOfNegativeNumber;
        }

        const needed_limbs = calcSqrtLimbsBufferLen(bit_count);
        const limbs_buffer = try rma.allocator.alloc(Limb, needed_limbs);
        defer rma.allocator.free(limbs_buffer);

        try rma.ensureCapacity((a.len() - 1) / 2 + 1);
        var m = rma.toMutable();
        m.sqrt(a.toConst(), limbs_buffer);
        rma.setMetadata(m.positive, m.len);
    }
3350
    /// r = truncate(Int(signedness, bit_count), a)
    /// Returns an error if memory could not be allocated.
    pub fn truncate(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void {
        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
        var m = r.toMutable();
        m.truncate(a.toConst(), signedness, bit_count);
        r.setMetadata(m.positive, m.len);
    }

    /// r = saturate(Int(signedness, bit_count), a)
    /// Returns an error if memory could not be allocated.
    pub fn saturate(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void {
        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
        var m = r.toMutable();
        m.saturate(a.toConst(), signedness, bit_count);
        r.setMetadata(m.positive, m.len);
    }
3366
    /// r = @popCount(a) with 2s-complement semantics.
    /// r and a may be aliases.
    /// Returns an error if memory could not be allocated.
    pub fn popCount(r: *Managed, a: *const Managed, bit_count: usize) !void {
        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
        var m = r.toMutable();
        m.popCount(a.toConst(), bit_count);
        r.setMetadata(m.positive, m.len);
    }
3375};
3376
/// Different operators which can be used in accumulation style functions
/// (llmulacc, llmulaccKaratsuba, llmulaccLong, llmulLimb). In all these functions,
/// a computed value is accumulated with an existing result.
const AccOp = enum {
    /// The computed value is added to the result.
    add,

    /// The computed value is subtracted from the result.
    sub,
};
3387
/// Knuth 4.3.1, Algorithm M.
///
/// r = r (op) a * b
/// r MUST NOT alias any of a or b.
///
/// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs.
fn llmulacc(comptime op: AccOp, opt_allocator: ?Allocator, r: []Limb, a: []const Limb, b: []const Limb) void {
    assert(r.len >= a.len);
    assert(r.len >= b.len);
    assert(!slicesOverlap(r, a));
    assert(!slicesOverlap(r, b));

    // Order greatest first.
    var x = a;
    var y = b;
    if (a.len < b.len) {
        x = b;
        y = a;
    }

    k_mul: {
        // Karatsuba is only attempted for sufficiently large operands and when
        // an allocator is available for its temporary buffers.
        if (y.len > 48) {
            if (opt_allocator) |allocator| {
                llmulaccKaratsuba(op, allocator, r, x, y) catch |err| switch (err) {
                    error.OutOfMemory => break :k_mul, // handled below
                };
                return;
            }
        }
    }

    // Fallback: schoolbook multiplication, which needs no allocation.
    llmulaccLong(op, r, x, y);
}
3421
/// Karatsuba multiplication.
///
/// r = r (op) a * b
/// r MUST NOT alias any of a or b.
///
/// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs.
fn llmulaccKaratsuba(
    comptime op: AccOp,
    allocator: Allocator,
    r: []Limb,
    a: []const Limb,
    b: []const Limb,
) error{OutOfMemory}!void {
    assert(r.len >= a.len);
    assert(a.len >= b.len);
    assert(!slicesOverlap(r, a));
    assert(!slicesOverlap(r, b));

    // Classical karatsuba algorithm:
    // a = a1 * B + a0
    // b = b1 * B + b0
    // Where a0, b0 < B
    //
    // We then have:
    // ab = a * b
    //    = (a1 * B + a0) * (b1 * B + b0)
    //    = a1 * b1 * B * B + a1 * B * b0 + a0 * b1 * B + a0 * b0
    //    = a1 * b1 * B * B + (a1 * b0 + a0 * b1) * B + a0 * b0
    //
    // Note that:
    // a1 * b0 + a0 * b1
    //    = (a1 + a0)(b1 + b0) - a1 * b1 - a0 * b0
    //    = (a0 - a1)(b1 - b0) + a1 * b1 + a0 * b0
    //
    // This yields:
    // ab = p2 * B^2 + (p0 + p1 + p2) * B + p0
    //
    // Where:
    // p0 = a0 * b0
    // p1 = (a0 - a1)(b1 - b0)
    // p2 = a1 * b1
    //
    // Note, (a0 - a1) and (b1 - b0) produce values -B < x < B, and so we need to mind the sign here.
    // We also have:
    // 0 <= p0 <= 2B
    // -2B <= p1 <= 2B
    //
    // Note, when B is a multiple of the limb size, multiplies by B amount to shifts or
    // slices of a limbs array.
    //
    // This function computes the result of the multiplication modulo r.len. This means:
    // - p2 and p1 only need to be computed modulo r.len - B.
    // - In the case of p2, p2 * B^2 needs to be added modulo r.len - 2 * B.

    const split = b.len / 2; // B

    const limbs_after_split = r.len - split; // Limbs to compute for p1 and p2.
    const limbs_after_split2 = r.len - split * 2; // Limbs to add for p2 * B^2.

    // For a0 and b0 we need the full range.
    const a0 = a[0..llnormalize(a[0..split])];
    const b0 = b[0..llnormalize(b[0..split])];

    // For a1 and b1 we only need `limbs_after_split` limbs.
    const a1 = blk: {
        var a1 = a[split..];
        a1.len = @min(llnormalize(a1), limbs_after_split);
        break :blk a1;
    };

    const b1 = blk: {
        var b1 = b[split..];
        b1.len = @min(llnormalize(b1), limbs_after_split);
        break :blk b1;
    };

    // Note that the above slices relative to `split` work because we have a.len > b.len.

    // We need some temporary memory to store intermediate results.
    // Note, we can reduce the amount of temporaries we need by reordering the computation here:
    // ab = p2 * B^2 + (p0 + p1 + p2) * B + p0
    //    = p2 * B^2 + (p0 * B + p1 * B + p2 * B) + p0
    //    = (p2 * B^2 + p2 * B) + (p0 * B + p0) + p1 * B

    // Allocate at least enough memory to be able to multiply the upper two segments of a and b, assuming
    // no overflow.
    const tmp = try allocator.alloc(Limb, a.len - split + b.len - split);
    defer allocator.free(tmp);

    // Compute p2.
    // Note, we don't need to compute all of p2, just enough limbs to satisfy r.
    const p2_limbs = @min(limbs_after_split, a1.len + b1.len);

    @memset(tmp[0..p2_limbs], 0);
    llmulacc(.add, allocator, tmp[0..p2_limbs], a1[0..@min(a1.len, p2_limbs)], b1[0..@min(b1.len, p2_limbs)]);
    const p2 = tmp[0..llnormalize(tmp[0..p2_limbs])];

    // Add p2 * B to the result.
    llaccum(op, r[split..], p2);

    // Add p2 * B^2 to the result if required.
    if (limbs_after_split2 > 0) {
        llaccum(op, r[split * 2 ..], p2[0..@min(p2.len, limbs_after_split2)]);
    }

    // Compute p0.
    // Since a0.len, b0.len <= split and r.len >= split * 2, the full width of p0 needs to be computed.
    const p0_limbs = a0.len + b0.len;
    @memset(tmp[0..p0_limbs], 0);
    llmulacc(.add, allocator, tmp[0..p0_limbs], a0, b0);
    const p0 = tmp[0..llnormalize(tmp[0..p0_limbs])];

    // Add p0 to the result.
    llaccum(op, r, p0);

    // Add p0 * B to the result. In this case, we may not need all of it.
    llaccum(op, r[split..], p0[0..@min(limbs_after_split, p0.len)]);

    // Finally, compute and add p1.
    // From now on we only need `limbs_after_split` limbs for a0 and b0, since the result of the
    // following computation will be added * B.
    const a0x = a0[0..@min(a0.len, limbs_after_split)];
    const b0x = b0[0..@min(b0.len, limbs_after_split)];

    const j0_sign = llcmp(a0x, a1);
    const j1_sign = llcmp(b1, b0x);

    if (j0_sign * j1_sign == 0) {
        // p1 is zero, we don't need to do any computation at all.
        return;
    }

    @memset(tmp, 0);

    // p1 is nonzero, so compute the intermediary terms j0 = a0 - a1 and j1 = b1 - b0.
    // Note that in this case, we again need some storage for intermediary results
    // j0 and j1. Since we have tmp.len >= 2B, we can store both
    // intermediaries in the already allocated array.
    const j0 = tmp[0 .. a.len - split];
    const j1 = tmp[a.len - split ..];

    // Ensure that no subtraction overflows.
    if (j0_sign == 1) {
        // a0 > a1.
        _ = llsubcarry(j0, a0x, a1);
    } else {
        // a0 < a1.
        _ = llsubcarry(j0, a1, a0x);
    }

    if (j1_sign == 1) {
        // b1 > b0.
        _ = llsubcarry(j1, b1, b0x);
    } else {
        // b1 < b0.
        _ = llsubcarry(j1, b0x, b1);
    }

    if (j0_sign * j1_sign == 1) {
        // If j0 and j1 are both positive, we now have:
        // p1 = j0 * j1
        // If j0 and j1 are both negative, we now have:
        // p1 = -j0 * -j1 = j0 * j1
        // In this case we can add p1 to the result using llmulacc.
        llmulacc(op, allocator, r[split..], j0[0..llnormalize(j0)], j1[0..llnormalize(j1)]);
    } else {
        // In this case either j0 or j1 is negative, and we have:
        // p1 = -(j0 * j1)
        // Now we need to subtract instead of accumulate.
        const inverted_op = if (op == .add) .sub else .add;
        llmulacc(inverted_op, allocator, r[split..], j0[0..llnormalize(j0)], j1[0..llnormalize(j1)]);
    }
}
3595
/// r = r (op) a.
/// The result is computed modulo `r.len`.
fn llaccum(comptime op: AccOp, r: []Limb, a: []const Limb) void {
    // Subtraction delegates to llsubcarry; the rest of this function handles addition.
    if (op == .sub) {
        _ = llsubcarry(r, r, a);
        return;
    }

    assert(r.len != 0 and a.len != 0);
    assert(r.len >= a.len);

    var i: usize = 0;
    var carry: Limb = 0;

    // Add a into r limb by limb, tracking carry from both the limb add and the
    // carry add (at most one of the two can overflow per iteration).
    while (i < a.len) : (i += 1) {
        const ov1 = @addWithOverflow(r[i], a[i]);
        r[i] = ov1[0];
        const ov2 = @addWithOverflow(r[i], carry);
        r[i] = ov2[0];
        carry = @as(Limb, ov1[1]) + ov2[1];
    }

    // Propagate any remaining carry through the rest of r (dropped at r.len).
    while ((carry != 0) and i < r.len) : (i += 1) {
        const ov = @addWithOverflow(r[i], carry);
        r[i] = ov[0];
        carry = ov[1];
    }
}
3624
/// Returns -1, 0, 1 if |a| < |b|, |a| == |b| or |a| > |b| respectively for limbs.
pub fn llcmp(a: []const Limb, b: []const Limb) i8 {
    // Compare normalized lengths first: more significant limbs means a
    // strictly larger magnitude.
    const a_len = llnormalize(a);
    const b_len = llnormalize(b);
    if (a_len != b_len) return if (a_len < b_len) -1 else 1;

    // Equal lengths: scan downward from the most significant limb to the
    // first position where the operands differ (or to limb 0).
    var i: usize = a_len - 1;
    while (i != 0 and a[i] == b[i]) : (i -= 1) {}

    return if (a[i] < b[i]) -1 else if (a[i] > b[i]) 1 else 0;
}
3651
/// r = r (op) a * b, using long (schoolbook) multiplication: one
/// llmulLimb pass per limb of `b`, each shifted up by one limb position.
/// (The previous header, `r = r (op) y * xi`, was copy-pasted from
/// llmulLimb and described the wrong operation.)
/// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs.
fn llmulaccLong(comptime op: AccOp, r: []Limb, a: []const Limb, b: []const Limb) void {
    assert(r.len >= a.len);
    assert(a.len >= b.len);

    var i: usize = 0;
    while (i < b.len) : (i += 1) {
        // Accumulate a * b[i] at limb offset i; overflow past r.len is dropped.
        _ = llmulLimb(op, r[i..], a, b[i]);
    }
}
3663
/// r = r (op) y * xi
/// The result is computed modulo `r.len`.
/// Returns whether the operation overflowed, i.e. whether a carry (for .add)
/// or borrow (for .sub) remained after the highest limb of `acc`.
fn llmulLimb(comptime op: AccOp, acc: []Limb, y: []const Limb, xi: Limb) bool {
    // Multiplying by zero changes nothing and can never overflow.
    if (xi == 0) {
        return false;
    }

    // Split the accumulator: the low part receives the limb products, the
    // high part only absorbs the outgoing carry/borrow.
    // Note: these bindings are never reassigned, so they are `const`; element
    // writes through a `const` slice are still permitted.
    const split = @min(y.len, acc.len);
    const a_lo = acc[0..split];
    const a_hi = acc[split..];

    switch (op) {
        .add => {
            var carry: Limb = 0;
            var j: usize = 0;
            while (j < a_lo.len) : (j += 1) {
                a_lo[j] = addMulLimbWithCarry(a_lo[j], y[j], xi, &carry);
            }

            // Propagate the carry through the high part until it dies out.
            j = 0;
            while ((carry != 0) and (j < a_hi.len)) : (j += 1) {
                const ov = @addWithOverflow(a_hi[j], carry);
                a_hi[j] = ov[0];
                carry = ov[1];
            }

            return carry != 0;
        },
        .sub => {
            var borrow: Limb = 0;
            var j: usize = 0;
            while (j < a_lo.len) : (j += 1) {
                a_lo[j] = subMulLimbWithBorrow(a_lo[j], y[j], xi, &borrow);
            }

            // Propagate the borrow through the high part until it dies out.
            j = 0;
            while ((borrow != 0) and (j < a_hi.len)) : (j += 1) {
                const ov = @subWithOverflow(a_hi[j], borrow);
                a_hi[j] = ov[0];
                borrow = ov[1];
            }

            return borrow != 0;
        },
    }
}
3711
/// returns the min length the limb could be.
fn llnormalize(a: []const Limb) usize {
    // Drop leading (most significant) zero limbs.
    var len = a.len;
    while (len > 0 and a[len - 1] == 0) : (len -= 1) {}

    // A zero value still occupies one limb.
    return @max(len, 1);
}
3724
/// Knuth 4.3.1, Algorithm S.
///
/// r = a - b, computed limb by limb with borrow propagation.
/// Returns the final borrow (nonzero when the subtraction underflowed,
/// i.e. the limbs of b represent a larger magnitude than those of a).
/// r may alias a (llaccum relies on this).
fn llsubcarry(r: []Limb, a: []const Limb, b: []const Limb) Limb {
    assert(a.len != 0 and b.len != 0);
    assert(a.len >= b.len);
    assert(r.len >= a.len);

    var i: usize = 0;
    var borrow: Limb = 0;

    // Subtract the overlapping limbs, combining the borrow out of the limb
    // subtraction with the borrow out of the borrow subtraction.
    while (i < b.len) : (i += 1) {
        const ov1 = @subWithOverflow(a[i], b[i]);
        r[i] = ov1[0];
        const ov2 = @subWithOverflow(r[i], borrow);
        r[i] = ov2[0];
        borrow = @as(Limb, ov1[1]) + ov2[1];
    }

    // Propagate the borrow through the remaining high limbs of a.
    while (i < a.len) : (i += 1) {
        const ov = @subWithOverflow(a[i], borrow);
        r[i] = ov[0];
        borrow = ov[1];
    }

    return borrow;
}
3750
/// r = a - b, asserting that no underflow occurs (|a| must be >= |b|).
fn llsub(r: []Limb, a: []const Limb, b: []const Limb) void {
    // Cheap precondition check: a must not be smaller than b.
    assert(a.len > b.len or (a.len == b.len and a[a.len - 1] >= b[b.len - 1]));
    const borrow = llsubcarry(r, a, b);
    assert(borrow == 0);
}
3755
/// Knuth 4.3.1, Algorithm A.
///
/// r = a + b, computed limb by limb with carry propagation.
/// Returns the final carry out of the highest limb of a.
fn lladdcarry(r: []Limb, a: []const Limb, b: []const Limb) Limb {
    assert(a.len != 0 and b.len != 0);
    assert(a.len >= b.len);
    assert(r.len >= a.len);

    var i: usize = 0;
    var carry: Limb = 0;

    // Add the overlapping limbs, combining the carry out of the limb
    // addition with the carry out of the carry addition.
    while (i < b.len) : (i += 1) {
        const ov1 = @addWithOverflow(a[i], b[i]);
        r[i] = ov1[0];
        const ov2 = @addWithOverflow(r[i], carry);
        r[i] = ov2[0];
        carry = @as(Limb, ov1[1]) + ov2[1];
    }

    // Propagate the carry through the remaining high limbs of a.
    while (i < a.len) : (i += 1) {
        const ov = @addWithOverflow(a[i], carry);
        r[i] = ov[0];
        carry = ov[1];
    }

    return carry;
}
3781
/// r = a + b, storing the final carry in the extra limb r[a.len].
fn lladd(r: []Limb, a: []const Limb, b: []const Limb) void {
    // r needs one limb beyond a to hold the carry out.
    assert(r.len >= a.len + 1);
    const carry = lladdcarry(r, a, b);
    r[a.len] = carry;
}
3786
/// Knuth 4.3.1, Exercise 16.
///
/// Divides a multi-limb value by a single limb: quo = a / b, rem.* = a % b.
/// Processes limbs from most to least significant, carrying the running
/// remainder into the next step as the high half of a double-limb dividend.
fn lldiv1(quo: []Limb, rem: *Limb, a: []const Limb, b: Limb) void {
    assert(a.len > 1 or a[0] >= b);
    assert(quo.len >= a.len);

    rem.* = 0;
    for (a, 0..) |_, ri| {
        // Iterate from the most significant limb downwards.
        const i = a.len - ri - 1;
        // Double-limb dividend: previous remainder on top, current limb below.
        const pdiv = ((@as(DoubleLimb, rem.*) << limb_bits) | a[i]);

        // Fast paths avoid the full double-limb division where possible.
        if (pdiv == 0) {
            quo[i] = 0;
            rem.* = 0;
        } else if (pdiv < b) {
            quo[i] = 0;
            rem.* = @as(Limb, @truncate(pdiv));
        } else if (pdiv == b) {
            quo[i] = 1;
            rem.* = 0;
        } else {
            quo[i] = @as(Limb, @truncate(@divTrunc(pdiv, b)));
            // Wrapping multiply is fine: the low limb of quo[i]*b is exact here.
            rem.* = @as(Limb, @truncate(pdiv - (quo[i] *% b)));
        }
    }
}
3812
/// Divides a multi-limb value by a half-limb divisor: quo = a / b, rem.* = a % b.
/// Because b fits in half a limb, each partial dividend (remainder carried in
/// the high half plus half a limb of input) fits in a single limb, so only
/// single-limb divisions are needed.
fn lldiv0p5(quo: []Limb, rem: *Limb, a: []const Limb, b: HalfLimb) void {
    assert(a.len > 1 or a[0] >= b);
    assert(quo.len >= a.len);

    rem.* = 0;
    for (a, 0..) |_, ri| {
        // Iterate from the most significant limb downwards.
        const i = a.len - ri - 1;
        const ai_high = a[i] >> half_limb_bits;
        const ai_low = a[i] & ((1 << half_limb_bits) - 1);

        // Split the division into two divisions acting on half a limb each. Carry remainder.
        const ai_high_with_carry = (rem.* << half_limb_bits) | ai_high;
        const ai_high_quo = ai_high_with_carry / b;
        rem.* = ai_high_with_carry % b;

        const ai_low_with_carry = (rem.* << half_limb_bits) | ai_low;
        const ai_low_quo = ai_low_with_carry / b;
        rem.* = ai_low_with_carry % b;

        // Recombine the two half-limb quotients into one limb.
        quo[i] = (ai_high_quo << half_limb_bits) | ai_low_quo;
    }
}
3835
/// Performs r = a << shift and returns the amount of limbs affected
///
/// if a and r overlaps, then r.ptr >= a.ptr is asserted
/// r must have the capacity to store a << shift
fn llshl(r: []Limb, a: []const Limb, shift: usize) usize {
    std.debug.assert(a.len >= 1);
    // Writing upward through overlapping memory only works when r starts at
    // or above a.
    if (slicesOverlap(a, r))
        std.debug.assert(@intFromPtr(r.ptr) >= @intFromPtr(a.ptr));

    if (shift == 0) {
        if (a.ptr != r.ptr) @memmove(r[0..a.len], a);
        return a.len;
    }
    if (shift >= limb_bits) {
        // Handle the sub-limb part of the shift at an offset of limb_shift
        // whole limbs, then zero-fill the vacated low limbs.
        const limb_shift = shift / limb_bits;

        const affected = llshl(r[limb_shift..], a, shift % limb_bits);
        @memset(r[0..limb_shift], 0);

        return limb_shift + affected;
    }

    // shift is guaranteed to be < limb_bits
    const bit_shift: Log2Limb = @truncate(shift);
    const opposite_bit_shift: Log2Limb = @truncate(limb_bits - bit_shift);

    // We only need the extra limb if the shift of the last element overflows.
    // This is useful for the implementation of `shiftLeftSat`.
    const overflows = a[a.len - 1] >> opposite_bit_shift != 0;
    if (overflows) {
        std.debug.assert(r.len >= a.len + 1);
    } else {
        std.debug.assert(r.len >= a.len);
    }

    var i: usize = a.len;
    if (overflows) {
        // r is asserted to be large enough above
        r[a.len] = a[a.len - 1] >> opposite_bit_shift;
    }
    // Work from the top down so overlapping inputs are read before being
    // overwritten; each result limb combines bits from two adjacent inputs.
    while (i > 1) {
        i -= 1;
        r[i] = (a[i - 1] >> opposite_bit_shift) | (a[i] << bit_shift);
    }
    r[0] = a[0] << bit_shift;

    return a.len + @intFromBool(overflows);
}
3884
/// Performs r = a >> shift and returns the amount of limbs affected
///
/// if a and r overlaps, then r.ptr <= a.ptr is asserted
/// r must have the capacity to store a >> shift
///
/// See tests below for examples of behaviour
fn llshr(r: []Limb, a: []const Limb, shift: usize) usize {
    // Writing downward through overlapping memory only works when r starts
    // at or below a.
    if (slicesOverlap(a, r))
        std.debug.assert(@intFromPtr(r.ptr) <= @intFromPtr(a.ptr));

    // An empty input shifts to nothing. All code below may assume a.len >= 1.
    if (a.len == 0) return 0;

    if (shift == 0) {
        std.debug.assert(r.len >= a.len);

        if (a.ptr != r.ptr) @memmove(r[0..a.len], a);
        return a.len;
    }
    if (shift >= limb_bits) {
        // Whole-limb shifts just drop the low limbs; the result is a single
        // zero limb when every limb is shifted out.
        if (shift / limb_bits >= a.len) {
            r[0] = 0;
            return 1;
        }
        return llshr(r, a[shift / limb_bits ..], shift % limb_bits);
    }

    // shift is guaranteed to be < limb_bits
    const bit_shift: Log2Limb = @truncate(shift);
    const opposite_bit_shift: Log2Limb = @truncate(limb_bits - bit_shift);

    // special case, where there is a risk to set r to 0
    if (a.len == 1) {
        r[0] = a[0] >> bit_shift;
        return 1;
    }
    // NOTE: the previous `if (a.len == 0)` branch here was dead code — the
    // empty-input case already returned at the top of the function.

    // if the most significant limb becomes 0 after the shift
    const shrink = a[a.len - 1] >> bit_shift == 0;
    std.debug.assert(r.len >= a.len - @intFromBool(shrink));

    // Work from the bottom up; each result limb combines bits from two
    // adjacent input limbs.
    var i: usize = 0;
    while (i < a.len - 1) : (i += 1) {
        r[i] = (a[i] >> bit_shift) | (a[i + 1] << opposite_bit_shift);
    }

    if (!shrink)
        r[i] = a[i] >> bit_shift;

    return a.len - @intFromBool(shrink);
}
3939
// r = ~r
fn llnot(r: []Limb) void {
    // Flip every limb in place.
    for (r) |*limb| limb.* = ~limb.*;
}
3946
// r = a | b with 2s complement semantics.
// r may alias.
// a and b must not be 0.
// Returns `true` when the result is positive.
// When b is positive, r requires at least `a.len` limbs of storage.
// When b is negative, r requires at least `b.len` limbs of storage.
//
// Negative operands are stored sign-magnitude, so each negative input is
// converted on the fly to ones' complement (subtract 1, then treat bits as
// inverted) and the negative result is converted back (add 1).
fn llsignedor(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_positive: bool) bool {
    assert(r.len >= a.len);
    assert(a.len >= b.len);

    if (a_positive and b_positive) {
        // Trivial case, result is positive.
        var i: usize = 0;
        while (i < b.len) : (i += 1) {
            r[i] = a[i] | b[i];
        }
        // b has no more limbs; the high limbs of the result are just a's.
        while (i < a.len) : (i += 1) {
            r[i] = a[i];
        }

        return true;
    } else if (!a_positive and b_positive) {
        // Result is negative.
        // r = (--a) | b
        //   = ~(-a - 1) | b
        //   = ~(-a - 1) | ~~b
        //   = ~((-a - 1) & ~b)
        //   = -(((-a - 1) & ~b) + 1)

        var i: usize = 0;
        var a_borrow: u1 = 1;
        var r_carry: u1 = 1;

        while (i < b.len) : (i += 1) {
            const ov1 = @subWithOverflow(a[i], a_borrow);
            a_borrow = ov1[1];
            const ov2 = @addWithOverflow(ov1[0] & ~b[i], r_carry);
            r[i] = ov2[0];
            r_carry = ov2[1];
        }

        // In order for r_carry to be nonzero at this point, ~b[i] would need to be
        // all ones, which would require b[i] to be zero. This cannot be when
        // b is normalized, so there cannot be a carry here.
        // Also, x & ~b can only clear bits, so (x & ~b) <= x, meaning (-a - 1) + 1 never overflows.
        assert(r_carry == 0);

        // With b = 0, we get (-a - 1) & ~0 = -a - 1.
        // Note, if a_borrow is zero we do not need to compute anything for
        // the higher limbs so we can early return here.
        while (i < a.len and a_borrow == 1) : (i += 1) {
            const ov = @subWithOverflow(a[i], a_borrow);
            r[i] = ov[0];
            a_borrow = ov[1];
        }

        assert(a_borrow == 0); // a was 0.

        return false;
    } else if (a_positive and !b_positive) {
        // Result is negative.
        // r = a | (--b)
        //   = a | ~(-b - 1)
        //   = ~~a | ~(-b - 1)
        //   = ~(~a & (-b - 1))
        //   = -((~a & (-b - 1)) + 1)

        var i: usize = 0;
        var b_borrow: u1 = 1;
        var r_carry: u1 = 1;

        while (i < b.len) : (i += 1) {
            const ov1 = @subWithOverflow(b[i], b_borrow);
            b_borrow = ov1[1];
            const ov2 = @addWithOverflow(~a[i] & ov1[0], r_carry);
            r[i] = ov2[0];
            r_carry = ov2[1];
        }

        // b is at least 1, so this should never underflow.
        assert(b_borrow == 0); // b was 0

        // x & ~a can only clear bits, so (x & ~a) <= x, meaning (-b - 1) + 1 never overflows.
        assert(r_carry == 0);

        // With b = 0 and b_borrow = 0, we get ~a & (0 - 0) = ~a & 0 = 0.
        // Omit setting the upper bytes, just deal with those when calling llsignedor.

        return false;
    } else {
        // Result is negative.
        // r = (--a) | (--b)
        //   = ~(-a - 1) | ~(-b - 1)
        //   = ~((-a - 1) & (-b - 1))
        //   = -(~(~((-a - 1) & (-b - 1))) + 1)
        //   = -((-a - 1) & (-b - 1) + 1)

        var i: usize = 0;
        var a_borrow: u1 = 1;
        var b_borrow: u1 = 1;
        var r_carry: u1 = 1;

        while (i < b.len) : (i += 1) {
            const ov1 = @subWithOverflow(a[i], a_borrow);
            a_borrow = ov1[1];
            const ov2 = @subWithOverflow(b[i], b_borrow);
            b_borrow = ov2[1];
            const ov3 = @addWithOverflow(ov1[0] & ov2[0], r_carry);
            r[i] = ov3[0];
            r_carry = ov3[1];
        }

        // b is at least 1, so this should never underflow.
        assert(b_borrow == 0); // b was 0

        // Can never overflow because in order for b_limb to be maxInt(Limb),
        // b_borrow would need to equal 1.

        // x & y can only clear bits, meaning x & y <= x and x & y <= y. This implies that
        // for x = a - 1 and y = b - 1, the +1 term would never cause an overflow.
        assert(r_carry == 0);

        // With b = 0 and b_borrow = 0 we get (-a - 1) & (0 - 0) = (-a - 1) & 0 = 0.
        // Omit setting the upper bytes, just deal with those when calling llsignedor.
        return false;
    }
}
4074
// r = a & b with 2s complement semantics.
// r may alias.
// a and b must not be 0.
// Returns `true` when the result is positive.
// We assume `a.len >= b.len` here, so:
// 1. when b is positive, r requires at least `b.len` limbs of storage,
// 2. when b is negative but a is positive, r requires at least `a.len` limbs of storage,
// 3. when both a and b are negative, r requires at least `a.len + 1` limbs of storage.
//
// Negative operands are stored sign-magnitude, so each negative input is
// converted on the fly to ones' complement (subtract 1, then treat bits as
// inverted) and the negative result is converted back (add 1).
fn llsignedand(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_positive: bool) bool {
    assert(a.len != 0 and b.len != 0);
    assert(a.len >= b.len);
    assert(r.len >= if (b_positive) b.len else if (a_positive) a.len else a.len + 1);

    if (a_positive and b_positive) {
        // Trivial case, result is positive.
        var i: usize = 0;
        while (i < b.len) : (i += 1) {
            r[i] = a[i] & b[i];
        }

        // With b = 0 we have a & 0 = 0, so the upper bytes are zero.
        // Omit setting them here and simply discard them whenever
        // llsignedand is called.

        return true;
    } else if (!a_positive and b_positive) {
        // Result is positive.
        // r = (--a) & b
        //   = ~(-a - 1) & b

        var i: usize = 0;
        var a_borrow: u1 = 1;

        while (i < b.len) : (i += 1) {
            const ov = @subWithOverflow(a[i], a_borrow);
            a_borrow = ov[1];
            r[i] = ~ov[0] & b[i];
        }

        // With b = 0 we have ~(a - 1) & 0 = 0, so the upper bytes are zero.
        // Omit setting them here and simply discard them whenever
        // llsignedand is called.

        return true;
    } else if (a_positive and !b_positive) {
        // Result is positive.
        // r = a & (--b)
        //   = a & ~(-b - 1)

        var i: usize = 0;
        var b_borrow: u1 = 1;

        while (i < b.len) : (i += 1) {
            const ov = @subWithOverflow(b[i], b_borrow);
            b_borrow = ov[1];
            r[i] = a[i] & ~ov[0];
        }

        assert(b_borrow == 0); // b was 0

        // With b = 0 and b_borrow = 0 we have a & ~(0 - 0) = a & ~0 = a, so
        // the upper bytes are the same as those of a.

        while (i < a.len) : (i += 1) {
            r[i] = a[i];
        }

        return true;
    } else {
        // Result is negative.
        // r = (--a) & (--b)
        //   = ~(-a - 1) & ~(-b - 1)
        //   = ~((-a - 1) | (-b - 1))
        //   = -(((-a - 1) | (-b - 1)) + 1)

        var i: usize = 0;
        var a_borrow: u1 = 1;
        var b_borrow: u1 = 1;
        var r_carry: u1 = 1;

        while (i < b.len) : (i += 1) {
            const ov1 = @subWithOverflow(a[i], a_borrow);
            a_borrow = ov1[1];
            const ov2 = @subWithOverflow(b[i], b_borrow);
            b_borrow = ov2[1];
            const ov3 = @addWithOverflow(ov1[0] | ov2[0], r_carry);
            r[i] = ov3[0];
            r_carry = ov3[1];
        }

        // b is at least 1, so this should never underflow.
        assert(b_borrow == 0); // b was 0

        // With b = 0 and b_borrow = 0 we get (-a - 1) | (0 - 0) = (-a - 1) | 0 = -a - 1.
        while (i < a.len) : (i += 1) {
            const ov1 = @subWithOverflow(a[i], a_borrow);
            a_borrow = ov1[1];
            const ov2 = @addWithOverflow(ov1[0], r_carry);
            r[i] = ov2[0];
            r_carry = ov2[1];
        }

        assert(a_borrow == 0); // a was 0.

        // The final addition can overflow here, so we need to keep that in mind.
        // This is why case 3 requires a.len + 1 limbs of storage.
        r[i] = r_carry;

        return false;
    }
}
4185
// r = a ^ b with 2s complement semantics.
// r may alias.
// a and b must not be -0.
// Returns `true` when the result is positive.
// If the sign of a and b is equal, then r requires at least `@max(a.len, b.len)` limbs are required.
// Otherwise, r requires at least `@max(a.len, b.len) + 1` limbs.
fn llsignedxor(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_positive: bool) bool {
    assert(a.len != 0 and b.len != 0);
    assert(r.len >= a.len);
    assert(a.len >= b.len);

    // If a and b are positive, the result is positive and r = a ^ b.
    // If a negative, b positive, result is negative and we have
    // r = --(--a ^ b)
    //   = --(~(-a - 1) ^ b)
    //   = -(~(~(-a - 1) ^ b) + 1)
    //   = -(((-a - 1) ^ b) + 1)
    // Same if a is positive and b is negative, sides switched.
    // If both a and b are negative, the result is positive and we have
    // r = (--a) ^ (--b)
    //   = ~(-a - 1) ^ ~(-b - 1)
    //   = (-a - 1) ^ (-b - 1)
    // These operations can be made more generic as follows:
    // - If a is negative, subtract 1 from |a| before the xor.
    // - If b is negative, subtract 1 from |b| before the xor.
    // - if the result is supposed to be negative, add 1.

    var i: usize = 0;
    // The borrows/carry below implement exactly the three generic rules above.
    var a_borrow = @intFromBool(!a_positive);
    var b_borrow = @intFromBool(!b_positive);
    var r_carry = @intFromBool(a_positive != b_positive);

    while (i < b.len) : (i += 1) {
        const ov1 = @subWithOverflow(a[i], a_borrow);
        a_borrow = ov1[1];
        const ov2 = @subWithOverflow(b[i], b_borrow);
        b_borrow = ov2[1];
        const ov3 = @addWithOverflow(ov1[0] ^ ov2[0], r_carry);
        r[i] = ov3[0];
        r_carry = ov3[1];
    }

    // b has run out of limbs; continue with a's high limbs (b contributes 0).
    while (i < a.len) : (i += 1) {
        const ov1 = @subWithOverflow(a[i], a_borrow);
        a_borrow = ov1[1];
        const ov2 = @addWithOverflow(ov1[0], r_carry);
        r[i] = ov2[0];
        r_carry = ov2[1];
    }

    // If both inputs don't share the same sign, an extra limb is required.
    if (a_positive != b_positive) {
        r[i] = r_carry;
    } else {
        assert(r_carry == 0);
    }

    assert(a_borrow == 0);
    assert(b_borrow == 0);

    return a_positive == b_positive;
}
4248
/// Schoolbook squaring exploiting symmetry: r += x * x (r must be zeroed by
/// the caller over the limbs it expects to use).
/// r MUST NOT alias x.
fn llsquareBasecase(r: []Limb, x: []const Limb) void {
    const x_norm = x;
    assert(r.len >= 2 * x_norm.len + 1);
    assert(!slicesOverlap(r, x));

    // Compute the square of a N-limb bigint with only (N^2 + N)/2
    // multiplications by exploiting the symmetry of the coefficients around the
    // diagonal:
    //
    //           a   b   c *
    //           a   b   c =
    // -------------------
    //          ca  cb  cc +
    //      ba  bb  bc     +
    //  aa  ab  ac
    //
    // Note that:
    // - Each mixed-product term appears twice for each column,
    // - Squares are always in the 2k (0 <= k < N) column

    for (x_norm, 0..) |v, i| {
        // Accumulate all the x[i]*x[j] (with x!=j) products.
        // Each pair is computed once (j > i), at limb offset i + j... starting
        // the accumulator at 2*i + 1 places x[i]*x[i+1] correctly.
        const overflow = llmulLimb(.add, r[2 * i + 1 ..], x_norm[i + 1 ..], v);
        assert(!overflow);
    }

    // Each product appears twice, multiply by 2
    _ = llshl(r, r[0 .. 2 * x_norm.len], 1);

    for (x_norm, 0..) |v, i| {
        // Compute and add the squares
        const overflow = llmulLimb(.add, r[2 * i ..], x[i..][0..1], v);
        assert(!overflow);
    }
}
4285
/// Knuth 4.6.3
///
/// r = a^b via left-to-right binary exponentiation (square, and multiply by a
/// when the current exponent bit is set).
fn llpow(r: []Limb, a: []const Limb, b: u32, tmp_limbs: []Limb) void {
    var tmp1: []Limb = undefined;
    var tmp2: []Limb = undefined;

    // Multiplication requires no aliasing between the operand and the result
    // variable, use the output limbs and another temporary set to overcome this
    // limitation.
    // The initial assignment makes the result end in `r` so an extra memory
    // copy is saved, each 1 flips the index twice so it's only the zeros that
    // matter.
    const b_leading_zeros = @clz(b);
    // Count the zero bits of b below its leading one: each causes exactly one
    // buffer swap, so its parity decides which buffer must start as tmp1.
    const exp_zeros = @popCount(~b) - b_leading_zeros;
    if (exp_zeros & 1 != 0) {
        tmp1 = tmp_limbs;
        tmp2 = r;
    } else {
        tmp1 = r;
        tmp2 = tmp_limbs;
    }

    // Seed the running result with a (the leading exponent bit is implicit).
    @memcpy(tmp1[0..a.len], a);
    @memset(tmp1[a.len..], 0);

    // Scan the exponent as a binary number, from left to right, dropping the
    // most significant bit set.
    // Square the result if the current bit is zero, square and multiply by a if
    // it is one.
    const exp_bits = 32 - 1 - b_leading_zeros;
    var exp = b << @as(u5, @intCast(1 + b_leading_zeros));

    var i: usize = 0;
    while (i < exp_bits) : (i += 1) {
        // Square
        @memset(tmp2, 0);
        llsquareBasecase(tmp2, tmp1[0..llnormalize(tmp1)]);
        mem.swap([]Limb, &tmp1, &tmp2);
        // Multiply by a
        const ov = @shlWithOverflow(exp, 1);
        exp = ov[0];
        if (ov[1] != 0) {
            @memset(tmp2, 0);
            llmulacc(.add, null, tmp2, tmp1[0..llnormalize(tmp1)], a);
            mem.swap([]Limb, &tmp1, &tmp2);
        }
    }
}
4333
// Storage must live for the lifetime of the returned value
//
// Converts a signed double-limb integer into a two-limb Mutable backed by
// the caller-provided storage.
fn fixedIntFromSignedDoubleLimb(A: SignedDoubleLimb, storage: []Limb) Mutable {
    assert(storage.len >= 2);

    const A_is_positive = A >= 0;
    // @abs yields the unsigned magnitude directly; unlike `if (A < 0) -A else A`,
    // it cannot overflow when A == minInt(SignedDoubleLimb).
    const Au: DoubleLimb = @abs(A);
    storage[0] = @as(Limb, @truncate(Au));
    storage[1] = @as(Limb, @truncate(Au >> limb_bits));
    return .{
        .limbs = storage[0..2],
        .positive = A_is_positive,
        .len = 2,
    };
}
4348
fn slicesOverlap(a: []const Limb, b: []const Limb) bool {
    // there is no overlap if a.ptr + a.len <= b.ptr or b.ptr + b.len <= a.ptr
    const a_start = @intFromPtr(a.ptr);
    const a_end = @intFromPtr(a.ptr + a.len);
    const b_start = @intFromPtr(b.ptr);
    const b_end = @intFromPtr(b.ptr + b.len);
    return a_end > b_start and b_end > a_start;
}
4353
test {
    // Reference the external integer tests so they run with this module's tests.
    _ = @import("int_test.zig");
}
4357
// Shared test allocator for the tests below.
const testing_allocator = std.testing.allocator;
test "llshl shift by whole number of limb" {
    // Shifting by a multiple of the limb size must move the limbs unchanged,
    // zero-fill below them, and leave the rest of the buffer untouched.
    const padding = maxInt(Limb);

    var r: [10]Limb = @splat(padding);

    const A: Limb = @truncate(0xCCCCCCCCCCCCCCCCCCCCCCC);
    const B: Limb = @truncate(0x22222222222222222222222);

    const data = [2]Limb{ A, B };
    for (0..9) |i| {
        @memset(&r, padding);
        const len = llshl(&r, &data, i * @bitSizeOf(Limb));

        try std.testing.expectEqual(i + 2, len);
        try std.testing.expectEqualSlices(Limb, &data, r[i .. i + 2]);
        // Limbs below the shifted data must be zero...
        for (r[0..i]) |x|
            try std.testing.expectEqual(0, x);
        // ...and limbs above it must keep their padding value.
        for (r[i + 2 ..]) |x|
            try std.testing.expectEqual(padding, x);
    }
}
4380
test llshl {
    // Table-driven cases: each tuple is {shift, expected limbs, input limbs}.
    if (limb_bits != 64) return error.SkipZigTest;

    // 1 << 63
    const left_one = 0x8000000000000000;
    const maxint: Limb = 0xFFFFFFFFFFFFFFFF;

    // zig fmt: off
    try testOneShiftCase(.llshl, .{0, &.{0}, &.{0}});
    try testOneShiftCase(.llshl, .{0, &.{1}, &.{1}});
    try testOneShiftCase(.llshl, .{0, &.{125484842448}, &.{125484842448}});
    try testOneShiftCase(.llshl, .{0, &.{0xdeadbeef}, &.{0xdeadbeef}});
    try testOneShiftCase(.llshl, .{0, &.{maxint}, &.{maxint}});
    try testOneShiftCase(.llshl, .{0, &.{left_one}, &.{left_one}});
    try testOneShiftCase(.llshl, .{0, &.{0, 1}, &.{0, 1}});
    try testOneShiftCase(.llshl, .{0, &.{1, 2}, &.{1, 2}});
    try testOneShiftCase(.llshl, .{0, &.{left_one, 1}, &.{left_one, 1}});
    try testOneShiftCase(.llshl, .{1, &.{0}, &.{0}});
    try testOneShiftCase(.llshl, .{1, &.{2}, &.{1}});
    try testOneShiftCase(.llshl, .{1, &.{250969684896}, &.{125484842448}});
    try testOneShiftCase(.llshl, .{1, &.{0x1bd5b7dde}, &.{0xdeadbeef}});
    try testOneShiftCase(.llshl, .{1, &.{0xfffffffffffffffe, 1}, &.{maxint}});
    try testOneShiftCase(.llshl, .{1, &.{0, 1}, &.{left_one}});
    try testOneShiftCase(.llshl, .{1, &.{0, 2}, &.{0, 1}});
    try testOneShiftCase(.llshl, .{1, &.{2, 4}, &.{1, 2}});
    try testOneShiftCase(.llshl, .{1, &.{0, 3}, &.{left_one, 1}});
    try testOneShiftCase(.llshl, .{5, &.{32}, &.{1}});
    try testOneShiftCase(.llshl, .{5, &.{4015514958336}, &.{125484842448}});
    try testOneShiftCase(.llshl, .{5, &.{0x1bd5b7dde0}, &.{0xdeadbeef}});
    try testOneShiftCase(.llshl, .{5, &.{0xffffffffffffffe0, 0x1f}, &.{maxint}});
    try testOneShiftCase(.llshl, .{5, &.{0, 16}, &.{left_one}});
    try testOneShiftCase(.llshl, .{5, &.{0, 32}, &.{0, 1}});
    try testOneShiftCase(.llshl, .{5, &.{32, 64}, &.{1, 2}});
    try testOneShiftCase(.llshl, .{5, &.{0, 48}, &.{left_one, 1}});
    try testOneShiftCase(.llshl, .{64, &.{0, 1}, &.{1}});
    try testOneShiftCase(.llshl, .{64, &.{0, 125484842448}, &.{125484842448}});
    try testOneShiftCase(.llshl, .{64, &.{0, 0xdeadbeef}, &.{0xdeadbeef}});
    try testOneShiftCase(.llshl, .{64, &.{0, maxint}, &.{maxint}});
    try testOneShiftCase(.llshl, .{64, &.{0, left_one}, &.{left_one}});
    try testOneShiftCase(.llshl, .{64, &.{0, 0, 1}, &.{0, 1}});
    try testOneShiftCase(.llshl, .{64, &.{0, 1, 2}, &.{1, 2}});
    try testOneShiftCase(.llshl, .{64, &.{0, left_one, 1}, &.{left_one, 1}});
    try testOneShiftCase(.llshl, .{35, &.{0x800000000}, &.{1}});
    try testOneShiftCase(.llshl, .{35, &.{13534986488655118336, 233}, &.{125484842448}});
    try testOneShiftCase(.llshl, .{35, &.{0xf56df77800000000, 6}, &.{0xdeadbeef}});
    try testOneShiftCase(.llshl, .{35, &.{0xfffffff800000000, 0x7ffffffff}, &.{maxint}});
    try testOneShiftCase(.llshl, .{35, &.{0, 17179869184}, &.{left_one}});
    try testOneShiftCase(.llshl, .{35, &.{0, 0x800000000}, &.{0, 1}});
    try testOneShiftCase(.llshl, .{35, &.{0x800000000, 0x1000000000}, &.{1, 2}});
    try testOneShiftCase(.llshl, .{35, &.{0, 0xc00000000}, &.{left_one, 1}});
    try testOneShiftCase(.llshl, .{70, &.{0, 64}, &.{1}});
    try testOneShiftCase(.llshl, .{70, &.{0, 8031029916672}, &.{125484842448}});
    try testOneShiftCase(.llshl, .{70, &.{0, 0x37ab6fbbc0}, &.{0xdeadbeef}});
    try testOneShiftCase(.llshl, .{70, &.{0, 0xffffffffffffffc0, 63}, &.{maxint}});
    try testOneShiftCase(.llshl, .{70, &.{0, 0, 32}, &.{left_one}});
    try testOneShiftCase(.llshl, .{70, &.{0, 0, 64}, &.{0, 1}});
    try testOneShiftCase(.llshl, .{70, &.{0, 64, 128}, &.{1, 2}});
    try testOneShiftCase(.llshl, .{70, &.{0, 0, 0x60}, &.{left_one, 1}});
    // zig fmt: on
}
4441
test "llshl shift 0" {
    // Shifting zero by any amount must remain zero, with the expected limb count.
    const n = @bitSizeOf(Limb);
    if (n <= 20) return error.SkipZigTest;

    // zig fmt: off
    try testOneShiftCase(.llshl, .{0, &.{0}, &.{0}});
    try testOneShiftCase(.llshl, .{1, &.{0}, &.{0}});
    try testOneShiftCase(.llshl, .{5, &.{0}, &.{0}});
    try testOneShiftCase(.llshl, .{13, &.{0}, &.{0}});
    try testOneShiftCase(.llshl, .{20, &.{0}, &.{0}});
    try testOneShiftCase(.llshl, .{0, &.{0, 0}, &.{0, 0}});
    try testOneShiftCase(.llshl, .{2, &.{0, 0}, &.{0, 0}});
    try testOneShiftCase(.llshl, .{7, &.{0, 0}, &.{0, 0}});
    try testOneShiftCase(.llshl, .{11, &.{0, 0}, &.{0, 0}});
    try testOneShiftCase(.llshl, .{19, &.{0, 0}, &.{0, 0}});

    // Whole-limb shifts of zero grow the result by one zero limb per limb shifted.
    try testOneShiftCase(.llshl, .{0, &.{0}, &.{0}});
    try testOneShiftCase(.llshl, .{n, &.{0, 0}, &.{0}});
    try testOneShiftCase(.llshl, .{2*n, &.{0, 0, 0}, &.{0}});
    try testOneShiftCase(.llshl, .{3*n, &.{0, 0, 0, 0}, &.{0}});
    try testOneShiftCase(.llshl, .{4*n, &.{0, 0, 0, 0, 0}, &.{0}});
    try testOneShiftCase(.llshl, .{0, &.{0, 0}, &.{0, 0}});
    try testOneShiftCase(.llshl, .{n, &.{0, 0, 0}, &.{0, 0}});
    try testOneShiftCase(.llshl, .{2*n, &.{0, 0, 0, 0}, &.{0, 0}});
    try testOneShiftCase(.llshl, .{3*n, &.{0, 0, 0, 0, 0}, &.{0, 0}});
    try testOneShiftCase(.llshl, .{4*n, &.{0, 0, 0, 0, 0, 0}, &.{0, 0}});
    // zig fmt: on
}
4470
test "llshr shift 0" {
    // Shifting zero right by any amount must remain zero (and shrink where expected).
    const n = @bitSizeOf(Limb);

    // zig fmt: off
    try testOneShiftCase(.llshr, .{0, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{1, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{5, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{13, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{20, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{0, &.{0, 0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{2, &.{0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{7, &.{0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{11, &.{0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{19, &.{0}, &.{0, 0}});

    try testOneShiftCase(.llshr, .{n, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{2*n, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{3*n, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{4*n, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{n, &.{0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{2*n, &.{0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{3*n, &.{0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{4*n, &.{0}, &.{0, 0}});

    // An empty input shifts to an empty output.
    try testOneShiftCase(.llshr, .{1, &.{}, &.{}});
    try testOneShiftCase(.llshr, .{2, &.{}, &.{}});
    try testOneShiftCase(.llshr, .{64, &.{}, &.{}});
    // zig fmt: on
}
4500
test "llshr to 0" {
    // Shifting out every significant bit must produce a single zero limb.
    const n = @bitSizeOf(Limb);
    if (n != 64 and n != 32) return error.SkipZigTest;

    // zig fmt: off
    try testOneShiftCase(.llshr, .{1, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{1, &.{0}, &.{1}});
    try testOneShiftCase(.llshr, .{5, &.{0}, &.{1}});
    try testOneShiftCase(.llshr, .{65, &.{0}, &.{0, 1}});
    try testOneShiftCase(.llshr, .{193, &.{0}, &.{0, 0, maxInt(Limb)}});
    try testOneShiftCase(.llshr, .{193, &.{0}, &.{maxInt(Limb), 1, maxInt(Limb)}});
    try testOneShiftCase(.llshr, .{193, &.{0}, &.{0xdeadbeef, 0xabcdefab, 0x1234}});
    // zig fmt: on
}
4515
test "llshr single" {
    // Right shifts of single-limb inputs: {shift, expected limbs, input limbs}.
    if (limb_bits != 64) return error.SkipZigTest;

    // 1 << 63
    const left_one = 0x8000000000000000;
    const maxint: Limb = 0xFFFFFFFFFFFFFFFF;

    // zig fmt: off
    try testOneShiftCase(.llshr, .{0, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{0, &.{1}, &.{1}});
    try testOneShiftCase(.llshr, .{0, &.{125484842448}, &.{125484842448}});
    try testOneShiftCase(.llshr, .{0, &.{0xdeadbeef}, &.{0xdeadbeef}});
    try testOneShiftCase(.llshr, .{0, &.{maxint}, &.{maxint}});
    try testOneShiftCase(.llshr, .{0, &.{left_one}, &.{left_one}});
    try testOneShiftCase(.llshr, .{1, &.{0}, &.{0}});
    try testOneShiftCase(.llshr, .{1, &.{1}, &.{2}});
    try testOneShiftCase(.llshr, .{1, &.{62742421224}, &.{125484842448}});
    try testOneShiftCase(.llshr, .{1, &.{62742421223}, &.{125484842447}});
    try testOneShiftCase(.llshr, .{1, &.{0x6f56df77}, &.{0xdeadbeef}});
    try testOneShiftCase(.llshr, .{1, &.{0x7fffffffffffffff}, &.{maxint}});
    try testOneShiftCase(.llshr, .{1, &.{0x4000000000000000}, &.{left_one}});
    try testOneShiftCase(.llshr, .{8, &.{1}, &.{256}});
    try testOneShiftCase(.llshr, .{8, &.{490175165}, &.{125484842448}});
    try testOneShiftCase(.llshr, .{8, &.{0xdeadbe}, &.{0xdeadbeef}});
    try testOneShiftCase(.llshr, .{8, &.{0xffffffffffffff}, &.{maxint}});
    try testOneShiftCase(.llshr, .{8, &.{0x80000000000000}, &.{left_one}});
    // zig fmt: on
}
4544
test llshr {
    // Multi-limb right-shift vectors, written against 64-bit limbs.
    if (limb_bits != 64) return error.SkipZigTest;

    // 1 << 63
    const left_one = 0x8000000000000000;
    const maxint: Limb = 0xFFFFFFFFFFFFFFFF;

    // Each case is .{ shift_amount, expected_limbs, input_limbs } with limbs
    // in little-endian order (least significant limb first). Shifts of 64 and
    // 72 exercise whole-limb and limb-plus-bits shifts respectively.
    // zig fmt: off
    try testOneShiftCase(.llshr, .{0, &.{0, 0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{0, &.{0, 1}, &.{0, 1}});
    try testOneShiftCase(.llshr, .{0, &.{15, 1}, &.{15, 1}});
    try testOneShiftCase(.llshr, .{0, &.{987656565, 123456789456}, &.{987656565, 123456789456}});
    try testOneShiftCase(.llshr, .{0, &.{0xfeebdaed, 0xdeadbeef}, &.{0xfeebdaed, 0xdeadbeef}});
    try testOneShiftCase(.llshr, .{0, &.{1, maxint}, &.{1, maxint}});
    try testOneShiftCase(.llshr, .{0, &.{0, left_one}, &.{0, left_one}});
    try testOneShiftCase(.llshr, .{1, &.{0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{1, &.{left_one}, &.{0, 1}});
    try testOneShiftCase(.llshr, .{1, &.{0x8000000000000007}, &.{15, 1}});
    try testOneShiftCase(.llshr, .{1, &.{493828282, 61728394728}, &.{987656565, 123456789456}});
    try testOneShiftCase(.llshr, .{1, &.{0x800000007f75ed76, 0x6f56df77}, &.{0xfeebdaed, 0xdeadbeef}});
    try testOneShiftCase(.llshr, .{1, &.{left_one, 0x7fffffffffffffff}, &.{1, maxint}});
    try testOneShiftCase(.llshr, .{1, &.{0, 0x4000000000000000}, &.{0, left_one}});
    try testOneShiftCase(.llshr, .{64, &.{0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{64, &.{1}, &.{0, 1}});
    try testOneShiftCase(.llshr, .{64, &.{1}, &.{15, 1}});
    try testOneShiftCase(.llshr, .{64, &.{123456789456}, &.{987656565, 123456789456}});
    try testOneShiftCase(.llshr, .{64, &.{0xdeadbeef}, &.{0xfeebdaed, 0xdeadbeef}});
    try testOneShiftCase(.llshr, .{64, &.{maxint}, &.{1, maxint}});
    try testOneShiftCase(.llshr, .{64, &.{left_one}, &.{0, left_one}});
    try testOneShiftCase(.llshr, .{72, &.{0}, &.{0, 0}});
    try testOneShiftCase(.llshr, .{72, &.{0}, &.{0, 1}});
    try testOneShiftCase(.llshr, .{72, &.{0}, &.{15, 1}});
    try testOneShiftCase(.llshr, .{72, &.{482253083}, &.{987656565, 123456789456}});
    try testOneShiftCase(.llshr, .{72, &.{0xdeadbe}, &.{0xfeebdaed, 0xdeadbeef}});
    try testOneShiftCase(.llshr, .{72, &.{0xffffffffffffff}, &.{1, maxint}});
    try testOneShiftCase(.llshr, .{72, &.{0x80000000000000}, &.{0, left_one}});
    // zig fmt: on
}
4583
/// A shift test vector: .{ shift_amount, expected_result_limbs, input_limbs }.
const Case = struct { usize, []const Limb, []const Limb };
4585
/// Runs one shift test vector through the selected low-level shift routine,
/// both with disjoint buffers and with aliasing source/destination buffers.
fn testOneShiftCase(comptime function: enum { llshr, llshl }, case: Case) !void {
    const impl = switch (function) {
        .llshl => llshl,
        .llshr => llshr,
    };
    // Direction in which the aliasing test staggers the source buffer
    // relative to the destination.
    const direction: isize = switch (function) {
        .llshl => -1,
        .llshr => 1,
    };

    try testOneShiftCaseNoAliasing(impl, case);
    try testOneShiftCaseAliasing(impl, case, direction);
}
4593
/// Applies `func` to the case's input with a destination buffer disjoint from
/// the source, then verifies the reported length, the result limbs, and that
/// no limb past the reported length was written.
fn testOneShiftCaseNoAliasing(func: fn ([]Limb, []const Limb, usize) usize, case: Case) !void {
    // Pre-fill the destination with a sentinel so out-of-bounds writes are
    // detectable afterwards.
    const sentinel = maxInt(Limb);
    var result: [20]Limb = @splat(sentinel);

    const shift, const expected, const input = case;

    std.debug.assert(expected.len <= 20);

    const written = func(&result, input, shift);

    try std.testing.expectEqual(expected.len, written);
    try std.testing.expectEqualSlices(Limb, expected, result[0..written]);
    // Everything beyond the reported length must still hold the sentinel.
    try std.testing.expect(mem.allEqual(Limb, result[written..], sentinel));
}
4610
/// Applies `func` to the case's input with source and destination buffers
/// that overlap, sliding the source 0..9 limbs away from the destination in
/// the given direction, and verifies the result each time.
/// `shift_direction` must be 1 (source above destination, as llshr allows)
/// or -1 (source below destination, as llshl allows).
fn testOneShiftCaseAliasing(func: fn ([]Limb, []const Limb, usize) usize, case: Case, shift_direction: isize) !void {
    const padding = maxInt(Limb);
    var r: [60]Limb = @splat(padding);
    // The destination window always starts at `base`; the source is placed at
    // base +/- limb_shift so the two slices alias.
    const base = 20;

    assert(shift_direction == 1 or shift_direction == -1);

    // Loop-invariant pieces of the test vector, hoisted out of the loop so
    // they are extracted (and the bound asserted) exactly once.
    const shift = case[0];
    const expected = case[1];
    const data = case[2];

    std.debug.assert(expected.len <= 20);

    for (0..10) |limb_shift| {
        // Reset the scratch buffer, then copy the input limbs to an offset of
        // `limb_shift` limbs before/after the destination window.
        @memset(&r, padding);
        const final_limb_base: usize = @intCast(base + shift_direction * @as(isize, @intCast(limb_shift)));
        const written_data = r[final_limb_base..][0..data.len];
        @memcpy(written_data, data);

        const len = func(r[base..], written_data, shift);

        try std.testing.expectEqual(expected.len, len);
        try std.testing.expectEqualSlices(Limb, expected, r[base .. base + len]);
    }
}
4636
test "format" {
    var a: Managed = try .init(std.testing.allocator);
    defer a.deinit();

    // Small values (fitting in a single limb) print in full.
    try a.set(123);
    try testFormat(a, "123");

    try a.set(-123);
    try testFormat(a, "-123");

    // Multi-limb values still print in full.
    try a.set(20000000000000000000); // > maxInt(u64)
    try testFormat(a, "20000000000000000000");

    // Very large values are expected to fall back to the "(BigInt)"
    // placeholder instead of rendering all digits.
    try a.set(1 << 64 * @sizeOf(usize) * 8);
    try testFormat(a, "(BigInt)");

    try a.set(-(1 << 64 * @sizeOf(usize) * 8));
    try testFormat(a, "(BigInt)");
}
4656
/// Checks that all three views of the big integer (Managed, Mutable, Const)
/// produce the same formatted output.
fn testFormat(a: Managed, expected: []const u8) !void {
    inline for (.{ a, a.toMutable(), a.toConst() }) |view| {
        try std.testing.expectFmt(expected, "{f}", .{view});
    }
}