Commit 7674a8b43d

Frank Denis <github@pureftpd.org>
2021-05-26 21:20:23
p256: update to the last fiat-crypto code & share PC tables
fiat-crypto now generates proper types, so take advantage of that. Add mixed subtraction and double base multiplication. We will eventually leverage mixed addition/subtraction for fixed base multiplication. The reason we don't right now is that precomputing the tables at comptime would take forever. We don't use combs for the same reason. Stage2 + less function calls in the fiat-crypto generated code will eventually address that. Also make the edwards25519 code consistent with these changes. No functional changes.
1 parent d8d92da
Changed files (6)
lib/std/crypto/25519/edwards25519.zig
@@ -143,7 +143,7 @@ pub const Edwards25519 = struct {
         p.t.cMov(a.t, c);
     }
 
-    inline fn pcSelect(comptime n: usize, pc: [n]Edwards25519, b: u8) Edwards25519 {
+    inline fn pcSelect(comptime n: usize, pc: *const [n]Edwards25519, b: u8) Edwards25519 {
         var t = Edwards25519.identityElement;
         comptime var i: u8 = 1;
         inline while (i < pc.len) : (i += 1) {
@@ -176,7 +176,7 @@ pub const Edwards25519 = struct {
     // Based on real-world benchmarks, we only use this for multi-scalar multiplication.
     // NAF could be useful to half the size of precomputation tables, but we intentionally
     // avoid these to keep the standard library lightweight.
-    fn pcMul(pc: [9]Edwards25519, s: [32]u8, comptime vartime: bool) IdentityElementError!Edwards25519 {
+    fn pcMul(pc: *const [9]Edwards25519, s: [32]u8, comptime vartime: bool) IdentityElementError!Edwards25519 {
         std.debug.assert(vartime);
         const e = slide(s);
         var q = Edwards25519.identityElement;
@@ -196,7 +196,7 @@ pub const Edwards25519 = struct {
     }
 
     // Scalar multiplication with a 4-bit window and the first 15 multiples.
-    fn pcMul16(pc: [16]Edwards25519, s: [32]u8, comptime vartime: bool) IdentityElementError!Edwards25519 {
+    fn pcMul16(pc: *const [16]Edwards25519, s: [32]u8, comptime vartime: bool) IdentityElementError!Edwards25519 {
         var q = Edwards25519.identityElement;
         var pos: usize = 252;
         while (true) : (pos -= 4) {
@@ -231,11 +231,6 @@ pub const Edwards25519 = struct {
         break :pc precompute(Edwards25519.basePoint, 15);
     };
 
-    const basePointPc8 = pc: {
-        @setEvalBranchQuota(10000);
-        break :pc precompute(Edwards25519.basePoint, 8);
-    };
-
     /// Multiply an Edwards25519 point by a scalar without clamping it.
     /// Return error.WeakPublicKey if the base generates a small-order group,
     /// and error.IdentityElement if the result is the identity element.
@@ -245,33 +240,35 @@ pub const Edwards25519 = struct {
             xpc[4].rejectIdentity() catch return error.WeakPublicKey;
             break :pc xpc;
         };
-        return pcMul16(pc, s, false);
+        return pcMul16(&pc, s, false);
     }
 
     /// Multiply an Edwards25519 point by a *PUBLIC* scalar *IN VARIABLE TIME*
     /// This can be used for signature verification.
     pub fn mulPublic(p: Edwards25519, s: [32]u8) (IdentityElementError || WeakPublicKeyError)!Edwards25519 {
         if (p.is_base) {
-            return pcMul16(basePointPc, s, true);
+            return pcMul16(&basePointPc, s, true);
         } else {
             const pc = precompute(p, 8);
             pc[4].rejectIdentity() catch return error.WeakPublicKey;
-            return pcMul(pc, s, true);
+            return pcMul(&pc, s, true);
         }
     }
 
     /// Double-base multiplication of public parameters - Compute (p1*s1)+(p2*s2) *IN VARIABLE TIME*
     /// This can be used for signature verification.
     pub fn mulDoubleBasePublic(p1: Edwards25519, s1: [32]u8, p2: Edwards25519, s2: [32]u8) (IdentityElementError || WeakPublicKeyError)!Edwards25519 {
-        const pc1 = if (p1.is_base) basePointPc8 else pc: {
-            const xpc = precompute(p1, 8);
-            xpc[4].rejectIdentity() catch return error.WeakPublicKey;
-            break :pc xpc;
+        var pc1_array: [9]Edwards25519 = undefined;
+        const pc1 = if (p1.is_base) basePointPc[0..9] else pc: {
+            pc1_array = precompute(p1, 8);
+            pc1_array[4].rejectIdentity() catch return error.WeakPublicKey;
+            break :pc &pc1_array;
         };
-        const pc2 = if (p2.is_base) basePointPc8 else pc: {
-            const xpc = precompute(p2, 8);
-            xpc[4].rejectIdentity() catch return error.WeakPublicKey;
-            break :pc xpc;
+        var pc2_array: [9]Edwards25519 = undefined;
+        const pc2 = if (p2.is_base) basePointPc[0..9] else pc: {
+            pc2_array = precompute(p2, 8);
+            pc2_array[4].rejectIdentity() catch return error.WeakPublicKey;
+            break :pc &pc2_array;
         };
         const e1 = slide(s1);
         const e2 = slide(s2);
@@ -301,9 +298,13 @@ pub const Edwards25519 = struct {
     /// Computes ps0*ss0 + ps1*ss1 + ps2*ss2... faster than doing many of these operations individually
     pub fn mulMulti(comptime count: usize, ps: [count]Edwards25519, ss: [count][32]u8) (IdentityElementError || WeakPublicKeyError)!Edwards25519 {
         var pcs: [count][9]Edwards25519 = undefined;
+
+        var bpc: [9]Edwards25519 = undefined;
+        mem.copy(Edwards25519, bpc[0..], basePointPc[0..bpc.len]);
+
         for (ps) |p, i| {
             if (p.is_base) {
-                pcs[i] = basePointPc8;
+                pcs[i] = bpc;
             } else {
                 pcs[i] = precompute(p, 8);
                 pcs[i][4].rejectIdentity() catch return error.WeakPublicKey;
lib/std/crypto/pcurves/p256/p256_64.zig
@@ -1,4 +1,4 @@
-// Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --lang Zig --internal-static --public-function-case camelCase --private-function-case camelCase --no-prefix-fiat --package-name p256 '' 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp
+// Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --lang Zig --internal-static --public-function-case camelCase --private-function-case camelCase --public-type-case UpperCamelCase --private-type-case UpperCamelCase --no-prefix-fiat --package-name p256 '' 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp
 // curve description (via package name): p256
 // machine_wordsize = 64 (from "64")
 // requested operations: mul, square, add, sub, opp, from_montgomery, to_montgomery, nonzero, selectznz, to_bytes, from_bytes, one, msat, divstep, divstep_precomp
@@ -12,18 +12,25 @@
 //   return values.
 //
 // Computed values:
-// eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192)
-// bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248)
-// twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in
-//                          if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256
+//   eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192)
+//   bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248)
+//   twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in
+//                            if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256
 
 const std = @import("std");
 const cast = std.meta.cast;
 const mode = std.builtin.mode; // Checked arithmetic is disabled in non-debug modes to avoid side channels
 
-pub const Limbs = [4]u64;
+// The type MontgomeryDomainFieldElement is a field element in the Montgomery domain.
+// Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub const MontgomeryDomainFieldElement = [4]u64;
+
+// The type NonMontgomeryDomainFieldElement is a field element NOT in the Montgomery domain.
+// Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub const NonMontgomeryDomainFieldElement = [4]u64;
 
 /// The function addcarryxU64 is an addition with carry.
+///
 /// Postconditions:
 ///   out1 = (arg1 + arg2 + arg3) mod 2^64
 ///   out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
@@ -45,6 +52,7 @@ inline fn addcarryxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) vo
 }
 
 /// The function subborrowxU64 is a subtraction with borrow.
+///
 /// Postconditions:
 ///   out1 = (-arg1 + arg2 + -arg3) mod 2^64
 ///   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
@@ -66,6 +74,7 @@ inline fn subborrowxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) v
 }
 
 /// The function mulxU64 is a multiplication, returning the full double-width result.
+///
 /// Postconditions:
 ///   out1 = (arg1 * arg2) mod 2^64
 ///   out2 = ⌊arg1 * arg2 / 2^64⌋
@@ -85,6 +94,7 @@ inline fn mulxU64(out1: *u64, out2: *u64, arg1: u64, arg2: u64) void {
 }
 
 /// The function cmovznzU64 is a single-word conditional move.
+///
 /// Postconditions:
 ///   out1 = (if arg1 = 0 then arg2 else arg3)
 ///
@@ -102,6 +112,7 @@ inline fn cmovznzU64(out1: *u64, arg1: u1, arg2: u64, arg3: u64) void {
 }
 
 /// The function mul multiplies two field elements in the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 ///   0 ≤ eval arg2 < m
@@ -109,12 +120,7 @@ inline fn cmovznzU64(out1: *u64, arg1: u1, arg2: u64, arg3: u64) void {
 ///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn mul(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+pub fn mul(out1: *MontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement, arg2: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     const x1 = (arg1[1]);
@@ -399,17 +405,14 @@ pub fn mul(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
 }
 
 /// The function square squares a field element in the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
 ///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn square(out1: *[4]u64, arg1: [4]u64) void {
+pub fn square(out1: *MontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     const x1 = (arg1[1]);
@@ -694,6 +697,7 @@ pub fn square(out1: *[4]u64, arg1: [4]u64) void {
 }
 
 /// The function add adds two field elements in the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 ///   0 ≤ eval arg2 < m
@@ -701,12 +705,7 @@ pub fn square(out1: *[4]u64, arg1: [4]u64) void {
 ///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn add(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+pub fn add(out1: *MontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement, arg2: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     var x1: u64 = undefined;
@@ -751,6 +750,7 @@ pub fn add(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
 }
 
 /// The function sub subtracts two field elements in the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 ///   0 ≤ eval arg2 < m
@@ -758,12 +758,7 @@ pub fn add(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
 ///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn sub(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+pub fn sub(out1: *MontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement, arg2: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     var x1: u64 = undefined;
@@ -799,17 +794,14 @@ pub fn sub(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
 }
 
 /// The function opp negates a field element in the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
 ///   eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn opp(out1: *[4]u64, arg1: [4]u64) void {
+pub fn opp(out1: *MontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     var x1: u64 = undefined;
@@ -845,17 +837,14 @@ pub fn opp(out1: *[4]u64, arg1: [4]u64) void {
 }
 
 /// The function fromMontgomery translates a field element out of the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
 ///   eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn fromMontgomery(out1: *[4]u64, arg1: [4]u64) void {
+pub fn fromMontgomery(out1: *NonMontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     const x1 = (arg1[0]);
@@ -1001,17 +990,14 @@ pub fn fromMontgomery(out1: *[4]u64, arg1: [4]u64) void {
 }
 
 /// The function toMontgomery translates a field element into the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
 ///   eval (from_montgomery out1) mod m = eval arg1 mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn toMontgomery(out1: *[4]u64, arg1: [4]u64) void {
+pub fn toMontgomery(out1: *MontgomeryDomainFieldElement, arg1: NonMontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     const x1 = (arg1[1]);
@@ -1276,6 +1262,7 @@ pub fn toMontgomery(out1: *[4]u64, arg1: [4]u64) void {
 }
 
 /// The function nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
@@ -1293,6 +1280,7 @@ pub fn nonzero(out1: *u64, arg1: [4]u64) void {
 }
 
 /// The function selectznz is a multi-limb conditional select.
+///
 /// Postconditions:
 ///   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
 ///
@@ -1320,6 +1308,7 @@ pub fn selectznz(out1: *[4]u64, arg1: u1, arg2: [4]u64, arg3: [4]u64) void {
 }
 
 /// The function toBytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
@@ -1427,6 +1416,7 @@ pub fn toBytes(out1: *[32]u8, arg1: [4]u64) void {
 }
 
 /// The function fromBytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
+///
 /// Preconditions:
 ///   0 ≤ bytes_eval arg1 < m
 /// Postconditions:
@@ -1507,14 +1497,12 @@ pub fn fromBytes(out1: *[4]u64, arg1: [32]u8) void {
 }
 
 /// The function setOne returns the field element one in the Montgomery domain.
+///
 /// Postconditions:
 ///   eval (from_montgomery out1) mod m = 1 mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn setOne(out1: *[4]u64) void {
+pub fn setOne(out1: *MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     out1[0] = cast(u64, 0x1);
@@ -1524,11 +1512,11 @@ pub fn setOne(out1: *[4]u64) void {
 }
 
 /// The function msat returns the saturated representation of the prime modulus.
+///
 /// Postconditions:
 ///   twos_complement_eval out1 = m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 pub fn msat(out1: *[5]u64) void {
@@ -1542,6 +1530,7 @@ pub fn msat(out1: *[5]u64) void {
 }
 
 /// The function divstep computes a divstep.
+///
 /// Preconditions:
 ///   0 ≤ eval arg4 < m
 ///   0 ≤ eval arg5 < m
@@ -1795,11 +1784,11 @@ pub fn divstep(out1: *u64, out2: *[5]u64, out3: *[5]u64, out4: *[4]u64, out5: *[
 }
 
 /// The function divstepPrecomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form).
+///
 /// Postconditions:
-///   eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if (log2 m) + 1 < 46 then ⌊(49 * ((log2 m) + 1) + 80) / 17⌋ else ⌊(49 * ((log2 m) + 1) + 57) / 17⌋)
+///   eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if ⌊log2 m⌋ + 1 < 46 then ⌊(49 * (⌊log2 m⌋ + 1) + 80) / 17⌋ else ⌊(49 * (⌊log2 m⌋ + 1) + 57) / 17⌋)
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 pub fn divstepPrecomp(out1: *[4]u64) void {
lib/std/crypto/pcurves/p256/p256_scalar_64.zig
@@ -1,7 +1,7 @@
-// Autogenerated: './src/ExtractionOCaml/word_by_word_montgomery' --lang Zig --internal-static --public-function-case camelCase --private-function-case camelCase --no-prefix-fiat --package-name p256-scalar '' 64 115792089210356248762697446949407573529996955224135760342422259061068512044369
+// Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --lang Zig --internal-static --public-function-case camelCase --private-function-case camelCase --public-type-case UpperCamelCase --private-type-case UpperCamelCase --no-prefix-fiat --package-name p256-scalar '' 64 115792089210356248762697446949407573529996955224135760342422259061068512044369 mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp
 // curve description (via package name): p256-scalar
 // machine_wordsize = 64 (from "64")
-// requested operations: (all)
+// requested operations: mul, square, add, sub, opp, from_montgomery, to_montgomery, nonzero, selectznz, to_bytes, from_bytes, one, msat, divstep, divstep_precomp
 // m = 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551 (from "115792089210356248762697446949407573529996955224135760342422259061068512044369")
 //
 // NOTE: In addition to the bounds specified above each function, all
@@ -12,18 +12,25 @@
 //   return values.
 //
 // Computed values:
-// eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192)
-// bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248)
-// twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in
-//                          if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256
+//   eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192)
+//   bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248)
+//   twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in
+//                            if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256
 
 const std = @import("std");
 const cast = std.meta.cast;
 const mode = std.builtin.mode; // Checked arithmetic is disabled in non-debug modes to avoid side channels
 
-pub const Limbs = [4]u64;
+// The type MontgomeryDomainFieldElement is a field element in the Montgomery domain.
+// Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub const MontgomeryDomainFieldElement = [4]u64;
+
+// The type NonMontgomeryDomainFieldElement is a field element NOT in the Montgomery domain.
+// Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub const NonMontgomeryDomainFieldElement = [4]u64;
 
 /// The function addcarryxU64 is an addition with carry.
+///
 /// Postconditions:
 ///   out1 = (arg1 + arg2 + arg3) mod 2^64
 ///   out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
@@ -45,6 +52,7 @@ inline fn addcarryxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) vo
 }
 
 /// The function subborrowxU64 is a subtraction with borrow.
+///
 /// Postconditions:
 ///   out1 = (-arg1 + arg2 + -arg3) mod 2^64
 ///   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
@@ -66,6 +74,7 @@ inline fn subborrowxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) v
 }
 
 /// The function mulxU64 is a multiplication, returning the full double-width result.
+///
 /// Postconditions:
 ///   out1 = (arg1 * arg2) mod 2^64
 ///   out2 = ⌊arg1 * arg2 / 2^64⌋
@@ -85,6 +94,7 @@ inline fn mulxU64(out1: *u64, out2: *u64, arg1: u64, arg2: u64) void {
 }
 
 /// The function cmovznzU64 is a single-word conditional move.
+///
 /// Postconditions:
 ///   out1 = (if arg1 = 0 then arg2 else arg3)
 ///
@@ -102,6 +112,7 @@ inline fn cmovznzU64(out1: *u64, arg1: u1, arg2: u64, arg3: u64) void {
 }
 
 /// The function mul multiplies two field elements in the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 ///   0 ≤ eval arg2 < m
@@ -109,12 +120,7 @@ inline fn cmovznzU64(out1: *u64, arg1: u1, arg2: u64, arg3: u64) void {
 ///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn mul(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+pub fn mul(out1: *MontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement, arg2: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     const x1 = (arg1[1]);
@@ -447,17 +453,14 @@ pub fn mul(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
 }
 
 /// The function square squares a field element in the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
 ///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn square(out1: *[4]u64, arg1: [4]u64) void {
+pub fn square(out1: *MontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     const x1 = (arg1[1]);
@@ -790,6 +793,7 @@ pub fn square(out1: *[4]u64, arg1: [4]u64) void {
 }
 
 /// The function add adds two field elements in the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 ///   0 ≤ eval arg2 < m
@@ -797,12 +801,7 @@ pub fn square(out1: *[4]u64, arg1: [4]u64) void {
 ///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn add(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+pub fn add(out1: *MontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement, arg2: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     var x1: u64 = undefined;
@@ -847,6 +846,7 @@ pub fn add(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
 }
 
 /// The function sub subtracts two field elements in the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 ///   0 ≤ eval arg2 < m
@@ -854,12 +854,7 @@ pub fn add(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
 ///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn sub(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+pub fn sub(out1: *MontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement, arg2: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     var x1: u64 = undefined;
@@ -895,17 +890,14 @@ pub fn sub(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
 }
 
 /// The function opp negates a field element in the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
 ///   eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn opp(out1: *[4]u64, arg1: [4]u64) void {
+pub fn opp(out1: *MontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     var x1: u64 = undefined;
@@ -941,17 +933,14 @@ pub fn opp(out1: *[4]u64, arg1: [4]u64) void {
 }
 
 /// The function fromMontgomery translates a field element out of the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
 ///   eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn fromMontgomery(out1: *[4]u64, arg1: [4]u64) void {
+pub fn fromMontgomery(out1: *NonMontgomeryDomainFieldElement, arg1: MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     const x1 = (arg1[0]);
@@ -1157,17 +1146,14 @@ pub fn fromMontgomery(out1: *[4]u64, arg1: [4]u64) void {
 }
 
 /// The function toMontgomery translates a field element into the Montgomery domain.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
 ///   eval (from_montgomery out1) mod m = eval arg1 mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn toMontgomery(out1: *[4]u64, arg1: [4]u64) void {
+pub fn toMontgomery(out1: *MontgomeryDomainFieldElement, arg1: NonMontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     const x1 = (arg1[1]);
@@ -1480,6 +1466,7 @@ pub fn toMontgomery(out1: *[4]u64, arg1: [4]u64) void {
 }
 
 /// The function nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
@@ -1497,6 +1484,7 @@ pub fn nonzero(out1: *u64, arg1: [4]u64) void {
 }
 
 /// The function selectznz is a multi-limb conditional select.
+///
 /// Postconditions:
 ///   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
 ///
@@ -1524,6 +1512,7 @@ pub fn selectznz(out1: *[4]u64, arg1: u1, arg2: [4]u64, arg3: [4]u64) void {
 }
 
 /// The function toBytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
+///
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
@@ -1631,6 +1620,7 @@ pub fn toBytes(out1: *[32]u8, arg1: [4]u64) void {
 }
 
 /// The function fromBytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
+///
 /// Preconditions:
 ///   0 ≤ bytes_eval arg1 < m
 /// Postconditions:
@@ -1711,14 +1701,12 @@ pub fn fromBytes(out1: *[4]u64, arg1: [32]u8) void {
 }
 
 /// The function setOne returns the field element one in the Montgomery domain.
+///
 /// Postconditions:
 ///   eval (from_montgomery out1) mod m = 1 mod m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn setOne(out1: *[4]u64) void {
+pub fn setOne(out1: *MontgomeryDomainFieldElement) void {
     @setRuntimeSafety(mode == .Debug);
 
     out1[0] = 0xc46353d039cdaaf;
@@ -1728,11 +1716,11 @@ pub fn setOne(out1: *[4]u64) void {
 }
 
 /// The function msat returns the saturated representation of the prime modulus.
+///
 /// Postconditions:
 ///   twos_complement_eval out1 = m
 ///   0 ≤ eval out1 < m
 ///
-/// Input Bounds:
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 pub fn msat(out1: *[5]u64) void {
@@ -1745,24 +1733,8 @@ pub fn msat(out1: *[5]u64) void {
     out1[4] = cast(u64, 0x0);
 }
 
-/// The function divstepPrecomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form).
-/// Postconditions:
-///   eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if (log2 m) + 1 < 46 then ⌊(49 * ((log2 m) + 1) + 80) / 17⌋ else ⌊(49 * ((log2 m) + 1) + 57) / 17⌋)
-///   0 ≤ eval out1 < m
-///
-/// Input Bounds:
-/// Output Bounds:
-///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn divstepPrecomp(out1: *[4]u64) void {
-    @setRuntimeSafety(mode == .Debug);
-
-    out1[0] = 0xd739262fb7fcfbb5;
-    out1[1] = 0x8ac6f75d20074414;
-    out1[2] = 0xc67428bfb5e3c256;
-    out1[3] = 0x444962f2eda7aedf;
-}
-
 /// The function divstep computes a divstep.
+///
 /// Preconditions:
 ///   0 ≤ eval arg4 < m
 ///   0 ≤ eval arg5 < m
@@ -2014,3 +1986,20 @@ pub fn divstep(out1: *u64, out2: *[5]u64, out3: *[5]u64, out4: *[4]u64, out5: *[
     out5[2] = x125;
     out5[3] = x126;
 }
+
+/// The function divstepPrecomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form).
+///
+/// Postconditions:
+///   eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if ⌊log2 m⌋ + 1 < 46 then ⌊(49 * (⌊log2 m⌋ + 1) + 80) / 17⌋ else ⌊(49 * (⌊log2 m⌋ + 1) + 57) / 17⌋)
+///   0 ≤ eval out1 < m
+///
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub fn divstepPrecomp(out1: *[4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    out1[0] = 0xd739262fb7fcfbb5;
+    out1[1] = 0x8ac6f75d20074414;
+    out1[2] = 0xc67428bfb5e3c256;
+    out1[3] = 0x444962f2eda7aedf;
+}
lib/std/crypto/pcurves/common.zig
@@ -20,12 +20,13 @@ pub const FieldParams = struct {
 /// A field element, internally stored in Montgomery domain.
 pub fn Field(comptime params: FieldParams) type {
     const fiat = params.fiat;
-    const Limbs = fiat.Limbs;
+    const MontgomeryDomainFieldElement = fiat.MontgomeryDomainFieldElement;
+    const NonMontgomeryDomainFieldElement = fiat.NonMontgomeryDomainFieldElement;
 
     return struct {
         const Fe = @This();
 
-        limbs: Limbs,
+        limbs: MontgomeryDomainFieldElement,
 
         /// Field size.
         pub const field_order = params.field_order;
@@ -40,7 +41,7 @@ pub fn Field(comptime params: FieldParams) type {
         pub const encoded_length = params.encoded_length;
 
         /// Zero.
-        pub const zero: Fe = Fe{ .limbs = mem.zeroes(Limbs) };
+        pub const zero: Fe = Fe{ .limbs = mem.zeroes(MontgomeryDomainFieldElement) };
 
         /// One.
         pub const one = one: {
@@ -73,16 +74,16 @@ pub fn Field(comptime params: FieldParams) type {
         pub fn fromBytes(s_: [encoded_length]u8, endian: builtin.Endian) NonCanonicalError!Fe {
             var s = if (endian == .Little) s_ else orderSwap(s_);
             try rejectNonCanonical(s, .Little);
-            var limbs_z: Limbs = undefined;
+            var limbs_z: NonMontgomeryDomainFieldElement = undefined;
             fiat.fromBytes(&limbs_z, s);
-            var limbs: Limbs = undefined;
+            var limbs: MontgomeryDomainFieldElement = undefined;
             fiat.toMontgomery(&limbs, limbs_z);
             return Fe{ .limbs = limbs };
         }
 
         /// Pack a field element.
         pub fn toBytes(fe: Fe, endian: builtin.Endian) [encoded_length]u8 {
-            var limbs_z: Limbs = undefined;
+            var limbs_z: NonMontgomeryDomainFieldElement = undefined;
             fiat.fromMontgomery(&limbs_z, fe.limbs);
             var s: [encoded_length]u8 = undefined;
             fiat.toBytes(&s, limbs_z);
@@ -198,6 +199,7 @@ pub fn Field(comptime params: FieldParams) type {
         // Field inversion from https://eprint.iacr.org/2021/549.pdf
         pub fn invert(a: Fe) Fe {
             const iterations = (49 * field_bits + 57) / 17;
+            const Limbs = @TypeOf(a.limbs);
             const Word = @TypeOf(a.limbs[0]);
             const XLimbs = [a.limbs.len + 1]Word;
 
lib/std/crypto/pcurves/p256.zig
@@ -286,6 +286,11 @@ pub const P256 = struct {
         return p.add(q.neg());
     }
 
+    /// Subtract P256 points, the second being specified using affine coordinates.
+    pub fn subMixed(p: P256, q: AffineCoordinates) P256 {
+        return p.addMixed(q.neg());
+    }
+
     /// Return affine coordinates.
     pub fn affineCoordinates(p: P256) AffineCoordinates {
         const zinv = p.z.invert();
@@ -312,7 +317,7 @@ pub const P256 = struct {
         p.z.cMov(a.z, c);
     }
 
-    fn pcSelect(comptime n: usize, pc: [n]P256, b: u8) P256 {
+    fn pcSelect(comptime n: usize, pc: *const [n]P256, b: u8) P256 {
         var t = P256.identityElement;
         comptime var i: u8 = 1;
         inline while (i < pc.len) : (i += 1) {
@@ -341,7 +346,7 @@ pub const P256 = struct {
         return e;
     }
 
-    fn pcMul(pc: [9]P256, s: [32]u8, comptime vartime: bool) IdentityElementError!P256 {
+    fn pcMul(pc: *const [9]P256, s: [32]u8, comptime vartime: bool) IdentityElementError!P256 {
         std.debug.assert(vartime);
         const e = slide(s);
         var q = P256.identityElement;
@@ -360,7 +365,7 @@ pub const P256 = struct {
         return q;
     }
 
-    fn pcMul16(pc: [16]P256, s: [32]u8, comptime vartime: bool) IdentityElementError!P256 {
+    fn pcMul16(pc: *const [16]P256, s: [32]u8, comptime vartime: bool) IdentityElementError!P256 {
         var q = P256.identityElement;
         var pos: usize = 252;
         while (true) : (pos -= 4) {
@@ -395,33 +400,69 @@ pub const P256 = struct {
         break :pc precompute(P256.basePoint, 15);
     };
 
-    const basePointPc8 = pc: {
-        @setEvalBranchQuota(50000);
-        break :pc precompute(P256.basePoint, 8);
-    };
-
     /// Multiply an elliptic curve point by a scalar.
     /// Return error.IdentityElement if the result is the identity element.
     pub fn mul(p: P256, s_: [32]u8, endian: builtin.Endian) IdentityElementError!P256 {
         const s = if (endian == .Little) s_ else Fe.orderSwap(s_);
-        const pc = if (p.is_base) basePointPc else pc: {
-            try p.rejectIdentity();
-            const xpc = precompute(p, 15);
-            break :pc xpc;
-        };
-        return pcMul16(pc, s, false);
+        if (p.is_base) {
+            return pcMul16(&basePointPc, s, false);
+        }
+        try p.rejectIdentity();
+        const pc = precompute(p, 15);
+        return pcMul16(&pc, s, false);
     }
 
     /// Multiply an elliptic curve point by a *PUBLIC* scalar *IN VARIABLE TIME*
     /// This can be used for signature verification.
     pub fn mulPublic(p: P256, s_: [32]u8, endian: builtin.Endian) IdentityElementError!P256 {
         const s = if (endian == .Little) s_ else Fe.orderSwap(s_);
-        const pc = if (p.is_base) basePointPc8 else pc: {
-            try p.rejectIdentity();
-            const xpc = precompute(p, 8);
-            break :pc xpc;
+        if (p.is_base) {
+            return pcMul16(&basePointPc, s, true);
+        }
+        try p.rejectIdentity();
+        const pc = precompute(p, 8);
+        return pcMul(&pc, s, true);
+    }
+
+    /// Double-base multiplication of public parameters - Compute (p1*s1)+(p2*s2) *IN VARIABLE TIME*
+    /// This can be used for signature verification.
+    pub fn mulDoubleBasePublic(p1: P256, s1_: [32]u8, p2: P256, s2_: [32]u8, endian: builtin.Endian) IdentityElementError!P256 {
+        const s1 = if (endian == .Little) s1_ else Fe.orderSwap(s1_);
+        const s2 = if (endian == .Little) s2_ else Fe.orderSwap(s2_);
+        try p1.rejectIdentity();
+        var pc1_array: [9]P256 = undefined;
+        const pc1 = if (p1.is_base) basePointPc[0..9] else pc: {
+            pc1_array = precompute(p1, 8);
+            break :pc &pc1_array;
         };
-        return pcMul(pc, s, true);
+        try p2.rejectIdentity();
+        var pc2_array: [9]P256 = undefined;
+        const pc2 = if (p2.is_base) basePointPc[0..9] else pc: {
+            pc2_array = precompute(p2, 8);
+            break :pc &pc2_array;
+        };
+        const e1 = slide(s1);
+        const e2 = slide(s2);
+        var q = P256.identityElement;
+        var pos: usize = 2 * 32 - 1;
+        while (true) : (pos -= 1) {
+            const slot1 = e1[pos];
+            if (slot1 > 0) {
+                q = q.add(pc1[@intCast(usize, slot1)]);
+            } else if (slot1 < 0) {
+                q = q.sub(pc1[@intCast(usize, -slot1)]);
+            }
+            const slot2 = e2[pos];
+            if (slot2 > 0) {
+                q = q.add(pc2[@intCast(usize, slot2)]);
+            } else if (slot2 < 0) {
+                q = q.sub(pc2[@intCast(usize, -slot2)]);
+            }
+            if (pos == 0) break;
+            q = q.dbl().dbl().dbl().dbl();
+        }
+        try q.rejectIdentity();
+        return q;
     }
 };
 
lib/std/crypto/pcurves/tests.zig
@@ -107,3 +107,13 @@ test "p256 neutral element decoding" {
     const p = try P256.fromAffineCoordinates(.{ .x = P256.Fe.zero, .y = P256.Fe.one });
     try testing.expectError(error.IdentityElement, p.rejectIdentity());
 }
+
+test "p256 double base multiplication" {
+    const p1 = P256.basePoint;
+    const p2 = P256.basePoint.dbl();
+    const s1 = [_]u8{0x01} ** 32;
+    const s2 = [_]u8{0x02} ** 32;
+    const pr1 = try P256.mulDoubleBasePublic(p1, s1, p2, s2, .Little);
+    const pr2 = (try p1.mul(s1, .Little)).add(try p2.mul(s2, .Little));
+    try testing.expect(pr1.equivalent(pr2));
+}