Commit fe8781357a

Frank Denis <124872+jedisct1@users.noreply.github.com>
2021-05-01 08:14:32
std.crypto: add support for the NIST P-256 curve (#8627)
Uses verified code generated by fiat-crypto for field arithmetic, and complete formulas to avoid side channels. There's still plenty of room for optimizations, especially with a fixed base. But this gives us a framework to easily add other similar curves.
1 parent 557eb41
Changed files (6)
lib/std/crypto/pcurves/p256/field.zig
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2015-2021 Zig Contributors
+// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
+// The MIT license requires this copyright notice to be included in all copies
+// and substantial portions of the software.
+
+const std = @import("std");
+const builtin = std.builtin;
+const crypto = std.crypto;
+const debug = std.debug;
+const mem = std.mem;
+const meta = std.meta;
+
+const fiat = @import("p256_64.zig");
+
+const NonCanonicalError = crypto.errors.NonCanonicalError;
+const NotSquareError = crypto.errors.NotSquareError;
+
+const Limbs = fiat.Limbs;
+
+/// An element of the NIST P-256 base field, internally stored in Montgomery domain.
+///
+/// All arithmetic is delegated to the fiat-crypto generated routines imported
+/// as `fiat`; this type only adds encoding/decoding and a few derived operations.
+pub const Fe = struct {
+    limbs: Limbs,
+
+    /// Field size.
+    pub const field_order = 115792089210356248762697446949407573530086143415290314195533631308867097853951;
+
+    /// Number of bits that can be saturated without overflowing.
+    pub const saturated_bits = 255;
+
+    /// Zero.
+    pub const zero: Fe = Fe{ .limbs = mem.zeroes(Limbs) };
+
+    /// One (computed at compile time by `fiat.p256SetOne`).
+    pub const one = comptime one: {
+        var fe: Fe = undefined;
+        fiat.p256SetOne(&fe.limbs);
+        break :one fe;
+    };
+
+    /// Reject non-canonical encodings of an element.
+    ///
+    /// `s_` is a 32-byte encoding in the byte order given by `endian`.
+    /// Returns `error.NonCanonical` unless the encoded integer is strictly
+    /// smaller than `field_order`. The comparison is done with
+    /// `crypto.utils.timingSafeCompare`.
+    pub fn rejectNonCanonical(s_: [32]u8, endian: builtin.Endian) NonCanonicalError!void {
+        // Normalize to little-endian so a single comparison path is used.
+        const s = if (endian == .Little) s_ else orderSwap(s_);
+        // Little-endian encoding of the field order, built at compile time.
+        const field_order_s = comptime fos: {
+            var fos: [32]u8 = undefined;
+            mem.writeIntLittle(u256, &fos, field_order);
+            break :fos fos;
+        };
+        if (crypto.utils.timingSafeCompare(u8, &s, &field_order_s, .Little) != .lt) {
+            return error.NonCanonical;
+        }
+    }
+
+    /// Swap the endianness of an encoded element (returns the bytes of `s` in reverse order).
+    pub fn orderSwap(s: [32]u8) [32]u8 {
+        var t = s;
+        for (s) |x, i| t[t.len - 1 - i] = x;
+        return t;
+    }
+
+    /// Unpack a field element.
+    ///
+    /// `s_` is a 32-byte encoding in the byte order given by `endian`.
+    /// Returns `error.NonCanonical` if the encoded integer is not smaller
+    /// than `field_order`.
+    pub fn fromBytes(s_: [32]u8, endian: builtin.Endian) NonCanonicalError!Fe {
+        const s = if (endian == .Little) s_ else orderSwap(s_);
+        // `s` is already little-endian here, hence the fixed `.Little`.
+        try rejectNonCanonical(s, .Little);
+        var limbs_z: Limbs = undefined;
+        fiat.p256FromBytes(&limbs_z, s);
+        // Move the decoded value into the Montgomery domain used internally.
+        var limbs: Limbs = undefined;
+        fiat.p256ToMontgomery(&limbs, limbs_z);
+        return Fe{ .limbs = limbs };
+    }
+
+    /// Pack a field element into 32 bytes, in the byte order given by `endian`.
+    pub fn toBytes(fe: Fe, endian: builtin.Endian) [32]u8 {
+        // Leave the Montgomery domain before serializing.
+        var limbs_z: Limbs = undefined;
+        fiat.p256FromMontgomery(&limbs_z, fe.limbs);
+        var s: [32]u8 = undefined;
+        fiat.p256ToBytes(&s, limbs_z);
+        return if (endian == .Little) s else orderSwap(s);
+    }
+
+    /// Create a field element from a compile-time known integer.
+    /// Returns `error.NonCanonical` if `x` is not smaller than `field_order`.
+    pub fn fromInt(comptime x: u256) NonCanonicalError!Fe {
+        var s: [32]u8 = undefined;
+        mem.writeIntLittle(u256, &s, x);
+        return fromBytes(s, .Little);
+    }
+
+    /// Return the field element as an integer.
+    pub fn toInt(fe: Fe) u256 {
+        const s = fe.toBytes(.Little);
+        return mem.readIntLittle(u256, &s);
+    }
+
+    /// Return true if the field element is zero.
+    pub fn isZero(fe: Fe) bool {
+        var z: @TypeOf(fe.limbs[0]) = undefined;
+        fiat.p256Nonzero(&z, fe.limbs);
+        return z == 0;
+    }
+
+    /// Return true if both field elements are equivalent
+    /// (i.e. their difference is zero).
+    pub fn equivalent(a: Fe, b: Fe) bool {
+        return a.sub(b).isZero();
+    }
+
+    /// Return true if the element is odd.
+    /// The parity is read from the lowest bit of the little-endian encoding.
+    pub fn isOdd(fe: Fe) bool {
+        const s = fe.toBytes(.Little);
+        return @truncate(u1, s[0]) != 0;
+    }
+
+    /// Conditionally replace a field element with `a` if `c` is 1;
+    /// the selection is delegated to `fiat.p256Selectznz`.
+    pub fn cMov(fe: *Fe, a: Fe, c: u1) void {
+        fiat.p256Selectznz(&fe.limbs, c, fe.limbs, a.limbs);
+    }
+
+    /// Add field elements.
+    pub fn add(a: Fe, b: Fe) Fe {
+        var fe: Fe = undefined;
+        fiat.p256Add(&fe.limbs, a.limbs, b.limbs);
+        return fe;
+    }
+
+    /// Subtract field elements.
+    pub fn sub(a: Fe, b: Fe) Fe {
+        var fe: Fe = undefined;
+        fiat.p256Sub(&fe.limbs, a.limbs, b.limbs);
+        return fe;
+    }
+
+    /// Double a field element (computed as `a + a`).
+    pub fn dbl(a: Fe) Fe {
+        var fe: Fe = undefined;
+        fiat.p256Add(&fe.limbs, a.limbs, a.limbs);
+        return fe;
+    }
+
+    /// Multiply field elements.
+    pub fn mul(a: Fe, b: Fe) Fe {
+        var fe: Fe = undefined;
+        fiat.p256Mul(&fe.limbs, a.limbs, b.limbs);
+        return fe;
+    }
+
+    /// Square a field element.
+    pub fn sq(a: Fe) Fe {
+        var fe: Fe = undefined;
+        fiat.p256Square(&fe.limbs, a.limbs);
+        return fe;
+    }
+
+    /// Square a field element n times, i.e. compute a^(2^n).
+    /// `n` is compile-time known, so the iteration count never depends on runtime data.
+    fn sqn(a: Fe, comptime n: comptime_int) Fe {
+        var i: usize = 0;
+        var fe = a;
+        while (i < n) : (i += 1) {
+            fe = fe.sq();
+        }
+        return fe;
+    }
+
+    /// Compute a^n using right-to-left square-and-multiply.
+    /// The exponent `n` is compile-time known; the branches below depend on
+    /// its bits only, never on the value of `a`.
+    pub fn pow(a: Fe, comptime T: type, comptime n: T) Fe {
+        var fe = one;
+        var x: T = n;
+        var t = a;
+        while (true) {
+            if (@truncate(u1, x) != 0) fe = fe.mul(t);
+            x >>= 1;
+            // Stop before the final squaring, which would be unused.
+            if (x == 0) break;
+            t = t.sq();
+        }
+        return fe;
+    }
+
+    /// Negate a field element.
+    pub fn neg(a: Fe) Fe {
+        var fe: Fe = undefined;
+        fiat.p256Opp(&fe.limbs, a.limbs);
+        return fe;
+    }
+
+    /// Return the inverse of a field element, or 0 if a=0.
+    // Field inversion from https://eprint.iacr.org/2021/549.pdf
+    pub fn invert(a: Fe) Fe {
+        const len_prime = 256;
+        // Fixed number of divsteps, derived only from the bit length of the prime.
+        const iterations = (49 * len_prime + 57) / 17;
+        const Word = @TypeOf(a.limbs[0]);
+        // f and g carry one more word than a field element.
+        const XLimbs = [a.limbs.len + 1]Word;
+
+        var d: Word = 1;
+        var f: XLimbs = undefined;
+        fiat.p256Msat(&f);
+
+        // g starts as `a` taken out of the Montgomery domain, with a zero top word.
+        var g: XLimbs = undefined;
+        fiat.p256FromMontgomery(g[0..a.limbs.len], a.limbs);
+        g[g.len - 1] = 0;
+
+        var r: Limbs = undefined;
+        fiat.p256SetOne(&r);
+        var v = mem.zeroes(Limbs);
+
+        var precomp: Limbs = undefined;
+        fiat.p256DivstepPrecomp(&precomp);
+
+        var out1: Word = undefined;
+        var out2: XLimbs = undefined;
+        var out3: XLimbs = undefined;
+        var out4: Limbs = undefined;
+        var out5: Limbs = undefined;
+
+        // Run the divsteps two at a time, ping-ponging between the working
+        // state (d, f, g, v, r) and the scratch state (out1..out5).
+        var i: usize = 0;
+        while (i < iterations - iterations % 2) : (i += 2) {
+            fiat.p256Divstep(&out1, &out2, &out3, &out4, &out5, d, f, g, v, r);
+            fiat.p256Divstep(&d, &f, &g, &v, &r, out1, out2, out3, out4, out5);
+        }
+        if (iterations % 2 != 0) {
+            // Odd iteration count: one last step; only v and f are needed afterwards.
+            fiat.p256Divstep(&out1, &out2, &out3, &out4, &out5, d, f, g, v, r);
+            v = out4;
+            f = out2;
+        }
+        // Select between v and its negation according to the top bit of f's highest word.
+        var v_opp: Limbs = undefined;
+        fiat.p256Opp(&v_opp, v);
+        fiat.p256Selectznz(&v, @truncate(u1, f[f.len - 1] >> (meta.bitCount(Word) - 1)), v, v_opp);
+        var fe: Fe = undefined;
+        fiat.p256Mul(&fe.limbs, v, precomp);
+        return fe;
+    }
+
+    /// Return true if the field element is a square.
+    ///
+    /// The result is `x2^((p-1)/2) == 1`, evaluated with a fixed addition
+    /// chain. Note that zero yields `false` here (the power is 0, not 1),
+    /// even though `sqrt` accepts zero.
+    pub fn isSquare(x2: Fe) bool {
+        const t110 = x2.mul(x2.sq()).sq();
+        const t111 = x2.mul(t110);
+        const t111111 = t111.mul(x2.mul(t110).sqn(3));
+        const x15 = t111111.sqn(6).mul(t111111).sqn(3).mul(t111);
+        const x16 = x15.sq().mul(x2);
+        const x53 = x16.sqn(16).mul(x16).sqn(15);
+        const x47 = x15.mul(x53);
+        const ls = x47.mul(((x53.sqn(17).mul(x2)).sqn(143).mul(x47)).sqn(47)).sq().mul(x2); // Legendre symbol, (p-1)/2
+        return ls.equivalent(Fe.one);
+    }
+
+    // x=x2^((field_order+1)/4) w/ field order=3 (mod 4).
+    // The result is only a square root when `x2` is a square; callers must check.
+    fn uncheckedSqrt(x2: Fe) Fe {
+        comptime debug.assert(field_order % 4 == 3);
+        const t11 = x2.mul(x2.sq());
+        const t1111 = t11.mul(t11.sqn(2));
+        const t11111111 = t1111.mul(t1111.sqn(4));
+        const x16 = t11111111.sqn(8).mul(t11111111);
+        return x16.sqn(16).mul(x16).sqn(32).mul(x2).sqn(96).mul(x2).sqn(94);
+    }
+
+    /// Compute the square root of `x2`, returning `error.NotSquare` if `x2` was not a square.
+    /// The candidate root is verified by squaring it and comparing with `x2`.
+    pub fn sqrt(x2: Fe) NotSquareError!Fe {
+        const x = x2.uncheckedSqrt();
+        if (x.sq().equivalent(x2)) {
+            return x;
+        }
+        return error.NotSquare;
+    }
+};
lib/std/crypto/pcurves/p256/p256_64.zig
@@ -0,0 +1,1774 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2015-2021 Zig Contributors
+// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
+// The MIT license requires this copyright notice to be included in all copies
+// and substantial portions of the software.
+
+// Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --lang Zig --internal-static --public-function-case camelCase --private-function-case camelCase p256 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp
+// curve description: p256
+// machine_wordsize = 64 (from "64")
+// requested operations: mul, square, add, sub, opp, from_montgomery, to_montgomery, nonzero, selectznz, to_bytes, from_bytes, one, msat, divstep, divstep_precomp
+// m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1")
+//
+// NOTE: In addition to the bounds specified above each function, all
+//   functions synthesized for this Montgomery arithmetic require the
+//   input to be strictly less than the prime modulus (m), and also
+//   require the input to be in the unique saturated representation.
+//   All functions also ensure that these two properties are true of
+//   return values.
+//
+// Computed values:
+// eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192)
+// bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248)
+
+pub const Limbs = [4]u64;
+
+/// p256AddcarryxU64 adds two 64-bit words plus an incoming carry bit.
+/// Postconditions:
+///   out1 = (arg1 + arg2 + arg3) mod 2^64
+///   out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
+///
+/// Input Bounds:
+///   arg1: [0x0 ~> 0x1]
+///   arg2: [0x0 ~> 0xffffffffffffffff]
+///   arg3: [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+///   out2: [0x0 ~> 0x1]
+fn p256AddcarryxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+    // The three-way sum is at most 2^65 - 1, so it always fits in 128 bits.
+    const wide_sum: u128 = @as(u128, arg2) + @as(u128, arg3) + @as(u128, arg1);
+    // Low word is the result, bit 64 is the outgoing carry.
+    out1.* = @truncate(u64, wide_sum);
+    out2.* = @truncate(u1, wide_sum >> 64);
+}
+
+/// p256SubborrowxU64 subtracts a 64-bit word and an incoming borrow bit from a 64-bit word.
+/// Postconditions:
+///   out1 = (-arg1 + arg2 + -arg3) mod 2^64
+///   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
+///
+/// Input Bounds:
+///   arg1: [0x0 ~> 0x1]
+///   arg2: [0x0 ~> 0xffffffffffffffff]
+///   arg3: [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+///   out2: [0x0 ~> 0x1]
+fn p256SubborrowxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+    // Wrapping 128-bit difference: when arg2 < arg3 + arg1 the result wraps
+    // around 2^128 and every bit above bit 63 becomes 1.
+    const wide_diff: u128 = @as(u128, arg2) -% @as(u128, arg3) -% @as(u128, arg1);
+    // Low word is the result, bit 64 is the outgoing borrow.
+    out1.* = @truncate(u64, wide_diff);
+    out2.* = @truncate(u1, wide_diff >> 64);
+}
+
+/// p256MulxU64 multiplies two 64-bit words and returns the full 128-bit product as two words.
+/// Postconditions:
+///   out1 = (arg1 * arg2) mod 2^64
+///   out2 = ⌊arg1 * arg2 / 2^64⌋
+///
+/// Input Bounds:
+///   arg1: [0x0 ~> 0xffffffffffffffff]
+///   arg2: [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+///   out2: [0x0 ~> 0xffffffffffffffff]
+fn p256MulxU64(out1: *u64, out2: *u64, arg1: u64, arg2: u64) callconv(.Inline) void {
+    // Both operands are widened first so the product cannot overflow.
+    const product: u128 = @as(u128, arg1) * @as(u128, arg2);
+    const low_half = @truncate(u64, product);
+    const high_half = @truncate(u64, product >> 64);
+    out1.* = low_half;
+    out2.* = high_half;
+}
+
+/// p256CmovznzU64 selects one of two 64-bit words according to a single-bit flag, without branching.
+/// Postconditions:
+///   out1 = (if arg1 = 0 then arg2 else arg3)
+///
+/// Input Bounds:
+///   arg1: [0x0 ~> 0x1]
+///   arg2: [0x0 ~> 0xffffffffffffffff]
+///   arg3: [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+fn p256CmovznzU64(out1: *u64, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+    // All-ones when arg1 is 1, all-zeros when arg1 is 0.
+    const all_ones_if_set: u64 = @as(u64, 0) -% @as(u64, arg1);
+    // XOR-select: flipping exactly the differing bits turns arg2 into arg3.
+    out1.* = arg2 ^ (all_ones_if_set & (arg2 ^ arg3));
+}
+
+/// The function p256Mul multiplies two field elements in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+///   0 ≤ eval arg2 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///
+/// Structure of the generated body (word-by-word Montgomery, per the generator
+/// invocation at the top of this file): the limbs of arg1 are processed in the
+/// order 0, 1, 2, 3. Each round adds limb * arg2 to a running accumulator and
+/// then adds a multiple of m derived from the accumulator's low word. A single
+/// conditional subtraction of m follows the four rounds.
+pub fn p256Mul(out1: *Limbs, arg1: Limbs, arg2: Limbs) void {
+    const x1: u64 = (arg1[1]);
+    const x2: u64 = (arg1[2]);
+    const x3: u64 = (arg1[3]);
+    const x4: u64 = (arg1[0]);
+    // Round 0: partial products of arg1[0] (x4) with every limb of arg2,
+    // summed with carry propagation into x11 (low word), x13, x15, x17, x19.
+    var x5: u64 = undefined;
+    var x6: u64 = undefined;
+    p256MulxU64(&x5, &x6, x4, (arg2[3]));
+    var x7: u64 = undefined;
+    var x8: u64 = undefined;
+    p256MulxU64(&x7, &x8, x4, (arg2[2]));
+    var x9: u64 = undefined;
+    var x10: u64 = undefined;
+    p256MulxU64(&x9, &x10, x4, (arg2[1]));
+    var x11: u64 = undefined;
+    var x12: u64 = undefined;
+    p256MulxU64(&x11, &x12, x4, (arg2[0]));
+    var x13: u64 = undefined;
+    var x14: u1 = undefined;
+    p256AddcarryxU64(&x13, &x14, 0x0, x12, x9);
+    var x15: u64 = undefined;
+    var x16: u1 = undefined;
+    p256AddcarryxU64(&x15, &x16, x14, x10, x7);
+    var x17: u64 = undefined;
+    var x18: u1 = undefined;
+    p256AddcarryxU64(&x17, &x18, x16, x8, x5);
+    const x19: u64 = (@intCast(u64, x18) + x6);
+    // Reduction for round 0: multiply the low word x11 by the nonzero limbs of m
+    // (0xffffffffffffffff, 0xffffffff and 0xffffffff00000001; limb 2 of m is zero)
+    // and add the result to the accumulator. The low word of that sum (x29) is
+    // not used afterwards; only its carry (x30) is propagated.
+    var x20: u64 = undefined;
+    var x21: u64 = undefined;
+    p256MulxU64(&x20, &x21, x11, 0xffffffff00000001);
+    var x22: u64 = undefined;
+    var x23: u64 = undefined;
+    p256MulxU64(&x22, &x23, x11, 0xffffffff);
+    var x24: u64 = undefined;
+    var x25: u64 = undefined;
+    p256MulxU64(&x24, &x25, x11, 0xffffffffffffffff);
+    var x26: u64 = undefined;
+    var x27: u1 = undefined;
+    p256AddcarryxU64(&x26, &x27, 0x0, x25, x22);
+    const x28: u64 = (@intCast(u64, x27) + x23);
+    var x29: u64 = undefined;
+    var x30: u1 = undefined;
+    p256AddcarryxU64(&x29, &x30, 0x0, x11, x24);
+    var x31: u64 = undefined;
+    var x32: u1 = undefined;
+    p256AddcarryxU64(&x31, &x32, x30, x13, x26);
+    var x33: u64 = undefined;
+    var x34: u1 = undefined;
+    p256AddcarryxU64(&x33, &x34, x32, x15, x28);
+    var x35: u64 = undefined;
+    var x36: u1 = undefined;
+    p256AddcarryxU64(&x35, &x36, x34, x17, x20);
+    var x37: u64 = undefined;
+    var x38: u1 = undefined;
+    p256AddcarryxU64(&x37, &x38, x36, x19, x21);
+    // Round 1: partial products of arg1[1] (x1) with every limb of arg2,
+    // then added to the accumulator (x31, x33, x35, x37, x38).
+    var x39: u64 = undefined;
+    var x40: u64 = undefined;
+    p256MulxU64(&x39, &x40, x1, (arg2[3]));
+    var x41: u64 = undefined;
+    var x42: u64 = undefined;
+    p256MulxU64(&x41, &x42, x1, (arg2[2]));
+    var x43: u64 = undefined;
+    var x44: u64 = undefined;
+    p256MulxU64(&x43, &x44, x1, (arg2[1]));
+    var x45: u64 = undefined;
+    var x46: u64 = undefined;
+    p256MulxU64(&x45, &x46, x1, (arg2[0]));
+    var x47: u64 = undefined;
+    var x48: u1 = undefined;
+    p256AddcarryxU64(&x47, &x48, 0x0, x46, x43);
+    var x49: u64 = undefined;
+    var x50: u1 = undefined;
+    p256AddcarryxU64(&x49, &x50, x48, x44, x41);
+    var x51: u64 = undefined;
+    var x52: u1 = undefined;
+    p256AddcarryxU64(&x51, &x52, x50, x42, x39);
+    const x53: u64 = (@intCast(u64, x52) + x40);
+    var x54: u64 = undefined;
+    var x55: u1 = undefined;
+    p256AddcarryxU64(&x54, &x55, 0x0, x31, x45);
+    var x56: u64 = undefined;
+    var x57: u1 = undefined;
+    p256AddcarryxU64(&x56, &x57, x55, x33, x47);
+    var x58: u64 = undefined;
+    var x59: u1 = undefined;
+    p256AddcarryxU64(&x58, &x59, x57, x35, x49);
+    var x60: u64 = undefined;
+    var x61: u1 = undefined;
+    p256AddcarryxU64(&x60, &x61, x59, x37, x51);
+    var x62: u64 = undefined;
+    var x63: u1 = undefined;
+    p256AddcarryxU64(&x62, &x63, x61, @intCast(u64, x38), x53);
+    // Reduction for round 1: same pattern as above, driven by the low word x54
+    // (the low word of the sum, x73, is not used afterwards).
+    var x64: u64 = undefined;
+    var x65: u64 = undefined;
+    p256MulxU64(&x64, &x65, x54, 0xffffffff00000001);
+    var x66: u64 = undefined;
+    var x67: u64 = undefined;
+    p256MulxU64(&x66, &x67, x54, 0xffffffff);
+    var x68: u64 = undefined;
+    var x69: u64 = undefined;
+    p256MulxU64(&x68, &x69, x54, 0xffffffffffffffff);
+    var x70: u64 = undefined;
+    var x71: u1 = undefined;
+    p256AddcarryxU64(&x70, &x71, 0x0, x69, x66);
+    const x72: u64 = (@intCast(u64, x71) + x67);
+    var x73: u64 = undefined;
+    var x74: u1 = undefined;
+    p256AddcarryxU64(&x73, &x74, 0x0, x54, x68);
+    var x75: u64 = undefined;
+    var x76: u1 = undefined;
+    p256AddcarryxU64(&x75, &x76, x74, x56, x70);
+    var x77: u64 = undefined;
+    var x78: u1 = undefined;
+    p256AddcarryxU64(&x77, &x78, x76, x58, x72);
+    var x79: u64 = undefined;
+    var x80: u1 = undefined;
+    p256AddcarryxU64(&x79, &x80, x78, x60, x64);
+    var x81: u64 = undefined;
+    var x82: u1 = undefined;
+    p256AddcarryxU64(&x81, &x82, x80, x62, x65);
+    const x83: u64 = (@intCast(u64, x82) + @intCast(u64, x63));
+    // Round 2: partial products of arg1[2] (x2) with every limb of arg2,
+    // then added to the accumulator (x75, x77, x79, x81, x83).
+    var x84: u64 = undefined;
+    var x85: u64 = undefined;
+    p256MulxU64(&x84, &x85, x2, (arg2[3]));
+    var x86: u64 = undefined;
+    var x87: u64 = undefined;
+    p256MulxU64(&x86, &x87, x2, (arg2[2]));
+    var x88: u64 = undefined;
+    var x89: u64 = undefined;
+    p256MulxU64(&x88, &x89, x2, (arg2[1]));
+    var x90: u64 = undefined;
+    var x91: u64 = undefined;
+    p256MulxU64(&x90, &x91, x2, (arg2[0]));
+    var x92: u64 = undefined;
+    var x93: u1 = undefined;
+    p256AddcarryxU64(&x92, &x93, 0x0, x91, x88);
+    var x94: u64 = undefined;
+    var x95: u1 = undefined;
+    p256AddcarryxU64(&x94, &x95, x93, x89, x86);
+    var x96: u64 = undefined;
+    var x97: u1 = undefined;
+    p256AddcarryxU64(&x96, &x97, x95, x87, x84);
+    const x98: u64 = (@intCast(u64, x97) + x85);
+    var x99: u64 = undefined;
+    var x100: u1 = undefined;
+    p256AddcarryxU64(&x99, &x100, 0x0, x75, x90);
+    var x101: u64 = undefined;
+    var x102: u1 = undefined;
+    p256AddcarryxU64(&x101, &x102, x100, x77, x92);
+    var x103: u64 = undefined;
+    var x104: u1 = undefined;
+    p256AddcarryxU64(&x103, &x104, x102, x79, x94);
+    var x105: u64 = undefined;
+    var x106: u1 = undefined;
+    p256AddcarryxU64(&x105, &x106, x104, x81, x96);
+    var x107: u64 = undefined;
+    var x108: u1 = undefined;
+    p256AddcarryxU64(&x107, &x108, x106, x83, x98);
+    // Reduction for round 2: same pattern, driven by the low word x99
+    // (the low word of the sum, x118, is not used afterwards).
+    var x109: u64 = undefined;
+    var x110: u64 = undefined;
+    p256MulxU64(&x109, &x110, x99, 0xffffffff00000001);
+    var x111: u64 = undefined;
+    var x112: u64 = undefined;
+    p256MulxU64(&x111, &x112, x99, 0xffffffff);
+    var x113: u64 = undefined;
+    var x114: u64 = undefined;
+    p256MulxU64(&x113, &x114, x99, 0xffffffffffffffff);
+    var x115: u64 = undefined;
+    var x116: u1 = undefined;
+    p256AddcarryxU64(&x115, &x116, 0x0, x114, x111);
+    const x117: u64 = (@intCast(u64, x116) + x112);
+    var x118: u64 = undefined;
+    var x119: u1 = undefined;
+    p256AddcarryxU64(&x118, &x119, 0x0, x99, x113);
+    var x120: u64 = undefined;
+    var x121: u1 = undefined;
+    p256AddcarryxU64(&x120, &x121, x119, x101, x115);
+    var x122: u64 = undefined;
+    var x123: u1 = undefined;
+    p256AddcarryxU64(&x122, &x123, x121, x103, x117);
+    var x124: u64 = undefined;
+    var x125: u1 = undefined;
+    p256AddcarryxU64(&x124, &x125, x123, x105, x109);
+    var x126: u64 = undefined;
+    var x127: u1 = undefined;
+    p256AddcarryxU64(&x126, &x127, x125, x107, x110);
+    const x128: u64 = (@intCast(u64, x127) + @intCast(u64, x108));
+    // Round 3: partial products of arg1[3] (x3) with every limb of arg2,
+    // then added to the accumulator (x120, x122, x124, x126, x128).
+    var x129: u64 = undefined;
+    var x130: u64 = undefined;
+    p256MulxU64(&x129, &x130, x3, (arg2[3]));
+    var x131: u64 = undefined;
+    var x132: u64 = undefined;
+    p256MulxU64(&x131, &x132, x3, (arg2[2]));
+    var x133: u64 = undefined;
+    var x134: u64 = undefined;
+    p256MulxU64(&x133, &x134, x3, (arg2[1]));
+    var x135: u64 = undefined;
+    var x136: u64 = undefined;
+    p256MulxU64(&x135, &x136, x3, (arg2[0]));
+    var x137: u64 = undefined;
+    var x138: u1 = undefined;
+    p256AddcarryxU64(&x137, &x138, 0x0, x136, x133);
+    var x139: u64 = undefined;
+    var x140: u1 = undefined;
+    p256AddcarryxU64(&x139, &x140, x138, x134, x131);
+    var x141: u64 = undefined;
+    var x142: u1 = undefined;
+    p256AddcarryxU64(&x141, &x142, x140, x132, x129);
+    const x143: u64 = (@intCast(u64, x142) + x130);
+    var x144: u64 = undefined;
+    var x145: u1 = undefined;
+    p256AddcarryxU64(&x144, &x145, 0x0, x120, x135);
+    var x146: u64 = undefined;
+    var x147: u1 = undefined;
+    p256AddcarryxU64(&x146, &x147, x145, x122, x137);
+    var x148: u64 = undefined;
+    var x149: u1 = undefined;
+    p256AddcarryxU64(&x148, &x149, x147, x124, x139);
+    var x150: u64 = undefined;
+    var x151: u1 = undefined;
+    p256AddcarryxU64(&x150, &x151, x149, x126, x141);
+    var x152: u64 = undefined;
+    var x153: u1 = undefined;
+    p256AddcarryxU64(&x152, &x153, x151, x128, x143);
+    // Reduction for round 3: same pattern, driven by the low word x144
+    // (the low word of the sum, x163, is not used afterwards).
+    var x154: u64 = undefined;
+    var x155: u64 = undefined;
+    p256MulxU64(&x154, &x155, x144, 0xffffffff00000001);
+    var x156: u64 = undefined;
+    var x157: u64 = undefined;
+    p256MulxU64(&x156, &x157, x144, 0xffffffff);
+    var x158: u64 = undefined;
+    var x159: u64 = undefined;
+    p256MulxU64(&x158, &x159, x144, 0xffffffffffffffff);
+    var x160: u64 = undefined;
+    var x161: u1 = undefined;
+    p256AddcarryxU64(&x160, &x161, 0x0, x159, x156);
+    const x162: u64 = (@intCast(u64, x161) + x157);
+    var x163: u64 = undefined;
+    var x164: u1 = undefined;
+    p256AddcarryxU64(&x163, &x164, 0x0, x144, x158);
+    var x165: u64 = undefined;
+    var x166: u1 = undefined;
+    p256AddcarryxU64(&x165, &x166, x164, x146, x160);
+    var x167: u64 = undefined;
+    var x168: u1 = undefined;
+    p256AddcarryxU64(&x167, &x168, x166, x148, x162);
+    var x169: u64 = undefined;
+    var x170: u1 = undefined;
+    p256AddcarryxU64(&x169, &x170, x168, x150, x154);
+    var x171: u64 = undefined;
+    var x172: u1 = undefined;
+    p256AddcarryxU64(&x171, &x172, x170, x152, x155);
+    const x173: u64 = (@intCast(u64, x172) + @intCast(u64, x153));
+    // Final step: subtract m limb by limb (0xffffffffffffffff, 0xffffffff, 0x0,
+    // 0xffffffff00000001) from the accumulator, including its top word x173;
+    // x183 is the borrow out of the whole subtraction.
+    var x174: u64 = undefined;
+    var x175: u1 = undefined;
+    p256SubborrowxU64(&x174, &x175, 0x0, x165, 0xffffffffffffffff);
+    var x176: u64 = undefined;
+    var x177: u1 = undefined;
+    p256SubborrowxU64(&x176, &x177, x175, x167, 0xffffffff);
+    var x178: u64 = undefined;
+    var x179: u1 = undefined;
+    p256SubborrowxU64(&x178, &x179, x177, x169, @intCast(u64, 0x0));
+    var x180: u64 = undefined;
+    var x181: u1 = undefined;
+    p256SubborrowxU64(&x180, &x181, x179, x171, 0xffffffff00000001);
+    var x182: u64 = undefined;
+    var x183: u1 = undefined;
+    p256SubborrowxU64(&x182, &x183, x181, x173, @intCast(u64, 0x0));
+    // Branch-free selection per p256CmovznzU64: with no borrow (x183 == 0) take
+    // the subtracted limbs, otherwise keep the unsubtracted ones.
+    var x184: u64 = undefined;
+    p256CmovznzU64(&x184, x183, x174, x165);
+    var x185: u64 = undefined;
+    p256CmovznzU64(&x185, x183, x176, x167);
+    var x186: u64 = undefined;
+    p256CmovznzU64(&x186, x183, x178, x169);
+    var x187: u64 = undefined;
+    p256CmovznzU64(&x187, x183, x180, x171);
+    out1[0] = x184;
+    out1[1] = x185;
+    out1[2] = x186;
+    out1[3] = x187;
+}
+
+/// The function p256Square squares a field element in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub fn p256Square(out1: *Limbs, arg1: Limbs) void {
+    const x1: u64 = (arg1[1]);
+    const x2: u64 = (arg1[2]);
+    const x3: u64 = (arg1[3]);
+    const x4: u64 = (arg1[0]);
+    var x5: u64 = undefined;
+    var x6: u64 = undefined;
+    p256MulxU64(&x5, &x6, x4, (arg1[3]));
+    var x7: u64 = undefined;
+    var x8: u64 = undefined;
+    p256MulxU64(&x7, &x8, x4, (arg1[2]));
+    var x9: u64 = undefined;
+    var x10: u64 = undefined;
+    p256MulxU64(&x9, &x10, x4, (arg1[1]));
+    var x11: u64 = undefined;
+    var x12: u64 = undefined;
+    p256MulxU64(&x11, &x12, x4, (arg1[0]));
+    var x13: u64 = undefined;
+    var x14: u1 = undefined;
+    p256AddcarryxU64(&x13, &x14, 0x0, x12, x9);
+    var x15: u64 = undefined;
+    var x16: u1 = undefined;
+    p256AddcarryxU64(&x15, &x16, x14, x10, x7);
+    var x17: u64 = undefined;
+    var x18: u1 = undefined;
+    p256AddcarryxU64(&x17, &x18, x16, x8, x5);
+    const x19: u64 = (@intCast(u64, x18) + x6);
+    var x20: u64 = undefined;
+    var x21: u64 = undefined;
+    p256MulxU64(&x20, &x21, x11, 0xffffffff00000001);
+    var x22: u64 = undefined;
+    var x23: u64 = undefined;
+    p256MulxU64(&x22, &x23, x11, 0xffffffff);
+    var x24: u64 = undefined;
+    var x25: u64 = undefined;
+    p256MulxU64(&x24, &x25, x11, 0xffffffffffffffff);
+    var x26: u64 = undefined;
+    var x27: u1 = undefined;
+    p256AddcarryxU64(&x26, &x27, 0x0, x25, x22);
+    const x28: u64 = (@intCast(u64, x27) + x23);
+    var x29: u64 = undefined;
+    var x30: u1 = undefined;
+    p256AddcarryxU64(&x29, &x30, 0x0, x11, x24);
+    var x31: u64 = undefined;
+    var x32: u1 = undefined;
+    p256AddcarryxU64(&x31, &x32, x30, x13, x26);
+    var x33: u64 = undefined;
+    var x34: u1 = undefined;
+    p256AddcarryxU64(&x33, &x34, x32, x15, x28);
+    var x35: u64 = undefined;
+    var x36: u1 = undefined;
+    p256AddcarryxU64(&x35, &x36, x34, x17, x20);
+    var x37: u64 = undefined;
+    var x38: u1 = undefined;
+    p256AddcarryxU64(&x37, &x38, x36, x19, x21);
+    var x39: u64 = undefined;
+    var x40: u64 = undefined;
+    p256MulxU64(&x39, &x40, x1, (arg1[3]));
+    var x41: u64 = undefined;
+    var x42: u64 = undefined;
+    p256MulxU64(&x41, &x42, x1, (arg1[2]));
+    var x43: u64 = undefined;
+    var x44: u64 = undefined;
+    p256MulxU64(&x43, &x44, x1, (arg1[1]));
+    var x45: u64 = undefined;
+    var x46: u64 = undefined;
+    p256MulxU64(&x45, &x46, x1, (arg1[0]));
+    var x47: u64 = undefined;
+    var x48: u1 = undefined;
+    p256AddcarryxU64(&x47, &x48, 0x0, x46, x43);
+    var x49: u64 = undefined;
+    var x50: u1 = undefined;
+    p256AddcarryxU64(&x49, &x50, x48, x44, x41);
+    var x51: u64 = undefined;
+    var x52: u1 = undefined;
+    p256AddcarryxU64(&x51, &x52, x50, x42, x39);
+    const x53: u64 = (@intCast(u64, x52) + x40);
+    var x54: u64 = undefined;
+    var x55: u1 = undefined;
+    p256AddcarryxU64(&x54, &x55, 0x0, x31, x45);
+    var x56: u64 = undefined;
+    var x57: u1 = undefined;
+    p256AddcarryxU64(&x56, &x57, x55, x33, x47);
+    var x58: u64 = undefined;
+    var x59: u1 = undefined;
+    p256AddcarryxU64(&x58, &x59, x57, x35, x49);
+    var x60: u64 = undefined;
+    var x61: u1 = undefined;
+    p256AddcarryxU64(&x60, &x61, x59, x37, x51);
+    var x62: u64 = undefined;
+    var x63: u1 = undefined;
+    p256AddcarryxU64(&x62, &x63, x61, @intCast(u64, x38), x53);
+    var x64: u64 = undefined;
+    var x65: u64 = undefined;
+    p256MulxU64(&x64, &x65, x54, 0xffffffff00000001);
+    var x66: u64 = undefined;
+    var x67: u64 = undefined;
+    p256MulxU64(&x66, &x67, x54, 0xffffffff);
+    var x68: u64 = undefined;
+    var x69: u64 = undefined;
+    p256MulxU64(&x68, &x69, x54, 0xffffffffffffffff);
+    var x70: u64 = undefined;
+    var x71: u1 = undefined;
+    p256AddcarryxU64(&x70, &x71, 0x0, x69, x66);
+    const x72: u64 = (@intCast(u64, x71) + x67);
+    var x73: u64 = undefined;
+    var x74: u1 = undefined;
+    p256AddcarryxU64(&x73, &x74, 0x0, x54, x68);
+    var x75: u64 = undefined;
+    var x76: u1 = undefined;
+    p256AddcarryxU64(&x75, &x76, x74, x56, x70);
+    var x77: u64 = undefined;
+    var x78: u1 = undefined;
+    p256AddcarryxU64(&x77, &x78, x76, x58, x72);
+    var x79: u64 = undefined;
+    var x80: u1 = undefined;
+    p256AddcarryxU64(&x79, &x80, x78, x60, x64);
+    var x81: u64 = undefined;
+    var x82: u1 = undefined;
+    p256AddcarryxU64(&x81, &x82, x80, x62, x65);
+    const x83: u64 = (@intCast(u64, x82) + @intCast(u64, x63));
+    var x84: u64 = undefined;
+    var x85: u64 = undefined;
+    p256MulxU64(&x84, &x85, x2, (arg1[3]));
+    var x86: u64 = undefined;
+    var x87: u64 = undefined;
+    p256MulxU64(&x86, &x87, x2, (arg1[2]));
+    var x88: u64 = undefined;
+    var x89: u64 = undefined;
+    p256MulxU64(&x88, &x89, x2, (arg1[1]));
+    var x90: u64 = undefined;
+    var x91: u64 = undefined;
+    p256MulxU64(&x90, &x91, x2, (arg1[0]));
+    var x92: u64 = undefined;
+    var x93: u1 = undefined;
+    p256AddcarryxU64(&x92, &x93, 0x0, x91, x88);
+    var x94: u64 = undefined;
+    var x95: u1 = undefined;
+    p256AddcarryxU64(&x94, &x95, x93, x89, x86);
+    var x96: u64 = undefined;
+    var x97: u1 = undefined;
+    p256AddcarryxU64(&x96, &x97, x95, x87, x84);
+    const x98: u64 = (@intCast(u64, x97) + x85);
+    var x99: u64 = undefined;
+    var x100: u1 = undefined;
+    p256AddcarryxU64(&x99, &x100, 0x0, x75, x90);
+    var x101: u64 = undefined;
+    var x102: u1 = undefined;
+    p256AddcarryxU64(&x101, &x102, x100, x77, x92);
+    var x103: u64 = undefined;
+    var x104: u1 = undefined;
+    p256AddcarryxU64(&x103, &x104, x102, x79, x94);
+    var x105: u64 = undefined;
+    var x106: u1 = undefined;
+    p256AddcarryxU64(&x105, &x106, x104, x81, x96);
+    var x107: u64 = undefined;
+    var x108: u1 = undefined;
+    p256AddcarryxU64(&x107, &x108, x106, x83, x98);
+    var x109: u64 = undefined;
+    var x110: u64 = undefined;
+    p256MulxU64(&x109, &x110, x99, 0xffffffff00000001);
+    var x111: u64 = undefined;
+    var x112: u64 = undefined;
+    p256MulxU64(&x111, &x112, x99, 0xffffffff);
+    var x113: u64 = undefined;
+    var x114: u64 = undefined;
+    p256MulxU64(&x113, &x114, x99, 0xffffffffffffffff);
+    var x115: u64 = undefined;
+    var x116: u1 = undefined;
+    p256AddcarryxU64(&x115, &x116, 0x0, x114, x111);
+    const x117: u64 = (@intCast(u64, x116) + x112);
+    var x118: u64 = undefined;
+    var x119: u1 = undefined;
+    p256AddcarryxU64(&x118, &x119, 0x0, x99, x113);
+    var x120: u64 = undefined;
+    var x121: u1 = undefined;
+    p256AddcarryxU64(&x120, &x121, x119, x101, x115);
+    var x122: u64 = undefined;
+    var x123: u1 = undefined;
+    p256AddcarryxU64(&x122, &x123, x121, x103, x117);
+    var x124: u64 = undefined;
+    var x125: u1 = undefined;
+    p256AddcarryxU64(&x124, &x125, x123, x105, x109);
+    var x126: u64 = undefined;
+    var x127: u1 = undefined;
+    p256AddcarryxU64(&x126, &x127, x125, x107, x110);
+    const x128: u64 = (@intCast(u64, x127) + @intCast(u64, x108));
+    var x129: u64 = undefined;
+    var x130: u64 = undefined;
+    p256MulxU64(&x129, &x130, x3, (arg1[3]));
+    var x131: u64 = undefined;
+    var x132: u64 = undefined;
+    p256MulxU64(&x131, &x132, x3, (arg1[2]));
+    var x133: u64 = undefined;
+    var x134: u64 = undefined;
+    p256MulxU64(&x133, &x134, x3, (arg1[1]));
+    var x135: u64 = undefined;
+    var x136: u64 = undefined;
+    p256MulxU64(&x135, &x136, x3, (arg1[0]));
+    var x137: u64 = undefined;
+    var x138: u1 = undefined;
+    p256AddcarryxU64(&x137, &x138, 0x0, x136, x133);
+    var x139: u64 = undefined;
+    var x140: u1 = undefined;
+    p256AddcarryxU64(&x139, &x140, x138, x134, x131);
+    var x141: u64 = undefined;
+    var x142: u1 = undefined;
+    p256AddcarryxU64(&x141, &x142, x140, x132, x129);
+    const x143: u64 = (@intCast(u64, x142) + x130);
+    var x144: u64 = undefined;
+    var x145: u1 = undefined;
+    p256AddcarryxU64(&x144, &x145, 0x0, x120, x135);
+    var x146: u64 = undefined;
+    var x147: u1 = undefined;
+    p256AddcarryxU64(&x146, &x147, x145, x122, x137);
+    var x148: u64 = undefined;
+    var x149: u1 = undefined;
+    p256AddcarryxU64(&x148, &x149, x147, x124, x139);
+    var x150: u64 = undefined;
+    var x151: u1 = undefined;
+    p256AddcarryxU64(&x150, &x151, x149, x126, x141);
+    var x152: u64 = undefined;
+    var x153: u1 = undefined;
+    p256AddcarryxU64(&x152, &x153, x151, x128, x143);
+    var x154: u64 = undefined;
+    var x155: u64 = undefined;
+    p256MulxU64(&x154, &x155, x144, 0xffffffff00000001);
+    var x156: u64 = undefined;
+    var x157: u64 = undefined;
+    p256MulxU64(&x156, &x157, x144, 0xffffffff);
+    var x158: u64 = undefined;
+    var x159: u64 = undefined;
+    p256MulxU64(&x158, &x159, x144, 0xffffffffffffffff);
+    var x160: u64 = undefined;
+    var x161: u1 = undefined;
+    p256AddcarryxU64(&x160, &x161, 0x0, x159, x156);
+    const x162: u64 = (@intCast(u64, x161) + x157);
+    var x163: u64 = undefined;
+    var x164: u1 = undefined;
+    p256AddcarryxU64(&x163, &x164, 0x0, x144, x158);
+    var x165: u64 = undefined;
+    var x166: u1 = undefined;
+    p256AddcarryxU64(&x165, &x166, x164, x146, x160);
+    var x167: u64 = undefined;
+    var x168: u1 = undefined;
+    p256AddcarryxU64(&x167, &x168, x166, x148, x162);
+    var x169: u64 = undefined;
+    var x170: u1 = undefined;
+    p256AddcarryxU64(&x169, &x170, x168, x150, x154);
+    var x171: u64 = undefined;
+    var x172: u1 = undefined;
+    p256AddcarryxU64(&x171, &x172, x170, x152, x155);
+    const x173: u64 = (@intCast(u64, x172) + @intCast(u64, x153));
+    var x174: u64 = undefined;
+    var x175: u1 = undefined;
+    p256SubborrowxU64(&x174, &x175, 0x0, x165, 0xffffffffffffffff);
+    var x176: u64 = undefined;
+    var x177: u1 = undefined;
+    p256SubborrowxU64(&x176, &x177, x175, x167, 0xffffffff);
+    var x178: u64 = undefined;
+    var x179: u1 = undefined;
+    p256SubborrowxU64(&x178, &x179, x177, x169, @intCast(u64, 0x0));
+    var x180: u64 = undefined;
+    var x181: u1 = undefined;
+    p256SubborrowxU64(&x180, &x181, x179, x171, 0xffffffff00000001);
+    var x182: u64 = undefined;
+    var x183: u1 = undefined;
+    p256SubborrowxU64(&x182, &x183, x181, x173, @intCast(u64, 0x0));
+    var x184: u64 = undefined;
+    p256CmovznzU64(&x184, x183, x174, x165);
+    var x185: u64 = undefined;
+    p256CmovznzU64(&x185, x183, x176, x167);
+    var x186: u64 = undefined;
+    p256CmovznzU64(&x186, x183, x178, x169);
+    var x187: u64 = undefined;
+    p256CmovznzU64(&x187, x183, x180, x171);
+    out1[0] = x184;
+    out1[1] = x185;
+    out1[2] = x186;
+    out1[3] = x187;
+}
+
+/// p256Add computes the sum of two field elements held in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+///   0 ≤ eval arg2 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
+///   0 ≤ eval out1 < m
+///
+/// Bounds:
+///   every limb of arg1, arg2 and out1 lies in [0x0 ~> 0xffffffffffffffff]
+pub fn p256Add(out1: *Limbs, arg1: Limbs, arg2: Limbs) void {
+    // Step 1: add the operands limb by limb, lowest limb first, chaining the carry.
+    var sum0: u64 = undefined;
+    var carry0: u1 = undefined;
+    p256AddcarryxU64(&sum0, &carry0, 0x0, (arg1[0]), (arg2[0]));
+    var sum1: u64 = undefined;
+    var carry1: u1 = undefined;
+    p256AddcarryxU64(&sum1, &carry1, carry0, (arg1[1]), (arg2[1]));
+    var sum2: u64 = undefined;
+    var carry2: u1 = undefined;
+    p256AddcarryxU64(&sum2, &carry2, carry1, (arg1[2]), (arg2[2]));
+    var sum3: u64 = undefined;
+    var carry3: u1 = undefined;
+    p256AddcarryxU64(&sum3, &carry3, carry2, (arg1[3]), (arg2[3]));
+
+    // Step 2: trial subtraction of the constant limbs (lowest first). The carry
+    // out of step 1 is treated as a fifth limb so the final borrow reflects
+    // the full-width comparison.
+    var diff0: u64 = undefined;
+    var borrow0: u1 = undefined;
+    p256SubborrowxU64(&diff0, &borrow0, 0x0, sum0, 0xffffffffffffffff);
+    var diff1: u64 = undefined;
+    var borrow1: u1 = undefined;
+    p256SubborrowxU64(&diff1, &borrow1, borrow0, sum1, 0xffffffff);
+    var diff2: u64 = undefined;
+    var borrow2: u1 = undefined;
+    p256SubborrowxU64(&diff2, &borrow2, borrow1, sum2, @intCast(u64, 0x0));
+    var diff3: u64 = undefined;
+    var borrow3: u1 = undefined;
+    p256SubborrowxU64(&diff3, &borrow3, borrow2, sum3, 0xffffffff00000001);
+    // Only the borrow of this last step is consumed; the word itself is unused.
+    var diff4: u64 = undefined;
+    var borrow4: u1 = undefined;
+    p256SubborrowxU64(&diff4, &borrow4, borrow3, @intCast(u64, carry3), @intCast(u64, 0x0));
+
+    // Step 3: branch-free selection per limb, driven by the final borrow:
+    // the difference when no borrow occurred, the plain sum otherwise.
+    var res0: u64 = undefined;
+    p256CmovznzU64(&res0, borrow4, diff0, sum0);
+    var res1: u64 = undefined;
+    p256CmovznzU64(&res1, borrow4, diff1, sum1);
+    var res2: u64 = undefined;
+    p256CmovznzU64(&res2, borrow4, diff2, sum2);
+    var res3: u64 = undefined;
+    p256CmovznzU64(&res3, borrow4, diff3, sum3);
+
+    // The output is written only after every input limb has been consumed.
+    out1[0] = res0;
+    out1[1] = res1;
+    out1[2] = res2;
+    out1[3] = res3;
+}
+
+/// p256Sub computes the difference of two field elements held in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+///   0 ≤ eval arg2 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
+///   0 ≤ eval out1 < m
+///
+/// Bounds:
+///   every limb of arg1, arg2 and out1 lies in [0x0 ~> 0xffffffffffffffff]
+pub fn p256Sub(out1: *Limbs, arg1: Limbs, arg2: Limbs) void {
+    // Step 1: raw limb-wise difference arg1 - arg2, lowest limb first, chaining the borrow.
+    var diff0: u64 = undefined;
+    var borrow0: u1 = undefined;
+    p256SubborrowxU64(&diff0, &borrow0, 0x0, (arg1[0]), (arg2[0]));
+    var diff1: u64 = undefined;
+    var borrow1: u1 = undefined;
+    p256SubborrowxU64(&diff1, &borrow1, borrow0, (arg1[1]), (arg2[1]));
+    var diff2: u64 = undefined;
+    var borrow2: u1 = undefined;
+    p256SubborrowxU64(&diff2, &borrow2, borrow1, (arg1[2]), (arg2[2]));
+    var diff3: u64 = undefined;
+    var borrow3: u1 = undefined;
+    p256SubborrowxU64(&diff3, &borrow3, borrow2, (arg1[3]), (arg2[3]));
+
+    // Step 2: turn the final borrow into a full-width mask
+    // (zero when there was no borrow, all ones when there was).
+    var mask: u64 = undefined;
+    p256CmovznzU64(&mask, borrow3, @intCast(u64, 0x0), 0xffffffffffffffff);
+
+    // Step 3: add the masked constant limbs back, lowest limb first. With a zero
+    // mask this adds nothing; with an all-ones mask it adds the constants
+    // 0xffffffffffffffff, 0xffffffff, 0x0, 0xffffffff00000001.
+    var res0: u64 = undefined;
+    var carry0: u1 = undefined;
+    p256AddcarryxU64(&res0, &carry0, 0x0, diff0, mask);
+    var res1: u64 = undefined;
+    var carry1: u1 = undefined;
+    p256AddcarryxU64(&res1, &carry1, carry0, diff1, (mask & 0xffffffff));
+    var res2: u64 = undefined;
+    var carry2: u1 = undefined;
+    p256AddcarryxU64(&res2, &carry2, carry1, diff2, @intCast(u64, 0x0));
+    var res3: u64 = undefined;
+    var carry3: u1 = undefined;
+    p256AddcarryxU64(&res3, &carry3, carry2, diff3, (mask & 0xffffffff00000001));
+
+    // The output is written only after every input limb has been consumed.
+    out1[0] = res0;
+    out1[1] = res1;
+    out1[2] = res2;
+    out1[3] = res3;
+}
+
+/// p256Opp computes the additive inverse of a field element held in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
+///   0 ≤ eval out1 < m
+///
+/// Bounds:
+///   every limb of arg1 and out1 lies in [0x0 ~> 0xffffffffffffffff]
+pub fn p256Opp(out1: *Limbs, arg1: Limbs) void {
+    // Step 1: compute 0 - arg1 limb by limb, lowest limb first, chaining the borrow.
+    var neg0: u64 = undefined;
+    var borrow0: u1 = undefined;
+    p256SubborrowxU64(&neg0, &borrow0, 0x0, @intCast(u64, 0x0), (arg1[0]));
+    var neg1: u64 = undefined;
+    var borrow1: u1 = undefined;
+    p256SubborrowxU64(&neg1, &borrow1, borrow0, @intCast(u64, 0x0), (arg1[1]));
+    var neg2: u64 = undefined;
+    var borrow2: u1 = undefined;
+    p256SubborrowxU64(&neg2, &borrow2, borrow1, @intCast(u64, 0x0), (arg1[2]));
+    var neg3: u64 = undefined;
+    var borrow3: u1 = undefined;
+    p256SubborrowxU64(&neg3, &borrow3, borrow2, @intCast(u64, 0x0), (arg1[3]));
+
+    // Step 2: turn the final borrow into a full-width mask
+    // (zero when there was no borrow, all ones when there was).
+    var mask: u64 = undefined;
+    p256CmovznzU64(&mask, borrow3, @intCast(u64, 0x0), 0xffffffffffffffff);
+
+    // Step 3: add the masked constant limbs back, lowest limb first. With a zero
+    // mask this adds nothing; with an all-ones mask it adds the constants
+    // 0xffffffffffffffff, 0xffffffff, 0x0, 0xffffffff00000001.
+    var res0: u64 = undefined;
+    var carry0: u1 = undefined;
+    p256AddcarryxU64(&res0, &carry0, 0x0, neg0, mask);
+    var res1: u64 = undefined;
+    var carry1: u1 = undefined;
+    p256AddcarryxU64(&res1, &carry1, carry0, neg1, (mask & 0xffffffff));
+    var res2: u64 = undefined;
+    var carry2: u1 = undefined;
+    p256AddcarryxU64(&res2, &carry2, carry1, neg2, @intCast(u64, 0x0));
+    var res3: u64 = undefined;
+    var carry3: u1 = undefined;
+    p256AddcarryxU64(&res3, &carry3, carry2, neg3, (mask & 0xffffffff00000001));
+
+    // The output is written only after every input limb has been consumed.
+    out1[0] = res0;
+    out1[1] = res1;
+    out1[2] = res2;
+    out1[3] = res3;
+}
+
+/// The function p256FromMontgomery translates a field element out of the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///
+/// Structure: four rounds, one per input limb (lowest first). Each round multiplies
+/// the current lowest accumulator word by the constants 0xffffffff00000001, 0xffffffff
+/// and 0xffffffffffffffff and adds the products into the accumulator; the round's lowest
+/// sum word is never read again, only its carry is. A final trial subtraction of the
+/// constant limbs plus a branch-free select produces the output.
+/// The statement order encodes the carry chain and must not be rearranged.
+pub fn p256FromMontgomery(out1: *Limbs, arg1: Limbs) void {
+    // Round 1: driven by arg1[0].
+    const x1: u64 = (arg1[0]);
+    var x2: u64 = undefined;
+    var x3: u64 = undefined;
+    p256MulxU64(&x2, &x3, x1, 0xffffffff00000001);
+    var x4: u64 = undefined;
+    var x5: u64 = undefined;
+    p256MulxU64(&x4, &x5, x1, 0xffffffff);
+    var x6: u64 = undefined;
+    var x7: u64 = undefined;
+    p256MulxU64(&x6, &x7, x1, 0xffffffffffffffff);
+    var x8: u64 = undefined;
+    var x9: u1 = undefined;
+    p256AddcarryxU64(&x8, &x9, 0x0, x7, x4);
+    // x10 is not used afterwards; only the carry x11 is propagated.
+    var x10: u64 = undefined;
+    var x11: u1 = undefined;
+    p256AddcarryxU64(&x10, &x11, 0x0, x1, x6);
+    var x12: u64 = undefined;
+    var x13: u1 = undefined;
+    p256AddcarryxU64(&x12, &x13, x11, @intCast(u64, 0x0), x8);
+    // Bring in arg1[1]; x14 drives round 2.
+    var x14: u64 = undefined;
+    var x15: u1 = undefined;
+    p256AddcarryxU64(&x14, &x15, 0x0, x12, (arg1[1]));
+    // Round 2: driven by x14.
+    var x16: u64 = undefined;
+    var x17: u64 = undefined;
+    p256MulxU64(&x16, &x17, x14, 0xffffffff00000001);
+    var x18: u64 = undefined;
+    var x19: u64 = undefined;
+    p256MulxU64(&x18, &x19, x14, 0xffffffff);
+    var x20: u64 = undefined;
+    var x21: u64 = undefined;
+    p256MulxU64(&x20, &x21, x14, 0xffffffffffffffff);
+    var x22: u64 = undefined;
+    var x23: u1 = undefined;
+    p256AddcarryxU64(&x22, &x23, 0x0, x21, x18);
+    // x24 is not used afterwards; only the carry x25 is propagated.
+    var x24: u64 = undefined;
+    var x25: u1 = undefined;
+    p256AddcarryxU64(&x24, &x25, 0x0, x14, x20);
+    // The pending carries of round 1 (x15, x13, x9) and its high word x5 are folded in here.
+    var x26: u64 = undefined;
+    var x27: u1 = undefined;
+    p256AddcarryxU64(&x26, &x27, x25, (@intCast(u64, x15) + (@intCast(u64, x13) + (@intCast(u64, x9) + x5))), x22);
+    var x28: u64 = undefined;
+    var x29: u1 = undefined;
+    p256AddcarryxU64(&x28, &x29, x27, x2, (@intCast(u64, x23) + x19));
+    var x30: u64 = undefined;
+    var x31: u1 = undefined;
+    p256AddcarryxU64(&x30, &x31, x29, x3, x16);
+    // Bring in arg1[2] and ripple its carry upwards; x32 drives round 3.
+    var x32: u64 = undefined;
+    var x33: u1 = undefined;
+    p256AddcarryxU64(&x32, &x33, 0x0, x26, (arg1[2]));
+    var x34: u64 = undefined;
+    var x35: u1 = undefined;
+    p256AddcarryxU64(&x34, &x35, x33, x28, @intCast(u64, 0x0));
+    var x36: u64 = undefined;
+    var x37: u1 = undefined;
+    p256AddcarryxU64(&x36, &x37, x35, x30, @intCast(u64, 0x0));
+    // Round 3: driven by x32.
+    var x38: u64 = undefined;
+    var x39: u64 = undefined;
+    p256MulxU64(&x38, &x39, x32, 0xffffffff00000001);
+    var x40: u64 = undefined;
+    var x41: u64 = undefined;
+    p256MulxU64(&x40, &x41, x32, 0xffffffff);
+    var x42: u64 = undefined;
+    var x43: u64 = undefined;
+    p256MulxU64(&x42, &x43, x32, 0xffffffffffffffff);
+    var x44: u64 = undefined;
+    var x45: u1 = undefined;
+    p256AddcarryxU64(&x44, &x45, 0x0, x43, x40);
+    // x46 is not used afterwards; only the carry x47 is propagated.
+    var x46: u64 = undefined;
+    var x47: u1 = undefined;
+    p256AddcarryxU64(&x46, &x47, 0x0, x32, x42);
+    var x48: u64 = undefined;
+    var x49: u1 = undefined;
+    p256AddcarryxU64(&x48, &x49, x47, x34, x44);
+    var x50: u64 = undefined;
+    var x51: u1 = undefined;
+    p256AddcarryxU64(&x50, &x51, x49, x36, (@intCast(u64, x45) + x41));
+    // The pending carries x37, x31 and the high word x17 of round 2 are folded in here.
+    var x52: u64 = undefined;
+    var x53: u1 = undefined;
+    p256AddcarryxU64(&x52, &x53, x51, (@intCast(u64, x37) + (@intCast(u64, x31) + x17)), x38);
+    // Bring in arg1[3] and ripple its carry upwards; x54 drives round 4.
+    var x54: u64 = undefined;
+    var x55: u1 = undefined;
+    p256AddcarryxU64(&x54, &x55, 0x0, x48, (arg1[3]));
+    var x56: u64 = undefined;
+    var x57: u1 = undefined;
+    p256AddcarryxU64(&x56, &x57, x55, x50, @intCast(u64, 0x0));
+    var x58: u64 = undefined;
+    var x59: u1 = undefined;
+    p256AddcarryxU64(&x58, &x59, x57, x52, @intCast(u64, 0x0));
+    // Round 4: driven by x54.
+    var x60: u64 = undefined;
+    var x61: u64 = undefined;
+    p256MulxU64(&x60, &x61, x54, 0xffffffff00000001);
+    var x62: u64 = undefined;
+    var x63: u64 = undefined;
+    p256MulxU64(&x62, &x63, x54, 0xffffffff);
+    var x64: u64 = undefined;
+    var x65: u64 = undefined;
+    p256MulxU64(&x64, &x65, x54, 0xffffffffffffffff);
+    var x66: u64 = undefined;
+    var x67: u1 = undefined;
+    p256AddcarryxU64(&x66, &x67, 0x0, x65, x62);
+    // x68 is not used afterwards; only the carry x69 is propagated.
+    var x68: u64 = undefined;
+    var x69: u1 = undefined;
+    p256AddcarryxU64(&x68, &x69, 0x0, x54, x64);
+    var x70: u64 = undefined;
+    var x71: u1 = undefined;
+    p256AddcarryxU64(&x70, &x71, x69, x56, x66);
+    var x72: u64 = undefined;
+    var x73: u1 = undefined;
+    p256AddcarryxU64(&x72, &x73, x71, x58, (@intCast(u64, x67) + x63));
+    // The pending carries x59, x53 and the high word x39 of round 3 are folded in here.
+    var x74: u64 = undefined;
+    var x75: u1 = undefined;
+    p256AddcarryxU64(&x74, &x75, x73, (@intCast(u64, x59) + (@intCast(u64, x53) + x39)), x60);
+    const x76: u64 = (@intCast(u64, x75) + x61);
+    // Trial subtraction of the constant limbs from (x70, x72, x74, x76), lowest first.
+    // NOTE(review): these constants appear to be the limbs of the modulus m — confirm.
+    var x77: u64 = undefined;
+    var x78: u1 = undefined;
+    p256SubborrowxU64(&x77, &x78, 0x0, x70, 0xffffffffffffffff);
+    var x79: u64 = undefined;
+    var x80: u1 = undefined;
+    p256SubborrowxU64(&x79, &x80, x78, x72, 0xffffffff);
+    var x81: u64 = undefined;
+    var x82: u1 = undefined;
+    p256SubborrowxU64(&x81, &x82, x80, x74, @intCast(u64, 0x0));
+    var x83: u64 = undefined;
+    var x84: u1 = undefined;
+    p256SubborrowxU64(&x83, &x84, x82, x76, 0xffffffff00000001);
+    // Only the borrow x86 of this step is consumed; x85 itself is unused.
+    var x85: u64 = undefined;
+    var x86: u1 = undefined;
+    p256SubborrowxU64(&x85, &x86, x84, @intCast(u64, 0x0), @intCast(u64, 0x0));
+    // Branch-free select per limb on x86: the subtracted value when x86 is 0,
+    // the unsubtracted value otherwise.
+    var x87: u64 = undefined;
+    p256CmovznzU64(&x87, x86, x77, x70);
+    var x88: u64 = undefined;
+    p256CmovznzU64(&x88, x86, x79, x72);
+    var x89: u64 = undefined;
+    p256CmovznzU64(&x89, x86, x81, x74);
+    var x90: u64 = undefined;
+    p256CmovznzU64(&x90, x86, x83, x76);
+    out1[0] = x87;
+    out1[1] = x88;
+    out1[2] = x89;
+    out1[3] = x90;
+}
+
+/// The function p256ToMontgomery translates a field element into the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = eval arg1 mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///
+/// Structure: four rounds, one per input limb in the order arg1[0], arg1[1], arg1[2],
+/// arg1[3]. Each round multiplies the limb by the constants 0x4fffffffd,
+/// 0xfffffffffffffffe, 0xfffffffbffffffff and 0x3, adds the products into the running
+/// accumulator, then multiplies the lowest accumulator word by 0xffffffff00000001,
+/// 0xffffffff and 0xffffffffffffffff and adds those products in as well. A final trial
+/// subtraction of the constant limbs plus a branch-free select produces the output.
+/// NOTE(review): the four multiplier constants look like the limbs of 2^512 mod m
+/// (most-significant first) — confirm against the generator.
+/// The statement order encodes the carry chain and must not be rearranged.
+pub fn p256ToMontgomery(out1: *Limbs, arg1: Limbs) void {
+    // x4 (limb 0) is processed first, then x1, x2, x3 (limbs 1, 2, 3).
+    const x1: u64 = (arg1[1]);
+    const x2: u64 = (arg1[2]);
+    const x3: u64 = (arg1[3]);
+    const x4: u64 = (arg1[0]);
+    // Round 1, multiply: x4 times the four constants, then sum overlapping product words.
+    var x5: u64 = undefined;
+    var x6: u64 = undefined;
+    p256MulxU64(&x5, &x6, x4, 0x4fffffffd);
+    var x7: u64 = undefined;
+    var x8: u64 = undefined;
+    p256MulxU64(&x7, &x8, x4, 0xfffffffffffffffe);
+    var x9: u64 = undefined;
+    var x10: u64 = undefined;
+    p256MulxU64(&x9, &x10, x4, 0xfffffffbffffffff);
+    var x11: u64 = undefined;
+    var x12: u64 = undefined;
+    p256MulxU64(&x11, &x12, x4, 0x3);
+    var x13: u64 = undefined;
+    var x14: u1 = undefined;
+    p256AddcarryxU64(&x13, &x14, 0x0, x12, x9);
+    var x15: u64 = undefined;
+    var x16: u1 = undefined;
+    p256AddcarryxU64(&x15, &x16, x14, x10, x7);
+    var x17: u64 = undefined;
+    var x18: u1 = undefined;
+    p256AddcarryxU64(&x17, &x18, x16, x8, x5);
+    // Round 1, reduce: driven by the lowest word x11.
+    var x19: u64 = undefined;
+    var x20: u64 = undefined;
+    p256MulxU64(&x19, &x20, x11, 0xffffffff00000001);
+    var x21: u64 = undefined;
+    var x22: u64 = undefined;
+    p256MulxU64(&x21, &x22, x11, 0xffffffff);
+    var x23: u64 = undefined;
+    var x24: u64 = undefined;
+    p256MulxU64(&x23, &x24, x11, 0xffffffffffffffff);
+    var x25: u64 = undefined;
+    var x26: u1 = undefined;
+    p256AddcarryxU64(&x25, &x26, 0x0, x24, x21);
+    // x27 is not used afterwards; only the carry x28 is propagated.
+    var x27: u64 = undefined;
+    var x28: u1 = undefined;
+    p256AddcarryxU64(&x27, &x28, 0x0, x11, x23);
+    var x29: u64 = undefined;
+    var x30: u1 = undefined;
+    p256AddcarryxU64(&x29, &x30, x28, x13, x25);
+    var x31: u64 = undefined;
+    var x32: u1 = undefined;
+    p256AddcarryxU64(&x31, &x32, x30, x15, (@intCast(u64, x26) + x22));
+    var x33: u64 = undefined;
+    var x34: u1 = undefined;
+    p256AddcarryxU64(&x33, &x34, x32, x17, x19);
+    var x35: u64 = undefined;
+    var x36: u1 = undefined;
+    p256AddcarryxU64(&x35, &x36, x34, (@intCast(u64, x18) + x6), x20);
+    // Round 2, multiply: x1 times the four constants.
+    var x37: u64 = undefined;
+    var x38: u64 = undefined;
+    p256MulxU64(&x37, &x38, x1, 0x4fffffffd);
+    var x39: u64 = undefined;
+    var x40: u64 = undefined;
+    p256MulxU64(&x39, &x40, x1, 0xfffffffffffffffe);
+    var x41: u64 = undefined;
+    var x42: u64 = undefined;
+    p256MulxU64(&x41, &x42, x1, 0xfffffffbffffffff);
+    var x43: u64 = undefined;
+    var x44: u64 = undefined;
+    p256MulxU64(&x43, &x44, x1, 0x3);
+    var x45: u64 = undefined;
+    var x46: u1 = undefined;
+    p256AddcarryxU64(&x45, &x46, 0x0, x44, x41);
+    var x47: u64 = undefined;
+    var x48: u1 = undefined;
+    p256AddcarryxU64(&x47, &x48, x46, x42, x39);
+    var x49: u64 = undefined;
+    var x50: u1 = undefined;
+    p256AddcarryxU64(&x49, &x50, x48, x40, x37);
+    // Round 2, accumulate: add the products onto the accumulator (x29, x31, x33, x35).
+    var x51: u64 = undefined;
+    var x52: u1 = undefined;
+    p256AddcarryxU64(&x51, &x52, 0x0, x29, x43);
+    var x53: u64 = undefined;
+    var x54: u1 = undefined;
+    p256AddcarryxU64(&x53, &x54, x52, x31, x45);
+    var x55: u64 = undefined;
+    var x56: u1 = undefined;
+    p256AddcarryxU64(&x55, &x56, x54, x33, x47);
+    var x57: u64 = undefined;
+    var x58: u1 = undefined;
+    p256AddcarryxU64(&x57, &x58, x56, x35, x49);
+    // Round 2, reduce: driven by the lowest word x51.
+    var x59: u64 = undefined;
+    var x60: u64 = undefined;
+    p256MulxU64(&x59, &x60, x51, 0xffffffff00000001);
+    var x61: u64 = undefined;
+    var x62: u64 = undefined;
+    p256MulxU64(&x61, &x62, x51, 0xffffffff);
+    var x63: u64 = undefined;
+    var x64: u64 = undefined;
+    p256MulxU64(&x63, &x64, x51, 0xffffffffffffffff);
+    var x65: u64 = undefined;
+    var x66: u1 = undefined;
+    p256AddcarryxU64(&x65, &x66, 0x0, x64, x61);
+    // x67 is not used afterwards; only the carry x68 is propagated.
+    var x67: u64 = undefined;
+    var x68: u1 = undefined;
+    p256AddcarryxU64(&x67, &x68, 0x0, x51, x63);
+    var x69: u64 = undefined;
+    var x70: u1 = undefined;
+    p256AddcarryxU64(&x69, &x70, x68, x53, x65);
+    var x71: u64 = undefined;
+    var x72: u1 = undefined;
+    p256AddcarryxU64(&x71, &x72, x70, x55, (@intCast(u64, x66) + x62));
+    var x73: u64 = undefined;
+    var x74: u1 = undefined;
+    p256AddcarryxU64(&x73, &x74, x72, x57, x59);
+    // The pending top carries (x58, x36, x50) and the high product word x38 are folded in here.
+    var x75: u64 = undefined;
+    var x76: u1 = undefined;
+    p256AddcarryxU64(&x75, &x76, x74, ((@intCast(u64, x58) + @intCast(u64, x36)) + (@intCast(u64, x50) + x38)), x60);
+    // Round 3, multiply: x2 times the four constants.
+    var x77: u64 = undefined;
+    var x78: u64 = undefined;
+    p256MulxU64(&x77, &x78, x2, 0x4fffffffd);
+    var x79: u64 = undefined;
+    var x80: u64 = undefined;
+    p256MulxU64(&x79, &x80, x2, 0xfffffffffffffffe);
+    var x81: u64 = undefined;
+    var x82: u64 = undefined;
+    p256MulxU64(&x81, &x82, x2, 0xfffffffbffffffff);
+    var x83: u64 = undefined;
+    var x84: u64 = undefined;
+    p256MulxU64(&x83, &x84, x2, 0x3);
+    var x85: u64 = undefined;
+    var x86: u1 = undefined;
+    p256AddcarryxU64(&x85, &x86, 0x0, x84, x81);
+    var x87: u64 = undefined;
+    var x88: u1 = undefined;
+    p256AddcarryxU64(&x87, &x88, x86, x82, x79);
+    var x89: u64 = undefined;
+    var x90: u1 = undefined;
+    p256AddcarryxU64(&x89, &x90, x88, x80, x77);
+    // Round 3, accumulate: add the products onto the accumulator (x69, x71, x73, x75).
+    var x91: u64 = undefined;
+    var x92: u1 = undefined;
+    p256AddcarryxU64(&x91, &x92, 0x0, x69, x83);
+    var x93: u64 = undefined;
+    var x94: u1 = undefined;
+    p256AddcarryxU64(&x93, &x94, x92, x71, x85);
+    var x95: u64 = undefined;
+    var x96: u1 = undefined;
+    p256AddcarryxU64(&x95, &x96, x94, x73, x87);
+    var x97: u64 = undefined;
+    var x98: u1 = undefined;
+    p256AddcarryxU64(&x97, &x98, x96, x75, x89);
+    // Round 3, reduce: driven by the lowest word x91.
+    var x99: u64 = undefined;
+    var x100: u64 = undefined;
+    p256MulxU64(&x99, &x100, x91, 0xffffffff00000001);
+    var x101: u64 = undefined;
+    var x102: u64 = undefined;
+    p256MulxU64(&x101, &x102, x91, 0xffffffff);
+    var x103: u64 = undefined;
+    var x104: u64 = undefined;
+    p256MulxU64(&x103, &x104, x91, 0xffffffffffffffff);
+    var x105: u64 = undefined;
+    var x106: u1 = undefined;
+    p256AddcarryxU64(&x105, &x106, 0x0, x104, x101);
+    // x107 is not used afterwards; only the carry x108 is propagated.
+    var x107: u64 = undefined;
+    var x108: u1 = undefined;
+    p256AddcarryxU64(&x107, &x108, 0x0, x91, x103);
+    var x109: u64 = undefined;
+    var x110: u1 = undefined;
+    p256AddcarryxU64(&x109, &x110, x108, x93, x105);
+    var x111: u64 = undefined;
+    var x112: u1 = undefined;
+    p256AddcarryxU64(&x111, &x112, x110, x95, (@intCast(u64, x106) + x102));
+    var x113: u64 = undefined;
+    var x114: u1 = undefined;
+    p256AddcarryxU64(&x113, &x114, x112, x97, x99);
+    // The pending top carries (x98, x76, x90) and the high product word x78 are folded in here.
+    var x115: u64 = undefined;
+    var x116: u1 = undefined;
+    p256AddcarryxU64(&x115, &x116, x114, ((@intCast(u64, x98) + @intCast(u64, x76)) + (@intCast(u64, x90) + x78)), x100);
+    // Round 4, multiply: x3 times the four constants.
+    var x117: u64 = undefined;
+    var x118: u64 = undefined;
+    p256MulxU64(&x117, &x118, x3, 0x4fffffffd);
+    var x119: u64 = undefined;
+    var x120: u64 = undefined;
+    p256MulxU64(&x119, &x120, x3, 0xfffffffffffffffe);
+    var x121: u64 = undefined;
+    var x122: u64 = undefined;
+    p256MulxU64(&x121, &x122, x3, 0xfffffffbffffffff);
+    var x123: u64 = undefined;
+    var x124: u64 = undefined;
+    p256MulxU64(&x123, &x124, x3, 0x3);
+    var x125: u64 = undefined;
+    var x126: u1 = undefined;
+    p256AddcarryxU64(&x125, &x126, 0x0, x124, x121);
+    var x127: u64 = undefined;
+    var x128: u1 = undefined;
+    p256AddcarryxU64(&x127, &x128, x126, x122, x119);
+    var x129: u64 = undefined;
+    var x130: u1 = undefined;
+    p256AddcarryxU64(&x129, &x130, x128, x120, x117);
+    // Round 4, accumulate: add the products onto the accumulator (x109, x111, x113, x115).
+    var x131: u64 = undefined;
+    var x132: u1 = undefined;
+    p256AddcarryxU64(&x131, &x132, 0x0, x109, x123);
+    var x133: u64 = undefined;
+    var x134: u1 = undefined;
+    p256AddcarryxU64(&x133, &x134, x132, x111, x125);
+    var x135: u64 = undefined;
+    var x136: u1 = undefined;
+    p256AddcarryxU64(&x135, &x136, x134, x113, x127);
+    var x137: u64 = undefined;
+    var x138: u1 = undefined;
+    p256AddcarryxU64(&x137, &x138, x136, x115, x129);
+    // Round 4, reduce: driven by the lowest word x131.
+    var x139: u64 = undefined;
+    var x140: u64 = undefined;
+    p256MulxU64(&x139, &x140, x131, 0xffffffff00000001);
+    var x141: u64 = undefined;
+    var x142: u64 = undefined;
+    p256MulxU64(&x141, &x142, x131, 0xffffffff);
+    var x143: u64 = undefined;
+    var x144: u64 = undefined;
+    p256MulxU64(&x143, &x144, x131, 0xffffffffffffffff);
+    var x145: u64 = undefined;
+    var x146: u1 = undefined;
+    p256AddcarryxU64(&x145, &x146, 0x0, x144, x141);
+    // x147 is not used afterwards; only the carry x148 is propagated.
+    var x147: u64 = undefined;
+    var x148: u1 = undefined;
+    p256AddcarryxU64(&x147, &x148, 0x0, x131, x143);
+    var x149: u64 = undefined;
+    var x150: u1 = undefined;
+    p256AddcarryxU64(&x149, &x150, x148, x133, x145);
+    var x151: u64 = undefined;
+    var x152: u1 = undefined;
+    p256AddcarryxU64(&x151, &x152, x150, x135, (@intCast(u64, x146) + x142));
+    var x153: u64 = undefined;
+    var x154: u1 = undefined;
+    p256AddcarryxU64(&x153, &x154, x152, x137, x139);
+    // The pending top carries (x138, x116, x130) and the high product word x118 are folded in here.
+    var x155: u64 = undefined;
+    var x156: u1 = undefined;
+    p256AddcarryxU64(&x155, &x156, x154, ((@intCast(u64, x138) + @intCast(u64, x116)) + (@intCast(u64, x130) + x118)), x140);
+    // Trial subtraction of the constant limbs from (x149, x151, x153, x155), lowest first,
+    // with the carry x156 acting as a fifth limb.
+    // NOTE(review): these constants appear to be the limbs of the modulus m — confirm.
+    var x157: u64 = undefined;
+    var x158: u1 = undefined;
+    p256SubborrowxU64(&x157, &x158, 0x0, x149, 0xffffffffffffffff);
+    var x159: u64 = undefined;
+    var x160: u1 = undefined;
+    p256SubborrowxU64(&x159, &x160, x158, x151, 0xffffffff);
+    var x161: u64 = undefined;
+    var x162: u1 = undefined;
+    p256SubborrowxU64(&x161, &x162, x160, x153, @intCast(u64, 0x0));
+    var x163: u64 = undefined;
+    var x164: u1 = undefined;
+    p256SubborrowxU64(&x163, &x164, x162, x155, 0xffffffff00000001);
+    // Only the borrow x166 of this step is consumed; x165 itself is unused.
+    var x165: u64 = undefined;
+    var x166: u1 = undefined;
+    p256SubborrowxU64(&x165, &x166, x164, @intCast(u64, x156), @intCast(u64, 0x0));
+    // Branch-free select per limb on x166: the subtracted value when x166 is 0,
+    // the unsubtracted value otherwise.
+    var x167: u64 = undefined;
+    p256CmovznzU64(&x167, x166, x157, x149);
+    var x168: u64 = undefined;
+    p256CmovznzU64(&x168, x166, x159, x151);
+    var x169: u64 = undefined;
+    p256CmovznzU64(&x169, x166, x161, x153);
+    var x170: u64 = undefined;
+    p256CmovznzU64(&x170, x166, x163, x155);
+    out1[0] = x167;
+    out1[1] = x168;
+    out1[2] = x169;
+    out1[3] = x170;
+}
+
+/// p256Nonzero writes a single word that is non-zero exactly when the input is non-zero.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
+///
+/// Bounds:
+///   every limb of arg1 lies in [0x0 ~> 0xffffffffffffffff]
+///   out1 lies in [0x0 ~> 0xffffffffffffffff]
+pub fn p256Nonzero(out1: *u64, arg1: Limbs) void {
+    // OR all four limbs together: the fold is zero only when every limb is zero.
+    var folded: u64 = (arg1[3]);
+    folded |= (arg1[2]);
+    folded |= (arg1[1]);
+    folded |= (arg1[0]);
+    out1.* = folded;
+}
+
+/// p256Selectznz chooses between two multi-limb values based on a single condition bit.
+/// Postconditions:
+///   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+///
+/// Bounds:
+///   arg1 lies in [0x0 ~> 0x1]
+///   every limb of arg2, arg3 and out1 lies in [0x0 ~> 0xffffffffffffffff]
+pub fn p256Selectznz(out1: *Limbs, arg1: u1, arg2: Limbs, arg3: Limbs) void {
+    // Select every limb into a temporary first, so that all inputs are read
+    // before the first store to out1.
+    var picked: [4]u64 = undefined;
+    var limb: usize = 0;
+    while (limb < 4) : (limb += 1) {
+        p256CmovznzU64(&picked[limb], arg1, (arg2[limb]), (arg3[limb]));
+    }
+
+    limb = 0;
+    while (limb < 4) : (limb += 1) {
+        out1[limb] = picked[limb];
+    }
+}
+
+/// p256ToBytes writes a field element that is NOT in the Montgomery domain as 32 bytes in little-endian order.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
+///
+/// Bounds:
+///   every limb of arg1 lies in [0x0 ~> 0xffffffffffffffff]
+///   every one of the 32 bytes of out1 lies in [0x0 ~> 0xff]
+pub fn p256ToBytes(out1: *[32]u8, arg1: Limbs) void {
+    // Snapshot all limbs up front so that every read of arg1 happens before
+    // the first store to out1.
+    const limbs = [4]u64{ (arg1[0]), (arg1[1]), (arg1[2]), (arg1[3]) };
+
+    // Limb k supplies bytes 8*k .. 8*k+7, least-significant byte first.
+    var limb_index: usize = 0;
+    while (limb_index < 4) : (limb_index += 1) {
+        var remaining: u64 = limbs[limb_index];
+        var byte_index: usize = 0;
+        while (byte_index < 8) : (byte_index += 1) {
+            // Emit the current lowest byte, then shift the next one into place.
+            out1[(limb_index * 8) + byte_index] = @intCast(u8, (remaining & @intCast(u64, 0xff)));
+            remaining >>= 8;
+        }
+    }
+}
+
+/// The function p256FromBytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
+/// Each group of eight consecutive input bytes becomes one 64-bit limb:
+/// bytes 0..7 form out1[0], bytes 8..15 form out1[1], and so on, with the
+/// lowest-indexed byte of each group being the least significant.
+/// Preconditions:
+///   0 ≤ bytes_eval arg1 < m
+/// Postconditions:
+///   eval out1 mod m = bytes_eval arg1 mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: 32 bytes, each in [0x0 ~> 0xff]
+/// Output Bounds:
+///   out1: 4 limbs, each in [0x0 ~> 0xffffffffffffffff]
+pub fn p256FromBytes(out1: *Limbs, arg1: [32]u8) void {
+    // Assemble all limbs locally first; the output is only written at the end.
+    var limbs: [4]u64 = undefined;
+    var limb_index: usize = 0;
+    while (limb_index < limbs.len) : (limb_index += 1) {
+        var acc: u64 = 0;
+        // Walk the group from its most significant byte down, shifting the
+        // accumulator up by one byte before merging the next one in.
+        var byte_index: usize = 8;
+        while (byte_index > 0) {
+            byte_index -= 1;
+            acc = (acc << 8) | @as(u64, arg1[limb_index * 8 + byte_index]);
+        }
+        limbs[limb_index] = acc;
+    }
+    out1[0] = limbs[0];
+    out1[1] = limbs[1];
+    out1[2] = limbs[2];
+    out1[3] = limbs[3];
+}
+
+/// The function p256SetOne returns the field element one in the Montgomery domain.
+/// The four limbs are written to `out1` in index order 0..3.
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = 1 mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+/// Output Bounds:
+///   out1: 4 limbs, each in [0x0 ~> 0xffffffffffffffff]
+pub fn p256SetOne(out1: *Limbs) void {
+    const one_limbs = [4]u64{
+        0x1,
+        0xffffffff00000000,
+        0xffffffffffffffff,
+        0xfffffffe,
+    };
+    for (one_limbs) |limb, i| {
+        out1[i] = limb;
+    }
+}
+
+/// The function p256Msat returns the saturated representation of the prime modulus.
+/// Five 64-bit limbs are written to `out1`; the fifth limb is zero.
+/// Postconditions:
+///   twos_complement_eval out1 = m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+/// Output Bounds:
+///   out1: 5 limbs, each in [0x0 ~> 0xffffffffffffffff]
+pub fn p256Msat(out1: *[5]u64) void {
+    const modulus_limbs = [5]u64{
+        0xffffffffffffffff,
+        0xffffffff,
+        0x0,
+        0xffffffff00000001,
+        0x0,
+    };
+    for (modulus_limbs) |limb, i| {
+        out1[i] = limb;
+    }
+}
+
+/// The function p256Divstep computes a divstep, the iteration step of the Bernstein-Yang inversion that p256DivstepPrecomp supplies the precomputed value for: arg1 is the step counter, arg2/arg3 are 5-limb two's-complement integers, and arg4/arg5 are field elements in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg4 < m
+///   0 ≤ eval arg5 < m
+/// Postconditions:
+///   out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1)
+///   twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2)
+///   twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋)
+///   eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m)
+///   eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg5) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m)
+///   0 ≤ eval out4 < m
+///   0 ≤ eval out5 < m
+///   0 ≤ eval out2 < m
+///   0 ≤ eval out3 < m
+///
+/// Input Bounds:
+///   arg1: [0x0 ~> 0xffffffffffffffff]
+///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   arg4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   arg5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+///   out2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   out3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   out4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   out5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub fn p256Divstep(out1: *u64, out2: *[5]u64, out3: *[5]u64, out4: *Limbs, out5: *Limbs, arg1: u64, arg2: [5]u64, arg3: [5]u64, arg4: Limbs, arg5: Limbs) void {
+    var x1: u64 = undefined; // x1 = ~arg1 + 1, i.e. -arg1 in two's complement
+    var x2: u1 = undefined;
+    p256AddcarryxU64(&x1, &x2, 0x0, (~arg1), @intCast(u64, 0x1));
+    const x3: u1 = (@intCast(u1, (x1 >> 63)) & @intCast(u1, ((arg3[0]) & @intCast(u64, 0x1)))); // x3 = 1 iff the top bit of -arg1 is set and arg3 is odd: the "0 < arg1 ∧ arg3 is odd" case of the postconditions
+    var x4: u64 = undefined; // x4 = -arg1 again (same computation as x1)
+    var x5: u1 = undefined;
+    p256AddcarryxU64(&x4, &x5, 0x0, (~arg1), @intCast(u64, 0x1));
+    var x6: u64 = undefined; // x6 = arg1, or -arg1 when x3 is set (presumably Cmovznz yields its last argument when the flag is nonzero, as the postconditions require; confirm against its definition)
+    p256CmovznzU64(&x6, x3, arg1, x4);
+    var x7: u64 = undefined; // x7..x11 (-> out2) = arg2, or arg3 when x3 is set
+    p256CmovznzU64(&x7, x3, (arg2[0]), (arg3[0]));
+    var x8: u64 = undefined;
+    p256CmovznzU64(&x8, x3, (arg2[1]), (arg3[1]));
+    var x9: u64 = undefined;
+    p256CmovznzU64(&x9, x3, (arg2[2]), (arg3[2]));
+    var x10: u64 = undefined;
+    p256CmovznzU64(&x10, x3, (arg2[3]), (arg3[3]));
+    var x11: u64 = undefined;
+    p256CmovznzU64(&x11, x3, (arg2[4]), (arg3[4]));
+    var x12: u64 = undefined; // x12..x20 = ~arg2 + 1, i.e. -arg2 over five limbs
+    var x13: u1 = undefined;
+    p256AddcarryxU64(&x12, &x13, 0x0, @intCast(u64, 0x1), (~(arg2[0])));
+    var x14: u64 = undefined;
+    var x15: u1 = undefined;
+    p256AddcarryxU64(&x14, &x15, x13, @intCast(u64, 0x0), (~(arg2[1])));
+    var x16: u64 = undefined;
+    var x17: u1 = undefined;
+    p256AddcarryxU64(&x16, &x17, x15, @intCast(u64, 0x0), (~(arg2[2])));
+    var x18: u64 = undefined;
+    var x19: u1 = undefined;
+    p256AddcarryxU64(&x18, &x19, x17, @intCast(u64, 0x0), (~(arg2[3])));
+    var x20: u64 = undefined;
+    var x21: u1 = undefined;
+    p256AddcarryxU64(&x20, &x21, x19, @intCast(u64, 0x0), (~(arg2[4])));
+    var x22: u64 = undefined; // x22..x26 = arg3, or -arg2 when x3 is set
+    p256CmovznzU64(&x22, x3, (arg3[0]), x12);
+    var x23: u64 = undefined;
+    p256CmovznzU64(&x23, x3, (arg3[1]), x14);
+    var x24: u64 = undefined;
+    p256CmovznzU64(&x24, x3, (arg3[2]), x16);
+    var x25: u64 = undefined;
+    p256CmovznzU64(&x25, x3, (arg3[3]), x18);
+    var x26: u64 = undefined;
+    p256CmovznzU64(&x26, x3, (arg3[4]), x20);
+    var x27: u64 = undefined; // x27..x30 = arg4, or arg5 when x3 is set
+    p256CmovznzU64(&x27, x3, (arg4[0]), (arg5[0]));
+    var x28: u64 = undefined;
+    p256CmovznzU64(&x28, x3, (arg4[1]), (arg5[1]));
+    var x29: u64 = undefined;
+    p256CmovznzU64(&x29, x3, (arg4[2]), (arg5[2]));
+    var x30: u64 = undefined;
+    p256CmovznzU64(&x30, x3, (arg4[3]), (arg5[3]));
+    var x31: u64 = undefined; // x31..x37 (carry x38) = 2 * (x27..x30)
+    var x32: u1 = undefined;
+    p256AddcarryxU64(&x31, &x32, 0x0, x27, x27);
+    var x33: u64 = undefined;
+    var x34: u1 = undefined;
+    p256AddcarryxU64(&x33, &x34, x32, x28, x28);
+    var x35: u64 = undefined;
+    var x36: u1 = undefined;
+    p256AddcarryxU64(&x35, &x36, x34, x29, x29);
+    var x37: u64 = undefined;
+    var x38: u1 = undefined;
+    p256AddcarryxU64(&x37, &x38, x36, x30, x30);
+    var x39: u64 = undefined; // x39..x45 = that double minus the modulus limbs (the same constants p256Msat writes); x48 is the final borrow
+    var x40: u1 = undefined;
+    p256SubborrowxU64(&x39, &x40, 0x0, x31, 0xffffffffffffffff);
+    var x41: u64 = undefined;
+    var x42: u1 = undefined;
+    p256SubborrowxU64(&x41, &x42, x40, x33, 0xffffffff);
+    var x43: u64 = undefined;
+    var x44: u1 = undefined;
+    p256SubborrowxU64(&x43, &x44, x42, x35, @intCast(u64, 0x0));
+    var x45: u64 = undefined;
+    var x46: u1 = undefined;
+    p256SubborrowxU64(&x45, &x46, x44, x37, 0xffffffff00000001);
+    var x47: u64 = undefined;
+    var x48: u1 = undefined;
+    p256SubborrowxU64(&x47, &x48, x46, @intCast(u64, x38), @intCast(u64, 0x0));
+    const x49: u64 = (arg4[3]);
+    const x50: u64 = (arg4[2]);
+    const x51: u64 = (arg4[1]);
+    const x52: u64 = (arg4[0]);
+    var x53: u64 = undefined; // x53..x59 = 0 - arg4, with the final borrow in x60
+    var x54: u1 = undefined;
+    p256SubborrowxU64(&x53, &x54, 0x0, @intCast(u64, 0x0), x52);
+    var x55: u64 = undefined;
+    var x56: u1 = undefined;
+    p256SubborrowxU64(&x55, &x56, x54, @intCast(u64, 0x0), x51);
+    var x57: u64 = undefined;
+    var x58: u1 = undefined;
+    p256SubborrowxU64(&x57, &x58, x56, @intCast(u64, 0x0), x50);
+    var x59: u64 = undefined;
+    var x60: u1 = undefined;
+    p256SubborrowxU64(&x59, &x60, x58, @intCast(u64, 0x0), x49);
+    var x61: u64 = undefined; // x61 = all-ones mask if the negation borrowed, else 0
+    p256CmovznzU64(&x61, x60, @intCast(u64, 0x0), 0xffffffffffffffff);
+    var x62: u64 = undefined; // x62..x68 = (0 - arg4) + (modulus limbs & mask), i.e. -arg4 mod m
+    var x63: u1 = undefined;
+    p256AddcarryxU64(&x62, &x63, 0x0, x53, x61);
+    var x64: u64 = undefined;
+    var x65: u1 = undefined;
+    p256AddcarryxU64(&x64, &x65, x63, x55, (x61 & 0xffffffff));
+    var x66: u64 = undefined;
+    var x67: u1 = undefined;
+    p256AddcarryxU64(&x66, &x67, x65, x57, @intCast(u64, 0x0));
+    var x68: u64 = undefined;
+    var x69: u1 = undefined;
+    p256AddcarryxU64(&x68, &x69, x67, x59, (x61 & 0xffffffff00000001));
+    var x70: u64 = undefined; // x70..x73 = arg5, or -arg4 mod m when x3 is set
+    p256CmovznzU64(&x70, x3, (arg5[0]), x62);
+    var x71: u64 = undefined;
+    p256CmovznzU64(&x71, x3, (arg5[1]), x64);
+    var x72: u64 = undefined;
+    p256CmovznzU64(&x72, x3, (arg5[2]), x66);
+    var x73: u64 = undefined;
+    p256CmovznzU64(&x73, x3, (arg5[3]), x68);
+    const x74: u1 = @intCast(u1, (x22 & @intCast(u64, 0x1))); // x74 = low bit of x22 (parity of the selected arg3 / -arg2 value)
+    var x75: u64 = undefined; // x75..x79 = x7..x11 if x74 is set, else 0
+    p256CmovznzU64(&x75, x74, @intCast(u64, 0x0), x7);
+    var x76: u64 = undefined;
+    p256CmovznzU64(&x76, x74, @intCast(u64, 0x0), x8);
+    var x77: u64 = undefined;
+    p256CmovznzU64(&x77, x74, @intCast(u64, 0x0), x9);
+    var x78: u64 = undefined;
+    p256CmovznzU64(&x78, x74, @intCast(u64, 0x0), x10);
+    var x79: u64 = undefined;
+    p256CmovznzU64(&x79, x74, @intCast(u64, 0x0), x11);
+    var x80: u64 = undefined; // x80..x88 = (x22..x26) + (x75..x79)
+    var x81: u1 = undefined;
+    p256AddcarryxU64(&x80, &x81, 0x0, x22, x75);
+    var x82: u64 = undefined;
+    var x83: u1 = undefined;
+    p256AddcarryxU64(&x82, &x83, x81, x23, x76);
+    var x84: u64 = undefined;
+    var x85: u1 = undefined;
+    p256AddcarryxU64(&x84, &x85, x83, x24, x77);
+    var x86: u64 = undefined;
+    var x87: u1 = undefined;
+    p256AddcarryxU64(&x86, &x87, x85, x25, x78);
+    var x88: u64 = undefined;
+    var x89: u1 = undefined;
+    p256AddcarryxU64(&x88, &x89, x87, x26, x79);
+    var x90: u64 = undefined; // x90..x93 = x27..x30 if x74 is set, else 0
+    p256CmovznzU64(&x90, x74, @intCast(u64, 0x0), x27);
+    var x91: u64 = undefined;
+    p256CmovznzU64(&x91, x74, @intCast(u64, 0x0), x28);
+    var x92: u64 = undefined;
+    p256CmovznzU64(&x92, x74, @intCast(u64, 0x0), x29);
+    var x93: u64 = undefined;
+    p256CmovznzU64(&x93, x74, @intCast(u64, 0x0), x30);
+    var x94: u64 = undefined; // x94..x100 (carry x101) = (x70..x73) + (x90..x93)
+    var x95: u1 = undefined;
+    p256AddcarryxU64(&x94, &x95, 0x0, x70, x90);
+    var x96: u64 = undefined;
+    var x97: u1 = undefined;
+    p256AddcarryxU64(&x96, &x97, x95, x71, x91);
+    var x98: u64 = undefined;
+    var x99: u1 = undefined;
+    p256AddcarryxU64(&x98, &x99, x97, x72, x92);
+    var x100: u64 = undefined;
+    var x101: u1 = undefined;
+    p256AddcarryxU64(&x100, &x101, x99, x73, x93);
+    var x102: u64 = undefined; // x102..x108 = that sum minus the modulus limbs; x111 is the final borrow
+    var x103: u1 = undefined;
+    p256SubborrowxU64(&x102, &x103, 0x0, x94, 0xffffffffffffffff);
+    var x104: u64 = undefined;
+    var x105: u1 = undefined;
+    p256SubborrowxU64(&x104, &x105, x103, x96, 0xffffffff);
+    var x106: u64 = undefined;
+    var x107: u1 = undefined;
+    p256SubborrowxU64(&x106, &x107, x105, x98, @intCast(u64, 0x0));
+    var x108: u64 = undefined;
+    var x109: u1 = undefined;
+    p256SubborrowxU64(&x108, &x109, x107, x100, 0xffffffff00000001);
+    var x110: u64 = undefined;
+    var x111: u1 = undefined;
+    p256SubborrowxU64(&x110, &x111, x109, @intCast(u64, x101), @intCast(u64, 0x0));
+    var x112: u64 = undefined; // x112 (-> out1) = x6 + 1
+    var x113: u1 = undefined;
+    p256AddcarryxU64(&x112, &x113, 0x0, x6, @intCast(u64, 0x1));
+    const x114: u64 = ((x80 >> 1) | ((x82 << 63) & 0xffffffffffffffff)); // x114..x118 (-> out3) = (x80..x88) shifted right by one bit across limbs
+    const x115: u64 = ((x82 >> 1) | ((x84 << 63) & 0xffffffffffffffff));
+    const x116: u64 = ((x84 >> 1) | ((x86 << 63) & 0xffffffffffffffff));
+    const x117: u64 = ((x86 >> 1) | ((x88 << 63) & 0xffffffffffffffff));
+    const x118: u64 = ((x88 & 0x8000000000000000) | (x88 >> 1)); // the top limb keeps its sign bit, making the shift arithmetic
+    var x119: u64 = undefined; // x119..x122 (-> out4) = the reduced double x39..x45, or the unreduced x31..x37 if the subtraction borrowed (x48)
+    p256CmovznzU64(&x119, x48, x39, x31);
+    var x120: u64 = undefined;
+    p256CmovznzU64(&x120, x48, x41, x33);
+    var x121: u64 = undefined;
+    p256CmovznzU64(&x121, x48, x43, x35);
+    var x122: u64 = undefined;
+    p256CmovznzU64(&x122, x48, x45, x37);
+    var x123: u64 = undefined; // x123..x126 (-> out5) = the reduced sum x102..x108, or the unreduced x94..x100 if the subtraction borrowed (x111)
+    p256CmovznzU64(&x123, x111, x102, x94);
+    var x124: u64 = undefined;
+    p256CmovznzU64(&x124, x111, x104, x96);
+    var x125: u64 = undefined;
+    p256CmovznzU64(&x125, x111, x106, x98);
+    var x126: u64 = undefined;
+    p256CmovznzU64(&x126, x111, x108, x100);
+    out1.* = x112;
+    out2[0] = x7;
+    out2[1] = x8;
+    out2[2] = x9;
+    out2[3] = x10;
+    out2[4] = x11;
+    out3[0] = x114;
+    out3[1] = x115;
+    out3[2] = x116;
+    out3[3] = x117;
+    out3[4] = x118;
+    out4[0] = x119;
+    out4[1] = x120;
+    out4[2] = x121;
+    out4[3] = x122;
+    out5[0] = x123;
+    out5[1] = x124;
+    out5[2] = x125;
+    out5[3] = x126;
+}
+
+/// The function p256DivstepPrecomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form).
+/// The four limbs are written to `out1` in index order 0..3.
+/// Postconditions:
+///   eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if (log2 m) + 1 < 46 then ⌊(49 * ((log2 m) + 1) + 80) / 17⌋ else ⌊(49 * ((log2 m) + 1) + 57) / 17⌋)
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+/// Output Bounds:
+///   out1: 4 limbs, each in [0x0 ~> 0xffffffffffffffff]
+pub fn p256DivstepPrecomp(out1: *Limbs) void {
+    const precomp_limbs = [4]u64{
+        0x67ffffffb8000000,
+        0xc000000038000000,
+        0xd80000007fffffff,
+        0x2fffffffffffffff,
+    };
+    for (precomp_limbs) |limb, i| {
+        out1[i] = limb;
+    }
+}
lib/std/crypto/pcurves/p256/scalar.zig
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2015-2021 Zig Contributors
+// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
+// The MIT license requires this copyright notice to be included in all copies
+// and substantial portions of the software.
+
+const std = @import("std");
+const builtin = std.builtin;
+const crypto = std.crypto;
+const debug = std.debug;
+const math = std.math;
+const mem = std.mem;
+
+const Fe = @import("field.zig").Fe;
+
+const NonCanonicalError = std.crypto.errors.NonCanonicalError;
+const NotSquareError = std.crypto.errors.NotSquareError;
+
+/// A compressed (serialized) scalar: 32 bytes, in canonical form. The byte order is not fixed by the type; functions that consume or produce one take it as an `endian` argument.
+pub const CompressedScalar = [32]u8;
+
+/// Reject a scalar whose encoding is not canonical.
+/// `endian` is the byte order of `s`. It is forwarded to
+/// `Fe.rejectNonCanonical`, which requires it; the previous one-argument call
+/// could not compile once this function was referenced.
+/// Returns a `NonCanonicalError` if the encoding is rejected.
+pub fn rejectNonCanonical(s: CompressedScalar, endian: builtin.Endian) NonCanonicalError!void {
+    return Fe.rejectNonCanonical(s, endian);
+}
+
+/// Reduce a 48-byte scalar to the field size.
+/// `endian` is the byte order of both the input `s` and the returned
+/// encoding. `Scalar.fromBytes48` and `Scalar.toBytes` both require it, so the
+/// previous argument-less calls could not compile.
+pub fn reduce48(s: [48]u8, endian: builtin.Endian) CompressedScalar {
+    return Scalar.fromBytes48(s, endian).toBytes(endian);
+}
+
+/// Reduce a 64-byte scalar to the field size.
+/// `endian` is the byte order of both the input `s` and the returned
+/// encoding. The reduction goes through `Scalar.fromBytes64`: `ScalarDouble`
+/// has no `fromBytes64`, and its result has no `toBytes`.
+pub fn reduce64(s: [64]u8, endian: builtin.Endian) CompressedScalar {
+    return Scalar.fromBytes64(s, endian).toBytes(endian);
+}
+
+/// Return a*b (mod L)
+/// Both operands are decoded with byte order `endian`, and the product is
+/// encoded the same way. Fails with a `NonCanonicalError` if either operand
+/// does not decode.
+pub fn mul(a: CompressedScalar, b: CompressedScalar, endian: builtin.Endian) NonCanonicalError!CompressedScalar {
+    const lhs = try Scalar.fromBytes(a, endian);
+    const rhs = try Scalar.fromBytes(b, endian);
+    const product = lhs.mul(rhs);
+    return product.toBytes(endian);
+}
+
+/// Return a*b+c (mod L)
+/// All three operands are decoded with byte order `endian`, and the result is
+/// encoded the same way. Fails with a `NonCanonicalError` if any operand does
+/// not decode.
+pub fn mulAdd(a: CompressedScalar, b: CompressedScalar, c: CompressedScalar, endian: builtin.Endian) NonCanonicalError!CompressedScalar {
+    const lhs = try Scalar.fromBytes(a, endian);
+    const rhs = try Scalar.fromBytes(b, endian);
+    const product = lhs.mul(rhs);
+    const addend = try Scalar.fromBytes(c, endian);
+    return product.add(addend).toBytes(endian);
+}
+
+/// Return a+b (mod L)
+/// Both operands are decoded with byte order `endian`, and the sum is encoded
+/// the same way. Fails with a `NonCanonicalError` if either operand does not
+/// decode.
+pub fn add(a: CompressedScalar, b: CompressedScalar, endian: builtin.Endian) NonCanonicalError!CompressedScalar {
+    const lhs = try Scalar.fromBytes(a, endian);
+    const rhs = try Scalar.fromBytes(b, endian);
+    const sum = lhs.add(rhs);
+    return sum.toBytes(endian);
+}
+
+/// Return -s (mod L)
+/// `s` is decoded with byte order `endian`, and the negation is encoded the
+/// same way. Fails with a `NonCanonicalError` if `s` does not decode.
+pub fn neg(s: CompressedScalar, endian: builtin.Endian) NonCanonicalError!CompressedScalar {
+    // The parameter is `s`; the previous body referred to an undeclared `a`.
+    return (try Scalar.fromBytes(s, endian)).neg().toBytes(endian);
+}
+
+/// Return (a-b) (mod L)
+/// Both operands are decoded with byte order `endian`, and the difference is
+/// encoded the same way. Fails with a `NonCanonicalError` if either operand
+/// does not decode.
+pub fn sub(a: CompressedScalar, b: CompressedScalar, endian: builtin.Endian) NonCanonicalError!CompressedScalar {
+    // `b` and `endian` are two separate arguments; the previous `b.endian`
+    // was a field access on a byte array.
+    return (try Scalar.fromBytes(a, endian)).sub(try Scalar.fromBytes(b, endian)).toBytes(endian);
+}
+
+/// Return a random scalar
+/// The scalar comes from `Scalar.random` and is encoded with byte order
+/// `endian`.
+pub fn random(endian: builtin.Endian) CompressedScalar {
+    const n = Scalar.random();
+    return n.toBytes(endian);
+}
+
+/// A scalar in unpacked representation.
+/// Every operation is a thin wrapper that forwards to the wrapped `Fe`.
+/// NOTE(review): `Fe` is imported from field.zig in this file, so "mod L"
+/// below is whatever modulus that `Fe` implements. Confirm it is the intended
+/// modulus for scalars.
+pub const Scalar = struct {
+    fe: Fe,
+
+    /// Zero.
+    pub const zero = Scalar{ .fe = Fe.zero };
+
+    /// One.
+    pub const one = Scalar{ .fe = Fe.one };
+
+    /// Unpack a serialized representation of a scalar.
+    /// `endian` is the byte order of `s`. Fails with a `NonCanonicalError` if
+    /// `Fe.fromBytes` rejects the encoding.
+    pub fn fromBytes(s: CompressedScalar, endian: builtin.Endian) NonCanonicalError!Scalar {
+        return Scalar{ .fe = try Fe.fromBytes(s, endian) };
+    }
+
+    /// Reduce a 384 bit input to the field size.
+    /// `endian` is the byte order of `s`.
+    pub fn fromBytes48(s: [48]u8, endian: builtin.Endian) Scalar {
+        const t = ScalarDouble.fromBytes(384, s, endian);
+        return t.reduce(384);
+    }
+
+    /// Reduce a 512 bit input to the field size.
+    /// `endian` is the byte order of `s`.
+    pub fn fromBytes64(s: [64]u8, endian: builtin.Endian) Scalar {
+        const t = ScalarDouble.fromBytes(512, s, endian);
+        return t.reduce(512);
+    }
+
+    /// Pack a scalar into bytes, using byte order `endian`.
+    pub fn toBytes(n: Scalar, endian: builtin.Endian) CompressedScalar {
+        return n.fe.toBytes(endian);
+    }
+
+    /// Return true if the scalar is zero.
+    pub fn isZero(n: Scalar) bool {
+        return n.fe.isZero();
+    }
+
+    /// Return true if a and b are equivalent.
+    pub fn equivalent(a: Scalar, b: Scalar) bool {
+        return a.fe.equivalent(b.fe);
+    }
+
+    /// Compute x+y (mod L)
+    pub fn add(x: Scalar, y: Scalar) Scalar {
+        // `fe` is a field, not a method: access it without call parentheses.
+        return Scalar{ .fe = x.fe.add(y.fe) };
+    }
+
+    /// Compute x-y (mod L)
+    pub fn sub(x: Scalar, y: Scalar) Scalar {
+        return Scalar{ .fe = x.fe.sub(y.fe) };
+    }
+
+    /// Compute 2n (mod L)
+    pub fn dbl(n: Scalar) Scalar {
+        return Scalar{ .fe = n.fe.dbl() };
+    }
+
+    /// Compute x*y (mod L)
+    pub fn mul(x: Scalar, y: Scalar) Scalar {
+        return Scalar{ .fe = x.fe.mul(y.fe) };
+    }
+
+    /// Compute n^2 (mod L)
+    pub fn sq(n: Scalar) Scalar {
+        return Scalar{ .fe = n.fe.sq() };
+    }
+
+    /// Compute a^n (mod L), for a compile-time exponent `n` of type `T`.
+    /// NOTE(review): `T` is not forwarded to `Fe.pow`; confirm that this
+    /// matches `Fe.pow`'s parameter list.
+    pub fn pow(a: Scalar, comptime T: type, comptime n: T) Scalar {
+        return Scalar{ .fe = a.fe.pow(n) };
+    }
+
+    /// Compute -n (mod L)
+    pub fn neg(n: Scalar) Scalar {
+        return Scalar{ .fe = n.fe.neg() };
+    }
+
+    /// Compute n^-1 (mod L)
+    pub fn invert(n: Scalar) Scalar {
+        return Scalar{ .fe = n.fe.invert() };
+    }
+
+    /// Return true if n is a quadratic residue mod L.
+    /// The result of `Fe.isSquare` is returned as-is, so the return type is
+    /// `bool` rather than `Scalar`.
+    pub fn isSquare(n: Scalar) bool {
+        return n.fe.isSquare();
+    }
+
+    /// Return the square root of n, or NotSquare if there isn't any solution.
+    pub fn sqrt(n: Scalar) NotSquareError!Scalar {
+        return Scalar{ .fe = try n.fe.sqrt() };
+    }
+
+    /// Return a random, non-zero scalar < L.
+    /// Draws 48 random bytes and reduces them, retrying while the result is
+    /// zero.
+    pub fn random() Scalar {
+        var s: [48]u8 = undefined;
+        while (true) {
+            crypto.random.bytes(&s);
+            const n = Scalar.fromBytes48(s, .Little);
+            if (!n.isZero()) {
+                return n;
+            }
+        }
+    }
+};
+
+/// A wide integer of up to 512 bits, split into three field elements so that
+/// it can be reduced with field arithmetic. With the input viewed in
+/// little-endian order, `x1` holds bytes 0..24, `x2` bytes 24..48 and `x3`
+/// whatever remains from byte 48 on.
+const ScalarDouble = struct {
+    x1: Fe,
+    x2: Fe,
+    x3: Fe,
+
+    /// Zero-extend a little-endian `chunk` to 32 bytes and decode it.
+    fn chunkToFe(chunk: []const u8) Fe {
+        var padded = [_]u8{0} ** 32;
+        mem.copy(u8, padded[0..chunk.len], chunk);
+        return Fe.fromBytes(padded, .Little) catch unreachable;
+    }
+
+    /// Split a `bits`-bit input, given in byte order `endian`, into 24-byte chunks.
+    fn fromBytes(comptime bits: usize, s_: [bits / 8]u8, endian: builtin.Endian) ScalarDouble {
+        debug.assert(bits > 0 and bits <= 512 and bits >= Fe.saturated_bits and bits <= Fe.saturated_bits * 3);
+
+        // Work on a little-endian copy of the input.
+        var s: [bits / 8]u8 = s_;
+        if (endian == .Big) {
+            var i: usize = 0;
+            while (i < s.len) : (i += 1) {
+                s[i] = s_[s_.len - 1 - i];
+            }
+        }
+
+        const low = chunkToFe(s[0..math.min(s.len, 24)]);
+        const mid = if (s.len >= 24)
+            chunkToFe(s[24..][0..math.min(s.len - 24, 24)])
+        else
+            Fe.zero;
+        const high = if (s.len >= 48)
+            chunkToFe(s[48..][0 .. s.len - 48])
+        else
+            Fe.zero;
+        return ScalarDouble{ .x1 = low, .x2 = mid, .x3 = high };
+    }
+
+    /// Recombine the chunks as x1 + x2 * 2^192 + x3 * 2^384 using field arithmetic.
+    fn reduce(expanded: ScalarDouble, comptime bits: usize) Scalar {
+        debug.assert(bits > 0 and bits <= Fe.saturated_bits * 3 and bits <= 512);
+
+        if (bits < 192) {
+            return Scalar{ .fe = expanded.x1 };
+        }
+        const radix = Fe.fromInt(1 << 192) catch unreachable;
+        const low_and_mid = expanded.x1.add(expanded.x2.mul(radix));
+        if (bits < 384) {
+            return Scalar{ .fe = low_and_mid };
+        }
+        // Squaring the 2^192 radix gives the weight of the third chunk.
+        const radix_squared = radix.sq();
+        return Scalar{ .fe = low_and_mid.add(expanded.x3.mul(radix_squared)) };
+    }
+};
lib/std/crypto/pcurves/p256.zig
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2015-2021 Zig Contributors
+// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
+// The MIT license requires this copyright notice to be included in all copies
+// and substantial portions of the software.
+
+const std = @import("std");
+const builtin = std.builtin;
+const crypto = std.crypto;
+const mem = std.mem;
+const meta = std.meta;
+
+const EncodingError = crypto.errors.EncodingError;
+const IdentityElementError = crypto.errors.IdentityElementError;
+const NonCanonicalError = crypto.errors.NonCanonicalError;
+const NotSquareError = crypto.errors.NotSquareError;
+
+/// Group operations over P256.
+pub const P256 = struct {
+    /// The underlying prime field.
+    pub const Fe = @import("p256/field.zig").Fe;
+    /// Field arithmetic mod the order of the main subgroup.
+    pub const scalar = @import("p256/scalar.zig");
+
+    /// Projective X coordinate; `affineCoordinates` multiplies it by the inverse of `z`.
+    x: Fe,
+    /// Projective Y coordinate; `affineCoordinates` multiplies it by the inverse of `z`.
+    y: Fe,
+    /// Projective Z coordinate. Defaults to one, so `.{ .x = x, .y = y }` is the affine point (x, y).
+    z: Fe = Fe.one,
+
+    /// True for `basePoint` (and copies of it). `mul` and `mulPublic` use it to build their
+    /// table from `basePoint` and to skip the identity check.
+    is_base: bool = false,
+
+    /// The P256 base point.
+    pub const basePoint = P256{
+        .x = try Fe.fromInt(48439561293906451759052585252797914202762949526041747995844080717082404635286),
+        .y = try Fe.fromInt(36134250956749795798585127919587881956611106672985015071877198253568414405109),
+        .z = Fe.one,
+        .is_base = true,
+    };
+
+    /// The P256 neutral element.
+    pub const identityElement = P256{ .x = Fe.zero, .y = Fe.one, .z = Fe.zero };
+
+    /// The constant term of the curve equation y^2 = x^3 - 3x + B, as checked by
+    /// `fromAffineCoordinates`.
+    pub const B = try Fe.fromInt(41058363725152142129326129780047268409114441015993725554835256314039467401291);
+
+    /// Reject the neutral element.
+    ///
+    /// Points are held in projective coordinates (`affineCoordinates` divides `x` and `y`
+    /// by `z`), and `identityElement` is the point whose `z` is zero. The test is therefore
+    /// made on `z`: a point with a zero `x` but a non-zero `z` is an ordinary point with
+    /// affine x = 0, not the neutral element, and must not be rejected.
+    ///
+    /// Returns `error.IdentityElement` if `p` is the neutral element.
+    pub fn rejectIdentity(p: P256) IdentityElementError!void {
+        if (p.z.isZero()) {
+            return error.IdentityElement;
+        }
+    }
+
+    /// Build a point from affine coordinates, verifying that y^2 = x^3 - 3x + B.
+    /// Returns `error.InvalidEncoding` when (x, y) does not satisfy the curve equation.
+    pub fn fromAffineCoordinates(x: Fe, y: Fe) EncodingError!P256 {
+        const y_squared = y.sq();
+        const x_cubed = x.sq().mul(x);
+        // x^3 - 3x + B, with the "3x" term removed one x at a time.
+        const rhs = x_cubed.sub(x).sub(x).sub(x).add(B);
+        if (rhs.equivalent(y_squared)) {
+            return P256{ .x = x, .y = y, .z = Fe.one };
+        }
+        return error.InvalidEncoding;
+    }
+
+    /// Build a point from two 32-byte serialized affine coordinates.
+    /// Each coordinate is decoded with `Fe.fromBytes` (whose error is propagated), then the
+    /// pair is validated by `fromAffineCoordinates`.
+    pub fn fromSerializedAffineCoordinates(xs: [32]u8, ys: [32]u8, endian: builtin.Endian) (NonCanonicalError || EncodingError)!P256 {
+        const x_fe = try Fe.fromBytes(xs, endian);
+        const y_fe = try Fe.fromBytes(ys, endian);
+        return P256.fromAffineCoordinates(x_fe, y_fe);
+    }
+
+    /// Recover a Y coordinate matching `x` on the curve.
+    /// `is_odd` is the parity wanted for the result; the square root is negated, without
+    /// branching, when its parity differs.
+    /// Returns the error from `sqrt` when x^3 - 3x + B has no square root.
+    pub fn recoverY(x: Fe, is_odd: bool) NotSquareError!Fe {
+        const rhs = x.sq().mul(x).sub(x).sub(x).sub(x).add(B);
+        var root = try rhs.sqrt();
+        const negated_root = root.neg();
+        // 1 when the parity of `root` is not the requested one.
+        const parity_mismatch = @boolToInt(is_odd) ^ @boolToInt(root.isOdd());
+        root.cMov(negated_root, parity_mismatch);
+        return root;
+    }
+
+    /// Deserialize a SEC1-encoded point.
+    ///
+    /// The first byte selects the format:
+    /// - 0: the neutral element, with no payload;
+    /// - 2 or 3: compressed, a 32-byte big-endian X; 3 means Y is odd;
+    /// - 4: uncompressed, 32-byte big-endian X followed by 32-byte big-endian Y.
+    /// Any other tag, or a payload of the wrong length, yields `error.InvalidEncoding`.
+    pub fn fromSec1(s: []const u8) (EncodingError || NotSquareError || NonCanonicalError)!P256 {
+        if (s.len < 1) return error.InvalidEncoding;
+        const tag = s[0];
+        const payload = s[1..];
+
+        if (tag == 0) {
+            if (payload.len != 0) return error.InvalidEncoding;
+            return P256.identityElement;
+        }
+        if (tag == 2 or tag == 3) {
+            if (payload.len != 32) return error.InvalidEncoding;
+            const x = try Fe.fromBytes(payload[0..32].*, .Big);
+            const y = try recoverY(x, tag == 3);
+            return P256{ .x = x, .y = y };
+        }
+        if (tag == 4) {
+            if (payload.len != 64) return error.InvalidEncoding;
+            const x = try Fe.fromBytes(payload[0..32].*, .Big);
+            const y = try Fe.fromBytes(payload[32..64].*, .Big);
+            return P256.fromAffineCoordinates(x, y);
+        }
+        return error.InvalidEncoding;
+    }
+
+    /// Serialize a point using the compressed SEC-1 format:
+    /// a tag byte (3 if the affine Y is odd, 2 otherwise) followed by the big-endian affine X.
+    pub fn toCompressedSec1(p: P256) [33]u8 {
+        const affine = p.affineCoordinates();
+        const x_bytes = affine.x.toBytes(.Big);
+        var encoded: [33]u8 = undefined;
+        encoded[0] = if (affine.y.isOdd()) 3 else 2;
+        mem.copy(u8, encoded[1..], &x_bytes);
+        return encoded;
+    }
+
+    /// Serialize a point using the uncompressed SEC-1 format:
+    /// the tag byte 4, then the big-endian affine X, then the big-endian affine Y.
+    pub fn toUncompressedSec1(p: P256) [65]u8 {
+        const affine = p.affineCoordinates();
+        const x_bytes = affine.x.toBytes(.Big);
+        const y_bytes = affine.y.toBytes(.Big);
+        var encoded: [65]u8 = undefined;
+        encoded[0] = 4;
+        mem.copy(u8, encoded[1..33], &x_bytes);
+        mem.copy(u8, encoded[33..65], &y_bytes);
+        return encoded;
+    }
+
+    /// Return a random point, obtained by multiplying the base point by a scalar drawn
+    /// from `scalar.random`.
+    // NOTE(review): the `catch unreachable` assumes `scalar.random` never yields a scalar
+    // that maps the base point to the identity - confirm against scalar.zig.
+    pub fn random() P256 {
+        const secret = scalar.random(.Little);
+        const point = basePoint.mul(secret, .Little) catch unreachable;
+        return point;
+    }
+
+    /// Negate a point by flipping the sign of its Y coordinate; X and Z are kept as-is.
+    /// The result does not carry over `is_base`.
+    pub fn neg(p: P256) P256 {
+        const flipped_y = p.y.neg();
+        return P256{ .x = p.x, .y = flipped_y, .z = p.z };
+    }
+
+    /// Double a P256 point.
+    ///
+    /// Straight-line code: the same sequence of field operations runs for every input,
+    /// with no branch on the coordinates. The returned point does not carry over `is_base`.
+    // Algorithm 6 from https://eprint.iacr.org/2015/1060.pdf
+    pub fn dbl(p: P256) P256 {
+        // t0 = X^2, t1 = Y^2, t2 = Z^2
+        var t0 = p.x.sq();
+        var t1 = p.y.sq();
+        var t2 = p.z.sq();
+        // t3 = 2XY
+        var t3 = p.x.mul(p.y);
+        t3 = t3.dbl();
+        // Z3 = 2XZ
+        var Z3 = p.x.mul(p.z);
+        Z3 = Z3.add(Z3);
+        var Y3 = B.mul(t2);
+        Y3 = Y3.sub(Z3);
+        var X3 = Y3.dbl();
+        Y3 = X3.add(Y3);
+        X3 = t1.sub(Y3);
+        Y3 = t1.add(Y3);
+        Y3 = X3.mul(Y3);
+        X3 = X3.mul(t3);
+        // t2 = 3 * Z^2
+        t3 = t2.dbl();
+        t2 = t2.add(t3);
+        Z3 = B.mul(Z3);
+        Z3 = Z3.sub(t2);
+        Z3 = Z3.sub(t0);
+        t3 = Z3.dbl();
+        Z3 = Z3.add(t3);
+        // t0 = 3 * X^2 - 3 * Z^2
+        t3 = t0.dbl();
+        t0 = t3.add(t0);
+        t0 = t0.sub(t2);
+        t0 = t0.mul(Z3);
+        Y3 = Y3.add(t0);
+        // t0 = 2YZ
+        t0 = p.y.mul(p.z);
+        t0 = t0.dbl();
+        Z3 = t0.mul(Z3);
+        X3 = X3.sub(Z3);
+        Z3 = t0.mul(t1);
+        Z3 = Z3.dbl().dbl();
+        return .{
+            .x = X3,
+            .y = Y3,
+            .z = Z3,
+        };
+    }
+
+    /// Add P256 points, the second being specified using affine coordinates.
+    ///
+    /// `p` is a projective point; `q` is given as affine (x, y), i.e. with an implicit
+    /// Z of one, which saves the multiplications by `q.z` that `add` performs.
+    /// The referenced algorithm assumes `q` is not the point at infinity, which affine
+    /// coordinates cannot represent anyway.
+    /// Straight-line code with no branch on the coordinates. The returned point does not
+    /// carry over `is_base`.
+    // Algorithm 5 from https://eprint.iacr.org/2015/1060.pdf
+    pub fn addMixed(p: P256, q: struct { x: Fe, y: Fe }) P256 {
+        var t0 = p.x.mul(q.x);
+        var t1 = p.y.mul(q.y);
+        var t3 = q.x.add(q.y);
+        var t4 = p.x.add(p.y);
+        t3 = t3.mul(t4);
+        t4 = t0.add(t1);
+        t3 = t3.sub(t4);
+        t4 = q.y.mul(p.z);
+        t4 = t4.add(p.y);
+        var Y3 = q.x.mul(p.z);
+        Y3 = Y3.add(p.x);
+        var Z3 = B.mul(p.z);
+        var X3 = Y3.sub(Z3);
+        Z3 = X3.dbl();
+        X3 = X3.add(Z3);
+        Z3 = t1.sub(X3);
+        // Step 17 of the algorithm is X3 <- t1 + X3 (same as in `add`), not a doubling of t1.
+        X3 = t1.add(X3);
+        Y3 = B.mul(Y3);
+        // t2 = 3 * Z1
+        t1 = p.z.dbl();
+        var t2 = t1.add(p.z);
+        Y3 = Y3.sub(t2);
+        Y3 = Y3.sub(t0);
+        t1 = Y3.dbl();
+        Y3 = t1.add(Y3);
+        t1 = t0.dbl();
+        t0 = t1.add(t0);
+        t0 = t0.sub(t2);
+        t1 = t4.mul(Y3);
+        t2 = t0.mul(Y3);
+        Y3 = X3.mul(Z3);
+        Y3 = Y3.add(t2);
+        X3 = t3.mul(X3);
+        X3 = X3.sub(t1);
+        Z3 = t4.mul(Z3);
+        t1 = t3.mul(t0);
+        Z3 = Z3.add(t1);
+        return .{
+            .x = X3,
+            .y = Y3,
+            .z = Z3,
+        };
+    }
+
+    /// Add P256 points.
+    ///
+    /// Both operands are projective. Straight-line code: the same sequence of field
+    /// operations runs for every input, with no branch on the coordinates.
+    /// The returned point does not carry over `is_base`.
+    // Algorithm 4 from https://eprint.iacr.org/2015/1060.pdf
+    pub fn add(p: P256, q: P256) P256 {
+        // t0 = X1*X2, t1 = Y1*Y2, t2 = Z1*Z2
+        var t0 = p.x.mul(q.x);
+        var t1 = p.y.mul(q.y);
+        var t2 = p.z.mul(q.z);
+        // t3 = (X1+Y1)(X2+Y2) - t0 - t1 = X1*Y2 + X2*Y1
+        var t3 = p.x.add(p.y);
+        var t4 = q.x.add(q.y);
+        t3 = t3.mul(t4);
+        t4 = t0.add(t1);
+        t3 = t3.sub(t4);
+        // t4 = (Y1+Z1)(Y2+Z2) - t1 - t2 = Y1*Z2 + Y2*Z1
+        t4 = p.y.add(p.z);
+        var X3 = q.y.add(q.z);
+        t4 = t4.mul(X3);
+        X3 = t1.add(t2);
+        t4 = t4.sub(X3);
+        // Y3 = (X1+Z1)(X2+Z2) - t0 - t2 = X1*Z2 + X2*Z1
+        X3 = p.x.add(p.z);
+        var Y3 = q.x.add(q.z);
+        X3 = X3.mul(Y3);
+        Y3 = t0.add(t2);
+        Y3 = X3.sub(Y3);
+        var Z3 = B.mul(t2);
+        X3 = Y3.sub(Z3);
+        Z3 = X3.dbl();
+        X3 = X3.add(Z3);
+        Z3 = t1.sub(X3);
+        X3 = t1.add(X3);
+        Y3 = B.mul(Y3);
+        // t2 = 3 * Z1*Z2
+        t1 = t2.dbl();
+        t2 = t1.add(t2);
+        Y3 = Y3.sub(t2);
+        Y3 = Y3.sub(t0);
+        t1 = Y3.dbl();
+        Y3 = t1.add(Y3);
+        // t0 = 3 * X1*X2 - 3 * Z1*Z2
+        t1 = t0.dbl();
+        t0 = t1.add(t0);
+        t0 = t0.sub(t2);
+        t1 = t4.mul(Y3);
+        t2 = t0.mul(Y3);
+        Y3 = X3.mul(Z3);
+        Y3 = Y3.add(t2);
+        X3 = t3.mul(X3);
+        X3 = X3.sub(t1);
+        Z3 = t4.mul(Z3);
+        t1 = t3.mul(t0);
+        Z3 = Z3.add(t1);
+        return .{
+            .x = X3,
+            .y = Y3,
+            .z = Z3,
+        };
+    }
+
+    /// Subtract P256 points: `p - q`, computed as `p + (-q)`.
+    pub fn sub(p: P256, q: P256) P256 {
+        const negated_q = q.neg();
+        return p.add(negated_q);
+    }
+
+    /// Return affine coordinates: `x` and `y` each multiplied by the inverse of `z`.
+    // NOTE(review): for the identity element `z` is zero; the "p256 test vectors" test
+    // expects x to serialize as all zeroes in that case - confirm `Fe.invert` maps 0 to 0.
+    pub fn affineCoordinates(p: P256) struct { x: Fe, y: Fe } {
+        const z_inverse = p.z.invert();
+        const affine = .{
+            .x = p.x.mul(z_inverse),
+            .y = p.y.mul(z_inverse),
+        };
+        return affine;
+    }
+
+    /// Return true if both coordinate sets represent the same point,
+    /// i.e. if `a - b` is rejected by `rejectIdentity`.
+    pub fn equivalent(a: P256, b: P256) bool {
+        const difference = a.sub(b);
+        difference.rejectIdentity() catch return true;
+        return false;
+    }
+
+    /// Conditionally move `a` into `p`, coordinate by coordinate, by forwarding the
+    /// flag `c` to `Fe.cMov`. `is_base` is never touched, so `p` keeps its own value.
+    fn cMov(p: *P256, a: P256, c: u1) void {
+        p.x.cMov(a.x, c);
+        p.y.cMov(a.y, c);
+        p.z.cMov(a.z, c);
+    }
+
+    /// Return `pc[b]` by scanning every table entry instead of indexing with `b`.
+    /// Entry 0 is never read: the result starts as the identity element, which is
+    /// what is returned when `b` is 0 (or matches no index below `n`).
+    fn pcSelect(comptime n: usize, pc: [n]P256, b: u8) P256 {
+        var t = P256.identityElement;
+        comptime var i: u8 = 1;
+        inline while (i < pc.len) : (i += 1) {
+            // `b ^ i` is zero only when b == i. Widened to usize, the wrapping `-% 1`
+            // then yields all ones (bit 8 set); any non-zero value 1..255 becomes
+            // 0..254 (bit 8 clear). The flag is thus 1 exactly when b == i.
+            t.cMov(pc[i], @truncate(u1, (@as(usize, b ^ i) -% 1) >> 8));
+        }
+        return t;
+    }
+
+    /// Recode a 32-byte little-endian scalar into 65 signed radix-16 digits,
+    /// least significant first, such that the sum of `e[i] * 16^i` equals the scalar.
+    fn slide(s: [32]u8) [2 * 32 + 1]i8 {
+        var e: [2 * 32 + 1]i8 = undefined;
+        for (s) |x, i| {
+            // Low nibble first, then high nibble.
+            e[i * 2 + 0] = @as(i8, @truncate(u4, x));
+            e[i * 2 + 1] = @as(i8, @truncate(u4, x >> 4));
+        }
+        // Now, e[0..64] holds the 64 nibbles of s, each between 0 and 15
+        var carry: i8 = 0;
+        for (e[0..64]) |*x| {
+            x.* += carry;
+            // Digits above 7 borrow from the next position: carry becomes 1
+            // and 16 is subtracted here, which leaves the represented value unchanged.
+            carry = (x.* + 8) >> 4;
+            x.* -= carry * 16;
+            std.debug.assert(x.* >= -8 and x.* <= 8);
+        }
+        // The last carry becomes the extra, most significant digit.
+        e[64] = carry;
+        // Now, e[*] is between -8 and 8, including e[64]
+        std.debug.assert(carry >= -8 and carry <= 8);
+        return e;
+    }
+
+    /// Multiply by the little-endian scalar `s` using the table `pc` (entry i holding i
+    /// times the point, as built by `precompute`) and the signed digits from `slide`.
+    /// Variable time: it branches on the digits and indexes the table with them, so it
+    /// asserts that `vartime` is set.
+    /// Returns `error.IdentityElement` if the result is the identity element.
+    fn pcMul(pc: [9]P256, s: [32]u8, comptime vartime: bool) IdentityElementError!P256 {
+        std.debug.assert(vartime);
+        const digits = slide(s);
+        var acc = P256.identityElement;
+        // Walk the digits from the most significant one down to index 0.
+        var remaining: usize = digits.len;
+        while (remaining > 0) {
+            remaining -= 1;
+            const digit = digits[remaining];
+            if (digit > 0) {
+                acc = acc.add(pc[@intCast(usize, digit)]);
+            } else if (digit < 0) {
+                acc = acc.sub(pc[@intCast(usize, -digit)]);
+            }
+            // Shift by one radix-16 digit, except after the last one.
+            if (remaining != 0) {
+                acc = acc.dbl().dbl().dbl().dbl();
+            }
+        }
+        try acc.rejectIdentity();
+        return acc;
+    }
+
+    /// Multiply by the little-endian scalar `s`, four bits at a time, using the
+    /// 16-entry table `pc` (entry i holding i times the point, as built by `precompute`).
+    /// With `vartime` false every nibble goes through `pcSelect` and an addition is
+    /// always performed; with `vartime` true the table is indexed directly and zero
+    /// nibbles are skipped.
+    /// Returns `error.IdentityElement` if the result is the identity element.
+    fn pcMul16(pc: [16]P256, s: [32]u8, comptime vartime: bool) IdentityElementError!P256 {
+        var q = P256.identityElement;
+        // Bit offset of the current nibble, starting with the most significant one.
+        var pos: usize = 252;
+        while (true) : (pos -= 4) {
+            // `pos` is a multiple of 4: byte `pos / 8`, shifted by 0 or 4 bits.
+            const slot = @truncate(u4, (s[pos >> 3] >> @truncate(u3, pos)));
+            if (vartime) {
+                if (slot != 0) {
+                    q = q.add(pc[slot]);
+                }
+            } else {
+                q = q.add(pcSelect(16, pc, slot));
+            }
+            if (pos == 0) break;
+            // Make room for the next nibble: multiply the accumulator by 16.
+            q = q.dbl().dbl().dbl().dbl();
+        }
+        try q.rejectIdentity();
+        return q;
+    }
+
+    /// Build the table of the multiples 0*p, 1*p, ..., count*p.
+    /// Even multiples are obtained by doubling entry i/2, odd ones by adding `p`
+    /// to the previous entry.
+    fn precompute(p: P256, comptime count: usize) [1 + count]P256 {
+        var table: [1 + count]P256 = undefined;
+        table[0] = P256.identityElement;
+        table[1] = p;
+        var k: usize = 2;
+        while (k <= count) : (k += 1) {
+            if (k % 2 == 0) {
+                table[k] = table[k / 2].dbl();
+            } else {
+                table[k] = table[k - 1].add(p);
+            }
+        }
+        return table;
+    }
+
+    /// Multiply an elliptic curve point by a scalar.
+    /// The scalar is 32 bytes, serialized with the given endianness.
+    /// Return error.IdentityElement if the result is the identity element, or if `p`
+    /// is rejected by `rejectIdentity` (that check is skipped when `p.is_base` is set).
+    pub fn mul(p: P256, s_: [32]u8, endian: builtin.Endian) IdentityElementError!P256 {
+        if (!p.is_base) {
+            try p.rejectIdentity();
+        }
+        // A point flagged as the base point always uses the canonical `basePoint`.
+        const source = if (p.is_base) P256.basePoint else p;
+        const table = precompute(source, 15);
+        const le_bytes = switch (endian) {
+            .Little => s_,
+            .Big => Fe.orderSwap(s_),
+        };
+        return pcMul16(table, le_bytes, false);
+    }
+
+    /// Multiply an elliptic curve point by a *PUBLIC* scalar *IN VARIABLE TIME*
+    /// This can be used for signature verification.
+    /// The scalar is 32 bytes, serialized with the given endianness.
+    /// Return error.IdentityElement if the result is the identity element, or if `p`
+    /// is rejected by `rejectIdentity` (that check is skipped when `p.is_base` is set).
+    pub fn mulPublic(p: P256, s_: [32]u8, endian: builtin.Endian) IdentityElementError!P256 {
+        if (!p.is_base) {
+            try p.rejectIdentity();
+        }
+        // A point flagged as the base point always uses the canonical `basePoint`.
+        const source = if (p.is_base) P256.basePoint else p;
+        const table = precompute(source, 8);
+        const le_bytes = switch (endian) {
+            .Little => s_,
+            .Big => Fe.orderSwap(s_),
+        };
+        return pcMul(table, le_bytes, true);
+    }
+};
+
+// Reference tests.zig so that its test blocks are compiled along with this file.
+test "p256" {
+    _ = @import("tests.zig");
+}
lib/std/crypto/pcurves/tests.zig
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2015-2021 Zig Contributors
+// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
+// The MIT license requires this copyright notice to be included in all copies
+// and substantial portions of the software.
+
+const std = @import("std");
+const fmt = std.fmt;
+const testing = std.testing;
+
+const P256 = @import("p256.zig").P256;
+
+// Both sides of a Diffie-Hellman exchange must end up with the same point.
+test "p256 ECDH key exchange" {
+    const secret_a = P256.scalar.random(.Little);
+    const secret_b = P256.scalar.random(.Little);
+    const public_a = try P256.basePoint.mul(secret_a, .Little);
+    const public_b = try P256.basePoint.mul(secret_b, .Little);
+    const shared_from_a = try public_a.mul(secret_b, .Little);
+    const shared_from_b = try public_b.mul(secret_a, .Little);
+    testing.expect(shared_from_a.equivalent(shared_from_b));
+}
+
+// Decoding the big-endian hex coordinates below must yield the base point.
+test "p256 point from affine coordinates" {
+    const x_hex = "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296";
+    const y_hex = "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5";
+    var x_bytes: [32]u8 = undefined;
+    var y_bytes: [32]u8 = undefined;
+    _ = try fmt.hexToBytes(&x_bytes, x_hex);
+    _ = try fmt.hexToBytes(&y_bytes, y_hex);
+    const decoded = try P256.fromSerializedAffineCoordinates(x_bytes, y_bytes, .Big);
+    testing.expect(decoded.equivalent(P256.basePoint));
+}
+
+// Check the affine X coordinate of successive multiples of the base point.
+// Entry k of `expected` is the big-endian X of k times the base point; entry 0 is
+// the identity element, expected to serialize as all zeroes.
+test "p256 test vectors" {
+    const expected = [_][]const u8{
+        "0000000000000000000000000000000000000000000000000000000000000000",
+        "6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296",
+        "7cf27b188d034f7e8a52380304b51ac3c08969e277f21b35a60b48fc47669978",
+        "5ecbe4d1a6330a44c8f7ef951d4bf165e6c6b721efada985fb41661bc6e7fd6c",
+        "e2534a3532d08fbba02dde659ee62bd0031fe2db785596ef509302446b030852",
+        "51590b7a515140d2d784c85608668fdfef8c82fd1f5be52421554a0dc3d033ed",
+        "b01a172a76a4602c92d3242cb897dde3024c740debb215b4c6b0aae93c2291a9",
+        "8e533b6fa0bf7b4625bb30667c01fb607ef9f8b8a80fef5b300628703187b2a3",
+        "62d9779dbee9b0534042742d3ab54cadc1d238980fce97dbb4dd9dc1db6fb393",
+        "ea68d7b6fedf0b71878938d51d71f8729e0acb8c2c6df8b3d79e8a4b90949ee0",
+    };
+    var p = P256.identityElement;
+    for (expected) |xh| {
+        // Read X before stepping, so the first iteration checks the identity element.
+        const x = p.affineCoordinates().x;
+        p = p.add(P256.basePoint);
+        var xs: [32]u8 = undefined;
+        _ = try fmt.hexToBytes(&xs, xh);
+        testing.expectEqualSlices(u8, &x.toBytes(.Big), &xs);
+    }
+}
+
+// Check the affine X coordinate after repeated doubling of the base point.
+// The entries are the big-endian X of 1, 2, 4 and 8 times the base point.
+test "p256 test vectors - doubling" {
+    const expected = [_][]const u8{
+        "6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296",
+        "7cf27b188d034f7e8a52380304b51ac3c08969e277f21b35a60b48fc47669978",
+        "e2534a3532d08fbba02dde659ee62bd0031fe2db785596ef509302446b030852",
+        "62d9779dbee9b0534042742d3ab54cadc1d238980fce97dbb4dd9dc1db6fb393",
+    };
+    var p = P256.basePoint;
+    for (expected) |xh| {
+        // Read X before doubling, so the first iteration checks the base point itself.
+        const x = p.affineCoordinates().x;
+        p = p.dbl();
+        var xs: [32]u8 = undefined;
+        _ = try fmt.hexToBytes(&xs, xh);
+        testing.expectEqualSlices(u8, &x.toBytes(.Big), &xs);
+    }
+}
+
+// A random point must survive a compressed SEC-1 round trip.
+test "p256 compressed sec1 encoding/decoding" {
+    const original = P256.random();
+    const encoded = original.toCompressedSec1();
+    const decoded = try P256.fromSec1(&encoded);
+    testing.expect(original.equivalent(decoded));
+}
+
+// A random point must survive an uncompressed SEC-1 round trip.
+test "p256 uncompressed sec1 encoding/decoding" {
+    const original = P256.random();
+    const encoded = original.toUncompressedSec1();
+    const decoded = try P256.fromSec1(&encoded);
+    testing.expect(original.equivalent(decoded));
+}
+
+// Multiplying by the zero scalar must be reported as error.IdentityElement.
+test "p256 public key is the neutral element" {
+    const zero_bytes = P256.scalar.Scalar.zero.toBytes(.Little);
+    const point = P256.random();
+    testing.expectError(error.IdentityElement, point.mul(zero_bytes, .Little));
+}
+
+// Same as above, through the variable-time multiplication.
+test "p256 public key is the neutral element (public verification)" {
+    const zero_bytes = P256.scalar.Scalar.zero.toBytes(.Little);
+    const point = P256.random();
+    testing.expectError(error.IdentityElement, point.mulPublic(zero_bytes, .Little));
+}
+
+// 32 bytes of 0xff must be rejected by `Fe.fromBytes` as non-canonical.
+test "p256 field element non-canonical encoding" {
+    const all_ones = [_]u8{0xff} ** 32;
+    testing.expectError(error.NonCanonical, P256.Fe.fromBytes(all_ones, .Little));
+}
lib/std/crypto.zig
@@ -67,6 +67,7 @@ pub const dh = struct {
 pub const ecc = struct {
     pub const Curve25519 = @import("crypto/25519/curve25519.zig").Curve25519;
     pub const Edwards25519 = @import("crypto/25519/edwards25519.zig").Edwards25519;
+    pub const P256 = @import("crypto/pcurves/p256.zig").P256;
     pub const Ristretto255 = @import("crypto/25519/ristretto255.zig").Ristretto255;
 };