Commit 97779442d0

Frank Denis <124872+jedisct1@users.noreply.github.com>
2021-05-03 09:57:45
Prepare std/crypto/pcurves for ecdsa and other curves (#8670)
Functions generated by Fiat-crypto are no longer prefixed with the curve description (e.g. `p256Mul` is now `mul`). This matches an upstream change. We can now use a single type for different curves and implementations. The field type is now generic, so we can properly handle the base field and scalars without code duplication.
1 parent c47028c
Changed files (5)
lib/std/crypto/pcurves/p256/field.zig
@@ -5,257 +5,14 @@
 // and substantial portions of the software.
 
 const std = @import("std");
-const builtin = std.builtin;
-const crypto = std.crypto;
-const debug = std.debug;
-const mem = std.mem;
-const meta = std.meta;
+const common = @import("../common.zig");
 
-const fiat = @import("p256_64.zig");
+const Field = common.Field;
 
-const NonCanonicalError = crypto.errors.NonCanonicalError;
-const NotSquareError = crypto.errors.NotSquareError;
-
-const Limbs = fiat.Limbs;
-
-/// A field element, internally stored in Montgomery domain.
-pub const Fe = struct {
-    limbs: Limbs,
-
-    /// Field size.
-    pub const field_order = 115792089210356248762697446949407573530086143415290314195533631308867097853951;
-
-    /// Numer of bits that can be saturated without overflowing.
-    pub const saturated_bits = 255;
-
-    /// Zero.
-    pub const zero: Fe = Fe{ .limbs = mem.zeroes(Limbs) };
-
-    /// One.
-    pub const one = comptime one: {
-        var fe: Fe = undefined;
-        fiat.p256SetOne(&fe.limbs);
-        break :one fe;
-    };
-
-    /// Reject non-canonical encodings of an element.
-    pub fn rejectNonCanonical(s_: [32]u8, endian: builtin.Endian) NonCanonicalError!void {
-        var s = if (endian == .Little) s_ else orderSwap(s_);
-        const field_order_s = comptime fos: {
-            var fos: [32]u8 = undefined;
-            mem.writeIntLittle(u256, &fos, field_order);
-            break :fos fos;
-        };
-        if (crypto.utils.timingSafeCompare(u8, &s, &field_order_s, .Little) != .lt) {
-            return error.NonCanonical;
-        }
-    }
-
-    /// Swap the endianness of an encoded element.
-    pub fn orderSwap(s: [32]u8) [32]u8 {
-        var t = s;
-        for (s) |x, i| t[t.len - 1 - i] = x;
-        return t;
-    }
-
-    /// Unpack a field element.
-    pub fn fromBytes(s_: [32]u8, endian: builtin.Endian) NonCanonicalError!Fe {
-        var s = if (endian == .Little) s_ else orderSwap(s_);
-        try rejectNonCanonical(s, .Little);
-        var limbs_z: Limbs = undefined;
-        fiat.p256FromBytes(&limbs_z, s);
-        var limbs: Limbs = undefined;
-        fiat.p256ToMontgomery(&limbs, limbs_z);
-        return Fe{ .limbs = limbs };
-    }
-
-    /// Pack a field element.
-    pub fn toBytes(fe: Fe, endian: builtin.Endian) [32]u8 {
-        var limbs_z: Limbs = undefined;
-        fiat.p256FromMontgomery(&limbs_z, fe.limbs);
-        var s: [32]u8 = undefined;
-        fiat.p256ToBytes(&s, limbs_z);
-        return if (endian == .Little) s else orderSwap(s);
-    }
-
-    /// Create a field element from an integer.
-    pub fn fromInt(comptime x: u256) NonCanonicalError!Fe {
-        var s: [32]u8 = undefined;
-        mem.writeIntLittle(u256, &s, x);
-        return fromBytes(s, .Little);
-    }
-
-    /// Return the field element as an integer.
-    pub fn toInt(fe: Fe) u256 {
-        const s = fe.toBytes(.Little);
-        return mem.readIntLittle(u256, &s);
-    }
-
-    /// Return true if the field element is zero.
-    pub fn isZero(fe: Fe) bool {
-        var z: @TypeOf(fe.limbs[0]) = undefined;
-        fiat.p256Nonzero(&z, fe.limbs);
-        return z == 0;
-    }
-
-    /// Return true if both field elements are equivalent.
-    pub fn equivalent(a: Fe, b: Fe) bool {
-        return a.sub(b).isZero();
-    }
-
-    /// Return true if the element is odd.
-    pub fn isOdd(fe: Fe) bool {
-        const s = fe.toBytes(.Little);
-        return @truncate(u1, s[0]) != 0;
-    }
-
-    /// Conditonally replace a field element with `a` if `c` is positive.
-    pub fn cMov(fe: *Fe, a: Fe, c: u1) void {
-        fiat.p256Selectznz(&fe.limbs, c, fe.limbs, a.limbs);
-    }
-
-    /// Add field elements.
-    pub fn add(a: Fe, b: Fe) Fe {
-        var fe: Fe = undefined;
-        fiat.p256Add(&fe.limbs, a.limbs, b.limbs);
-        return fe;
-    }
-
-    /// Subtract field elements.
-    pub fn sub(a: Fe, b: Fe) Fe {
-        var fe: Fe = undefined;
-        fiat.p256Sub(&fe.limbs, a.limbs, b.limbs);
-        return fe;
-    }
-
-    /// Double a field element.
-    pub fn dbl(a: Fe) Fe {
-        var fe: Fe = undefined;
-        fiat.p256Add(&fe.limbs, a.limbs, a.limbs);
-        return fe;
-    }
-
-    /// Multiply field elements.
-    pub fn mul(a: Fe, b: Fe) Fe {
-        var fe: Fe = undefined;
-        fiat.p256Mul(&fe.limbs, a.limbs, b.limbs);
-        return fe;
-    }
-
-    /// Square a field element.
-    pub fn sq(a: Fe) Fe {
-        var fe: Fe = undefined;
-        fiat.p256Square(&fe.limbs, a.limbs);
-        return fe;
-    }
-
-    /// Square a field element n times.
-    fn sqn(a: Fe, comptime n: comptime_int) Fe {
-        var i: usize = 0;
-        var fe = a;
-        while (i < n) : (i += 1) {
-            fe = fe.sq();
-        }
-        return fe;
-    }
-
-    /// Compute a^n.
-    pub fn pow(a: Fe, comptime T: type, comptime n: T) Fe {
-        var fe = one;
-        var x: T = n;
-        var t = a;
-        while (true) {
-            if (@truncate(u1, x) != 0) fe = fe.mul(t);
-            x >>= 1;
-            if (x == 0) break;
-            t = t.sq();
-        }
-        return fe;
-    }
-
-    /// Negate a field element.
-    pub fn neg(a: Fe) Fe {
-        var fe: Fe = undefined;
-        fiat.p256Opp(&fe.limbs, a.limbs);
-        return fe;
-    }
-
-    /// Return the inverse of a field element, or 0 if a=0.
-    // Field inversion from https://eprint.iacr.org/2021/549.pdf
-    pub fn invert(a: Fe) Fe {
-        const len_prime = 256;
-        const iterations = (49 * len_prime + 57) / 17;
-        const Word = @TypeOf(a.limbs[0]);
-        const XLimbs = [a.limbs.len + 1]Word;
-
-        var d: Word = 1;
-        var f: XLimbs = undefined;
-        fiat.p256Msat(&f);
-
-        var g: XLimbs = undefined;
-        fiat.p256FromMontgomery(g[0..a.limbs.len], a.limbs);
-        g[g.len - 1] = 0;
-
-        var r: Limbs = undefined;
-        fiat.p256SetOne(&r);
-        var v = mem.zeroes(Limbs);
-
-        var precomp: Limbs = undefined;
-        fiat.p256DivstepPrecomp(&precomp);
-
-        var out1: Word = undefined;
-        var out2: XLimbs = undefined;
-        var out3: XLimbs = undefined;
-        var out4: Limbs = undefined;
-        var out5: Limbs = undefined;
-
-        var i: usize = 0;
-        while (i < iterations - iterations % 2) : (i += 2) {
-            fiat.p256Divstep(&out1, &out2, &out3, &out4, &out5, d, f, g, v, r);
-            fiat.p256Divstep(&d, &f, &g, &v, &r, out1, out2, out3, out4, out5);
-        }
-        if (iterations % 2 != 0) {
-            fiat.p256Divstep(&out1, &out2, &out3, &out4, &out5, d, f, g, v, r);
-            mem.copy(Word, &v, &out4);
-            mem.copy(Word, &f, &out2);
-        }
-        var v_opp: Limbs = undefined;
-        fiat.p256Opp(&v_opp, v);
-        fiat.p256Selectznz(&v, @truncate(u1, f[f.len - 1] >> (meta.bitCount(Word) - 1)), v, v_opp);
-        var fe: Fe = undefined;
-        fiat.p256Mul(&fe.limbs, v, precomp);
-        return fe;
-    }
-
-    /// Return true if the field element is a square.
-    pub fn isSquare(x2: Fe) bool {
-        const t110 = x2.mul(x2.sq()).sq();
-        const t111 = x2.mul(t110);
-        const t111111 = t111.mul(x2.mul(t110).sqn(3));
-        const x15 = t111111.sqn(6).mul(t111111).sqn(3).mul(t111);
-        const x16 = x15.sq().mul(x2);
-        const x53 = x16.sqn(16).mul(x16).sqn(15);
-        const x47 = x15.mul(x53);
-        const ls = x47.mul(((x53.sqn(17).mul(x2)).sqn(143).mul(x47)).sqn(47)).sq().mul(x2); // Legendre symbol, (p-1)/2
-        return ls.equivalent(Fe.one);
-    }
-
-    // x=x2^((field_order+1)/4) w/ field order=3 (mod 4).
-    fn uncheckedSqrt(x2: Fe) Fe {
-        comptime debug.assert(field_order % 4 == 3);
-        const t11 = x2.mul(x2.sq());
-        const t1111 = t11.mul(t11.sqn(2));
-        const t11111111 = t1111.mul(t1111.sqn(4));
-        const x16 = t11111111.sqn(8).mul(t11111111);
-        return x16.sqn(16).mul(x16).sqn(32).mul(x2).sqn(96).mul(x2).sqn(94);
-    }
-
-    /// Compute the square root of `x2`, returning `error.NotSquare` if `x2` was not a square.
-    pub fn sqrt(x2: Fe) NotSquareError!Fe {
-        const x = x2.uncheckedSqrt();
-        if (x.sq().equivalent(x2)) {
-            return x;
-        }
-        return error.NotSquare;
-    }
-};
+pub const Fe = Field(.{
+    .fiat = @import("p256_64.zig"),
+    .field_order = 115792089210356248762697446949407573530086143415290314195533631308867097853951,
+    .field_bits = 256,
+    .saturated_bits = 255,
+    .encoded_length = 32,
+});
lib/std/crypto/pcurves/p256/p256_64.zig
@@ -1,11 +1,5 @@
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2015-2021 Zig Contributors
-// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
-// The MIT license requires this copyright notice to be included in all copies
-// and substantial portions of the software.
-
-// Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --lang Zig --internal-static --public-function-case camelCase --private-function-case camelCase p256 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp
-// curve description: p256
+// Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --lang Zig --internal-static --public-function-case camelCase --private-function-case camelCase --no-prefix-fiat --package-name p256 '' 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp
+// curve description (via package name): p256
 // machine_wordsize = 64 (from "64")
 // requested operations: mul, square, add, sub, opp, from_montgomery, to_montgomery, nonzero, selectznz, to_bytes, from_bytes, one, msat, divstep, divstep_precomp
 // m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1")
@@ -20,10 +14,16 @@
 // Computed values:
 // eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192)
 // bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248)
+// twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in
+//                          if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256
+
+const std = @import("std");
+const cast = std.meta.cast;
+const mode = std.builtin.mode; // Checked arithmetic is disabled in non-debug modes to avoid side channels
 
 pub const Limbs = [4]u64;
 
-/// The function p256AddcarryxU64 is an addition with carry.
+/// The function addcarryxU64 is an addition with carry.
 /// Postconditions:
 ///   out1 = (arg1 + arg2 + arg3) mod 2^64
 ///   out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
@@ -35,14 +35,16 @@ pub const Limbs = [4]u64;
 /// Output Bounds:
 ///   out1: [0x0 ~> 0xffffffffffffffff]
 ///   out2: [0x0 ~> 0x1]
-fn p256AddcarryxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+fn addcarryxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+    @setRuntimeSafety(mode == .Debug);
+
     var t: u64 = undefined;
     const carry1 = @addWithOverflow(u64, arg2, arg3, &t);
     const carry2 = @addWithOverflow(u64, t, arg1, out1);
     out2.* = @boolToInt(carry1) | @boolToInt(carry2);
 }
 
-/// The function p256SubborrowxU64 is a subtraction with borrow.
+/// The function subborrowxU64 is a subtraction with borrow.
 /// Postconditions:
 ///   out1 = (-arg1 + arg2 + -arg3) mod 2^64
 ///   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
@@ -54,14 +56,16 @@ fn p256AddcarryxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) callc
 /// Output Bounds:
 ///   out1: [0x0 ~> 0xffffffffffffffff]
 ///   out2: [0x0 ~> 0x1]
-fn p256SubborrowxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+fn subborrowxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+    @setRuntimeSafety(mode == .Debug);
+
     var t: u64 = undefined;
     const carry1 = @subWithOverflow(u64, arg2, arg3, &t);
     const carry2 = @subWithOverflow(u64, t, arg1, out1);
     out2.* = @boolToInt(carry1) | @boolToInt(carry2);
 }
 
-/// The function p256MulxU64 is a multiplication, returning the full double-width result.
+/// The function mulxU64 is a multiplication, returning the full double-width result.
 /// Postconditions:
 ///   out1 = (arg1 * arg2) mod 2^64
 ///   out2 = ⌊arg1 * arg2 / 2^64⌋
@@ -72,13 +76,15 @@ fn p256SubborrowxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) call
 /// Output Bounds:
 ///   out1: [0x0 ~> 0xffffffffffffffff]
 ///   out2: [0x0 ~> 0xffffffffffffffff]
-fn p256MulxU64(out1: *u64, out2: *u64, arg1: u64, arg2: u64) callconv(.Inline) void {
+fn mulxU64(out1: *u64, out2: *u64, arg1: u64, arg2: u64) callconv(.Inline) void {
+    @setRuntimeSafety(mode == .Debug);
+
     const x = @as(u128, arg1) * @as(u128, arg2);
     out1.* = @truncate(u64, x);
     out2.* = @truncate(u64, x >> 64);
 }
 
-/// The function p256CmovznzU64 is a single-word conditional move.
+/// The function cmovznzU64 is a single-word conditional move.
 /// Postconditions:
 ///   out1 = (if arg1 = 0 then arg2 else arg3)
 ///
@@ -88,12 +94,14 @@ fn p256MulxU64(out1: *u64, out2: *u64, arg1: u64, arg2: u64) callconv(.Inline) v
 ///   arg3: [0x0 ~> 0xffffffffffffffff]
 /// Output Bounds:
 ///   out1: [0x0 ~> 0xffffffffffffffff]
-fn p256CmovznzU64(out1: *u64, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+fn cmovznzU64(out1: *u64, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+    @setRuntimeSafety(mode == .Debug);
+
     const mask = 0 -% @as(u64, arg1);
     out1.* = (mask & arg3) | ((~mask) & arg2);
 }
 
-/// The function p256Mul multiplies two field elements in the Montgomery domain.
+/// The function mul multiplies two field elements in the Montgomery domain.
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 ///   0 ≤ eval arg2 < m
@@ -106,289 +114,291 @@ fn p256CmovznzU64(out1: *u64, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline)
 ///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn p256Mul(out1: *Limbs, arg1: Limbs, arg2: Limbs) void {
-    const x1: u64 = (arg1[1]);
-    const x2: u64 = (arg1[2]);
-    const x3: u64 = (arg1[3]);
-    const x4: u64 = (arg1[0]);
+pub fn mul(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    const x1 = (arg1[1]);
+    const x2 = (arg1[2]);
+    const x3 = (arg1[3]);
+    const x4 = (arg1[0]);
     var x5: u64 = undefined;
     var x6: u64 = undefined;
-    p256MulxU64(&x5, &x6, x4, (arg2[3]));
+    mulxU64(&x5, &x6, x4, (arg2[3]));
     var x7: u64 = undefined;
     var x8: u64 = undefined;
-    p256MulxU64(&x7, &x8, x4, (arg2[2]));
+    mulxU64(&x7, &x8, x4, (arg2[2]));
     var x9: u64 = undefined;
     var x10: u64 = undefined;
-    p256MulxU64(&x9, &x10, x4, (arg2[1]));
+    mulxU64(&x9, &x10, x4, (arg2[1]));
     var x11: u64 = undefined;
     var x12: u64 = undefined;
-    p256MulxU64(&x11, &x12, x4, (arg2[0]));
+    mulxU64(&x11, &x12, x4, (arg2[0]));
     var x13: u64 = undefined;
     var x14: u1 = undefined;
-    p256AddcarryxU64(&x13, &x14, 0x0, x12, x9);
+    addcarryxU64(&x13, &x14, 0x0, x12, x9);
     var x15: u64 = undefined;
     var x16: u1 = undefined;
-    p256AddcarryxU64(&x15, &x16, x14, x10, x7);
+    addcarryxU64(&x15, &x16, x14, x10, x7);
     var x17: u64 = undefined;
     var x18: u1 = undefined;
-    p256AddcarryxU64(&x17, &x18, x16, x8, x5);
-    const x19: u64 = (@intCast(u64, x18) + x6);
+    addcarryxU64(&x17, &x18, x16, x8, x5);
+    const x19 = (cast(u64, x18) + x6);
     var x20: u64 = undefined;
     var x21: u64 = undefined;
-    p256MulxU64(&x20, &x21, x11, 0xffffffff00000001);
+    mulxU64(&x20, &x21, x11, 0xffffffff00000001);
     var x22: u64 = undefined;
     var x23: u64 = undefined;
-    p256MulxU64(&x22, &x23, x11, 0xffffffff);
+    mulxU64(&x22, &x23, x11, 0xffffffff);
     var x24: u64 = undefined;
     var x25: u64 = undefined;
-    p256MulxU64(&x24, &x25, x11, 0xffffffffffffffff);
+    mulxU64(&x24, &x25, x11, 0xffffffffffffffff);
     var x26: u64 = undefined;
     var x27: u1 = undefined;
-    p256AddcarryxU64(&x26, &x27, 0x0, x25, x22);
-    const x28: u64 = (@intCast(u64, x27) + x23);
+    addcarryxU64(&x26, &x27, 0x0, x25, x22);
+    const x28 = (cast(u64, x27) + x23);
     var x29: u64 = undefined;
     var x30: u1 = undefined;
-    p256AddcarryxU64(&x29, &x30, 0x0, x11, x24);
+    addcarryxU64(&x29, &x30, 0x0, x11, x24);
     var x31: u64 = undefined;
     var x32: u1 = undefined;
-    p256AddcarryxU64(&x31, &x32, x30, x13, x26);
+    addcarryxU64(&x31, &x32, x30, x13, x26);
     var x33: u64 = undefined;
     var x34: u1 = undefined;
-    p256AddcarryxU64(&x33, &x34, x32, x15, x28);
+    addcarryxU64(&x33, &x34, x32, x15, x28);
     var x35: u64 = undefined;
     var x36: u1 = undefined;
-    p256AddcarryxU64(&x35, &x36, x34, x17, x20);
+    addcarryxU64(&x35, &x36, x34, x17, x20);
     var x37: u64 = undefined;
     var x38: u1 = undefined;
-    p256AddcarryxU64(&x37, &x38, x36, x19, x21);
+    addcarryxU64(&x37, &x38, x36, x19, x21);
     var x39: u64 = undefined;
     var x40: u64 = undefined;
-    p256MulxU64(&x39, &x40, x1, (arg2[3]));
+    mulxU64(&x39, &x40, x1, (arg2[3]));
     var x41: u64 = undefined;
     var x42: u64 = undefined;
-    p256MulxU64(&x41, &x42, x1, (arg2[2]));
+    mulxU64(&x41, &x42, x1, (arg2[2]));
     var x43: u64 = undefined;
     var x44: u64 = undefined;
-    p256MulxU64(&x43, &x44, x1, (arg2[1]));
+    mulxU64(&x43, &x44, x1, (arg2[1]));
     var x45: u64 = undefined;
     var x46: u64 = undefined;
-    p256MulxU64(&x45, &x46, x1, (arg2[0]));
+    mulxU64(&x45, &x46, x1, (arg2[0]));
     var x47: u64 = undefined;
     var x48: u1 = undefined;
-    p256AddcarryxU64(&x47, &x48, 0x0, x46, x43);
+    addcarryxU64(&x47, &x48, 0x0, x46, x43);
     var x49: u64 = undefined;
     var x50: u1 = undefined;
-    p256AddcarryxU64(&x49, &x50, x48, x44, x41);
+    addcarryxU64(&x49, &x50, x48, x44, x41);
     var x51: u64 = undefined;
     var x52: u1 = undefined;
-    p256AddcarryxU64(&x51, &x52, x50, x42, x39);
-    const x53: u64 = (@intCast(u64, x52) + x40);
+    addcarryxU64(&x51, &x52, x50, x42, x39);
+    const x53 = (cast(u64, x52) + x40);
     var x54: u64 = undefined;
     var x55: u1 = undefined;
-    p256AddcarryxU64(&x54, &x55, 0x0, x31, x45);
+    addcarryxU64(&x54, &x55, 0x0, x31, x45);
     var x56: u64 = undefined;
     var x57: u1 = undefined;
-    p256AddcarryxU64(&x56, &x57, x55, x33, x47);
+    addcarryxU64(&x56, &x57, x55, x33, x47);
     var x58: u64 = undefined;
     var x59: u1 = undefined;
-    p256AddcarryxU64(&x58, &x59, x57, x35, x49);
+    addcarryxU64(&x58, &x59, x57, x35, x49);
     var x60: u64 = undefined;
     var x61: u1 = undefined;
-    p256AddcarryxU64(&x60, &x61, x59, x37, x51);
+    addcarryxU64(&x60, &x61, x59, x37, x51);
     var x62: u64 = undefined;
     var x63: u1 = undefined;
-    p256AddcarryxU64(&x62, &x63, x61, @intCast(u64, x38), x53);
+    addcarryxU64(&x62, &x63, x61, cast(u64, x38), x53);
     var x64: u64 = undefined;
     var x65: u64 = undefined;
-    p256MulxU64(&x64, &x65, x54, 0xffffffff00000001);
+    mulxU64(&x64, &x65, x54, 0xffffffff00000001);
     var x66: u64 = undefined;
     var x67: u64 = undefined;
-    p256MulxU64(&x66, &x67, x54, 0xffffffff);
+    mulxU64(&x66, &x67, x54, 0xffffffff);
     var x68: u64 = undefined;
     var x69: u64 = undefined;
-    p256MulxU64(&x68, &x69, x54, 0xffffffffffffffff);
+    mulxU64(&x68, &x69, x54, 0xffffffffffffffff);
     var x70: u64 = undefined;
     var x71: u1 = undefined;
-    p256AddcarryxU64(&x70, &x71, 0x0, x69, x66);
-    const x72: u64 = (@intCast(u64, x71) + x67);
+    addcarryxU64(&x70, &x71, 0x0, x69, x66);
+    const x72 = (cast(u64, x71) + x67);
     var x73: u64 = undefined;
     var x74: u1 = undefined;
-    p256AddcarryxU64(&x73, &x74, 0x0, x54, x68);
+    addcarryxU64(&x73, &x74, 0x0, x54, x68);
     var x75: u64 = undefined;
     var x76: u1 = undefined;
-    p256AddcarryxU64(&x75, &x76, x74, x56, x70);
+    addcarryxU64(&x75, &x76, x74, x56, x70);
     var x77: u64 = undefined;
     var x78: u1 = undefined;
-    p256AddcarryxU64(&x77, &x78, x76, x58, x72);
+    addcarryxU64(&x77, &x78, x76, x58, x72);
     var x79: u64 = undefined;
     var x80: u1 = undefined;
-    p256AddcarryxU64(&x79, &x80, x78, x60, x64);
+    addcarryxU64(&x79, &x80, x78, x60, x64);
     var x81: u64 = undefined;
     var x82: u1 = undefined;
-    p256AddcarryxU64(&x81, &x82, x80, x62, x65);
-    const x83: u64 = (@intCast(u64, x82) + @intCast(u64, x63));
+    addcarryxU64(&x81, &x82, x80, x62, x65);
+    const x83 = (cast(u64, x82) + cast(u64, x63));
     var x84: u64 = undefined;
     var x85: u64 = undefined;
-    p256MulxU64(&x84, &x85, x2, (arg2[3]));
+    mulxU64(&x84, &x85, x2, (arg2[3]));
     var x86: u64 = undefined;
     var x87: u64 = undefined;
-    p256MulxU64(&x86, &x87, x2, (arg2[2]));
+    mulxU64(&x86, &x87, x2, (arg2[2]));
     var x88: u64 = undefined;
     var x89: u64 = undefined;
-    p256MulxU64(&x88, &x89, x2, (arg2[1]));
+    mulxU64(&x88, &x89, x2, (arg2[1]));
     var x90: u64 = undefined;
     var x91: u64 = undefined;
-    p256MulxU64(&x90, &x91, x2, (arg2[0]));
+    mulxU64(&x90, &x91, x2, (arg2[0]));
     var x92: u64 = undefined;
     var x93: u1 = undefined;
-    p256AddcarryxU64(&x92, &x93, 0x0, x91, x88);
+    addcarryxU64(&x92, &x93, 0x0, x91, x88);
     var x94: u64 = undefined;
     var x95: u1 = undefined;
-    p256AddcarryxU64(&x94, &x95, x93, x89, x86);
+    addcarryxU64(&x94, &x95, x93, x89, x86);
     var x96: u64 = undefined;
     var x97: u1 = undefined;
-    p256AddcarryxU64(&x96, &x97, x95, x87, x84);
-    const x98: u64 = (@intCast(u64, x97) + x85);
+    addcarryxU64(&x96, &x97, x95, x87, x84);
+    const x98 = (cast(u64, x97) + x85);
     var x99: u64 = undefined;
     var x100: u1 = undefined;
-    p256AddcarryxU64(&x99, &x100, 0x0, x75, x90);
+    addcarryxU64(&x99, &x100, 0x0, x75, x90);
     var x101: u64 = undefined;
     var x102: u1 = undefined;
-    p256AddcarryxU64(&x101, &x102, x100, x77, x92);
+    addcarryxU64(&x101, &x102, x100, x77, x92);
     var x103: u64 = undefined;
     var x104: u1 = undefined;
-    p256AddcarryxU64(&x103, &x104, x102, x79, x94);
+    addcarryxU64(&x103, &x104, x102, x79, x94);
     var x105: u64 = undefined;
     var x106: u1 = undefined;
-    p256AddcarryxU64(&x105, &x106, x104, x81, x96);
+    addcarryxU64(&x105, &x106, x104, x81, x96);
     var x107: u64 = undefined;
     var x108: u1 = undefined;
-    p256AddcarryxU64(&x107, &x108, x106, x83, x98);
+    addcarryxU64(&x107, &x108, x106, x83, x98);
     var x109: u64 = undefined;
     var x110: u64 = undefined;
-    p256MulxU64(&x109, &x110, x99, 0xffffffff00000001);
+    mulxU64(&x109, &x110, x99, 0xffffffff00000001);
     var x111: u64 = undefined;
     var x112: u64 = undefined;
-    p256MulxU64(&x111, &x112, x99, 0xffffffff);
+    mulxU64(&x111, &x112, x99, 0xffffffff);
     var x113: u64 = undefined;
     var x114: u64 = undefined;
-    p256MulxU64(&x113, &x114, x99, 0xffffffffffffffff);
+    mulxU64(&x113, &x114, x99, 0xffffffffffffffff);
     var x115: u64 = undefined;
     var x116: u1 = undefined;
-    p256AddcarryxU64(&x115, &x116, 0x0, x114, x111);
-    const x117: u64 = (@intCast(u64, x116) + x112);
+    addcarryxU64(&x115, &x116, 0x0, x114, x111);
+    const x117 = (cast(u64, x116) + x112);
     var x118: u64 = undefined;
     var x119: u1 = undefined;
-    p256AddcarryxU64(&x118, &x119, 0x0, x99, x113);
+    addcarryxU64(&x118, &x119, 0x0, x99, x113);
     var x120: u64 = undefined;
     var x121: u1 = undefined;
-    p256AddcarryxU64(&x120, &x121, x119, x101, x115);
+    addcarryxU64(&x120, &x121, x119, x101, x115);
     var x122: u64 = undefined;
     var x123: u1 = undefined;
-    p256AddcarryxU64(&x122, &x123, x121, x103, x117);
+    addcarryxU64(&x122, &x123, x121, x103, x117);
     var x124: u64 = undefined;
     var x125: u1 = undefined;
-    p256AddcarryxU64(&x124, &x125, x123, x105, x109);
+    addcarryxU64(&x124, &x125, x123, x105, x109);
     var x126: u64 = undefined;
     var x127: u1 = undefined;
-    p256AddcarryxU64(&x126, &x127, x125, x107, x110);
-    const x128: u64 = (@intCast(u64, x127) + @intCast(u64, x108));
+    addcarryxU64(&x126, &x127, x125, x107, x110);
+    const x128 = (cast(u64, x127) + cast(u64, x108));
     var x129: u64 = undefined;
     var x130: u64 = undefined;
-    p256MulxU64(&x129, &x130, x3, (arg2[3]));
+    mulxU64(&x129, &x130, x3, (arg2[3]));
     var x131: u64 = undefined;
     var x132: u64 = undefined;
-    p256MulxU64(&x131, &x132, x3, (arg2[2]));
+    mulxU64(&x131, &x132, x3, (arg2[2]));
     var x133: u64 = undefined;
     var x134: u64 = undefined;
-    p256MulxU64(&x133, &x134, x3, (arg2[1]));
+    mulxU64(&x133, &x134, x3, (arg2[1]));
     var x135: u64 = undefined;
     var x136: u64 = undefined;
-    p256MulxU64(&x135, &x136, x3, (arg2[0]));
+    mulxU64(&x135, &x136, x3, (arg2[0]));
     var x137: u64 = undefined;
     var x138: u1 = undefined;
-    p256AddcarryxU64(&x137, &x138, 0x0, x136, x133);
+    addcarryxU64(&x137, &x138, 0x0, x136, x133);
     var x139: u64 = undefined;
     var x140: u1 = undefined;
-    p256AddcarryxU64(&x139, &x140, x138, x134, x131);
+    addcarryxU64(&x139, &x140, x138, x134, x131);
     var x141: u64 = undefined;
     var x142: u1 = undefined;
-    p256AddcarryxU64(&x141, &x142, x140, x132, x129);
-    const x143: u64 = (@intCast(u64, x142) + x130);
+    addcarryxU64(&x141, &x142, x140, x132, x129);
+    const x143 = (cast(u64, x142) + x130);
     var x144: u64 = undefined;
     var x145: u1 = undefined;
-    p256AddcarryxU64(&x144, &x145, 0x0, x120, x135);
+    addcarryxU64(&x144, &x145, 0x0, x120, x135);
     var x146: u64 = undefined;
     var x147: u1 = undefined;
-    p256AddcarryxU64(&x146, &x147, x145, x122, x137);
+    addcarryxU64(&x146, &x147, x145, x122, x137);
     var x148: u64 = undefined;
     var x149: u1 = undefined;
-    p256AddcarryxU64(&x148, &x149, x147, x124, x139);
+    addcarryxU64(&x148, &x149, x147, x124, x139);
     var x150: u64 = undefined;
     var x151: u1 = undefined;
-    p256AddcarryxU64(&x150, &x151, x149, x126, x141);
+    addcarryxU64(&x150, &x151, x149, x126, x141);
     var x152: u64 = undefined;
     var x153: u1 = undefined;
-    p256AddcarryxU64(&x152, &x153, x151, x128, x143);
+    addcarryxU64(&x152, &x153, x151, x128, x143);
     var x154: u64 = undefined;
     var x155: u64 = undefined;
-    p256MulxU64(&x154, &x155, x144, 0xffffffff00000001);
+    mulxU64(&x154, &x155, x144, 0xffffffff00000001);
     var x156: u64 = undefined;
     var x157: u64 = undefined;
-    p256MulxU64(&x156, &x157, x144, 0xffffffff);
+    mulxU64(&x156, &x157, x144, 0xffffffff);
     var x158: u64 = undefined;
     var x159: u64 = undefined;
-    p256MulxU64(&x158, &x159, x144, 0xffffffffffffffff);
+    mulxU64(&x158, &x159, x144, 0xffffffffffffffff);
     var x160: u64 = undefined;
     var x161: u1 = undefined;
-    p256AddcarryxU64(&x160, &x161, 0x0, x159, x156);
-    const x162: u64 = (@intCast(u64, x161) + x157);
+    addcarryxU64(&x160, &x161, 0x0, x159, x156);
+    const x162 = (cast(u64, x161) + x157);
     var x163: u64 = undefined;
     var x164: u1 = undefined;
-    p256AddcarryxU64(&x163, &x164, 0x0, x144, x158);
+    addcarryxU64(&x163, &x164, 0x0, x144, x158);
     var x165: u64 = undefined;
     var x166: u1 = undefined;
-    p256AddcarryxU64(&x165, &x166, x164, x146, x160);
+    addcarryxU64(&x165, &x166, x164, x146, x160);
     var x167: u64 = undefined;
     var x168: u1 = undefined;
-    p256AddcarryxU64(&x167, &x168, x166, x148, x162);
+    addcarryxU64(&x167, &x168, x166, x148, x162);
     var x169: u64 = undefined;
     var x170: u1 = undefined;
-    p256AddcarryxU64(&x169, &x170, x168, x150, x154);
+    addcarryxU64(&x169, &x170, x168, x150, x154);
     var x171: u64 = undefined;
     var x172: u1 = undefined;
-    p256AddcarryxU64(&x171, &x172, x170, x152, x155);
-    const x173: u64 = (@intCast(u64, x172) + @intCast(u64, x153));
+    addcarryxU64(&x171, &x172, x170, x152, x155);
+    const x173 = (cast(u64, x172) + cast(u64, x153));
     var x174: u64 = undefined;
     var x175: u1 = undefined;
-    p256SubborrowxU64(&x174, &x175, 0x0, x165, 0xffffffffffffffff);
+    subborrowxU64(&x174, &x175, 0x0, x165, 0xffffffffffffffff);
     var x176: u64 = undefined;
     var x177: u1 = undefined;
-    p256SubborrowxU64(&x176, &x177, x175, x167, 0xffffffff);
+    subborrowxU64(&x176, &x177, x175, x167, 0xffffffff);
     var x178: u64 = undefined;
     var x179: u1 = undefined;
-    p256SubborrowxU64(&x178, &x179, x177, x169, @intCast(u64, 0x0));
+    subborrowxU64(&x178, &x179, x177, x169, cast(u64, 0x0));
     var x180: u64 = undefined;
     var x181: u1 = undefined;
-    p256SubborrowxU64(&x180, &x181, x179, x171, 0xffffffff00000001);
+    subborrowxU64(&x180, &x181, x179, x171, 0xffffffff00000001);
     var x182: u64 = undefined;
     var x183: u1 = undefined;
-    p256SubborrowxU64(&x182, &x183, x181, x173, @intCast(u64, 0x0));
+    subborrowxU64(&x182, &x183, x181, x173, cast(u64, 0x0));
     var x184: u64 = undefined;
-    p256CmovznzU64(&x184, x183, x174, x165);
+    cmovznzU64(&x184, x183, x174, x165);
     var x185: u64 = undefined;
-    p256CmovznzU64(&x185, x183, x176, x167);
+    cmovznzU64(&x185, x183, x176, x167);
     var x186: u64 = undefined;
-    p256CmovznzU64(&x186, x183, x178, x169);
+    cmovznzU64(&x186, x183, x178, x169);
     var x187: u64 = undefined;
-    p256CmovznzU64(&x187, x183, x180, x171);
+    cmovznzU64(&x187, x183, x180, x171);
     out1[0] = x184;
     out1[1] = x185;
     out1[2] = x186;
     out1[3] = x187;
 }
 
-/// The function p256Square squares a field element in the Montgomery domain.
+/// The function square squares a field element in the Montgomery domain.
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
@@ -399,289 +409,291 @@ pub fn p256Mul(out1: *Limbs, arg1: Limbs, arg2: Limbs) void {
 ///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn p256Square(out1: *Limbs, arg1: Limbs) void {
-    const x1: u64 = (arg1[1]);
-    const x2: u64 = (arg1[2]);
-    const x3: u64 = (arg1[3]);
-    const x4: u64 = (arg1[0]);
+pub fn square(out1: *[4]u64, arg1: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    const x1 = (arg1[1]);
+    const x2 = (arg1[2]);
+    const x3 = (arg1[3]);
+    const x4 = (arg1[0]);
     var x5: u64 = undefined;
     var x6: u64 = undefined;
-    p256MulxU64(&x5, &x6, x4, (arg1[3]));
+    mulxU64(&x5, &x6, x4, (arg1[3]));
     var x7: u64 = undefined;
     var x8: u64 = undefined;
-    p256MulxU64(&x7, &x8, x4, (arg1[2]));
+    mulxU64(&x7, &x8, x4, (arg1[2]));
     var x9: u64 = undefined;
     var x10: u64 = undefined;
-    p256MulxU64(&x9, &x10, x4, (arg1[1]));
+    mulxU64(&x9, &x10, x4, (arg1[1]));
     var x11: u64 = undefined;
     var x12: u64 = undefined;
-    p256MulxU64(&x11, &x12, x4, (arg1[0]));
+    mulxU64(&x11, &x12, x4, (arg1[0]));
     var x13: u64 = undefined;
     var x14: u1 = undefined;
-    p256AddcarryxU64(&x13, &x14, 0x0, x12, x9);
+    addcarryxU64(&x13, &x14, 0x0, x12, x9);
     var x15: u64 = undefined;
     var x16: u1 = undefined;
-    p256AddcarryxU64(&x15, &x16, x14, x10, x7);
+    addcarryxU64(&x15, &x16, x14, x10, x7);
     var x17: u64 = undefined;
     var x18: u1 = undefined;
-    p256AddcarryxU64(&x17, &x18, x16, x8, x5);
-    const x19: u64 = (@intCast(u64, x18) + x6);
+    addcarryxU64(&x17, &x18, x16, x8, x5);
+    const x19 = (cast(u64, x18) + x6);
     var x20: u64 = undefined;
     var x21: u64 = undefined;
-    p256MulxU64(&x20, &x21, x11, 0xffffffff00000001);
+    mulxU64(&x20, &x21, x11, 0xffffffff00000001);
     var x22: u64 = undefined;
     var x23: u64 = undefined;
-    p256MulxU64(&x22, &x23, x11, 0xffffffff);
+    mulxU64(&x22, &x23, x11, 0xffffffff);
     var x24: u64 = undefined;
     var x25: u64 = undefined;
-    p256MulxU64(&x24, &x25, x11, 0xffffffffffffffff);
+    mulxU64(&x24, &x25, x11, 0xffffffffffffffff);
     var x26: u64 = undefined;
     var x27: u1 = undefined;
-    p256AddcarryxU64(&x26, &x27, 0x0, x25, x22);
-    const x28: u64 = (@intCast(u64, x27) + x23);
+    addcarryxU64(&x26, &x27, 0x0, x25, x22);
+    const x28 = (cast(u64, x27) + x23);
     var x29: u64 = undefined;
     var x30: u1 = undefined;
-    p256AddcarryxU64(&x29, &x30, 0x0, x11, x24);
+    addcarryxU64(&x29, &x30, 0x0, x11, x24);
     var x31: u64 = undefined;
     var x32: u1 = undefined;
-    p256AddcarryxU64(&x31, &x32, x30, x13, x26);
+    addcarryxU64(&x31, &x32, x30, x13, x26);
     var x33: u64 = undefined;
     var x34: u1 = undefined;
-    p256AddcarryxU64(&x33, &x34, x32, x15, x28);
+    addcarryxU64(&x33, &x34, x32, x15, x28);
     var x35: u64 = undefined;
     var x36: u1 = undefined;
-    p256AddcarryxU64(&x35, &x36, x34, x17, x20);
+    addcarryxU64(&x35, &x36, x34, x17, x20);
     var x37: u64 = undefined;
     var x38: u1 = undefined;
-    p256AddcarryxU64(&x37, &x38, x36, x19, x21);
+    addcarryxU64(&x37, &x38, x36, x19, x21);
     var x39: u64 = undefined;
     var x40: u64 = undefined;
-    p256MulxU64(&x39, &x40, x1, (arg1[3]));
+    mulxU64(&x39, &x40, x1, (arg1[3]));
     var x41: u64 = undefined;
     var x42: u64 = undefined;
-    p256MulxU64(&x41, &x42, x1, (arg1[2]));
+    mulxU64(&x41, &x42, x1, (arg1[2]));
     var x43: u64 = undefined;
     var x44: u64 = undefined;
-    p256MulxU64(&x43, &x44, x1, (arg1[1]));
+    mulxU64(&x43, &x44, x1, (arg1[1]));
     var x45: u64 = undefined;
     var x46: u64 = undefined;
-    p256MulxU64(&x45, &x46, x1, (arg1[0]));
+    mulxU64(&x45, &x46, x1, (arg1[0]));
     var x47: u64 = undefined;
     var x48: u1 = undefined;
-    p256AddcarryxU64(&x47, &x48, 0x0, x46, x43);
+    addcarryxU64(&x47, &x48, 0x0, x46, x43);
     var x49: u64 = undefined;
     var x50: u1 = undefined;
-    p256AddcarryxU64(&x49, &x50, x48, x44, x41);
+    addcarryxU64(&x49, &x50, x48, x44, x41);
     var x51: u64 = undefined;
     var x52: u1 = undefined;
-    p256AddcarryxU64(&x51, &x52, x50, x42, x39);
-    const x53: u64 = (@intCast(u64, x52) + x40);
+    addcarryxU64(&x51, &x52, x50, x42, x39);
+    const x53 = (cast(u64, x52) + x40);
     var x54: u64 = undefined;
     var x55: u1 = undefined;
-    p256AddcarryxU64(&x54, &x55, 0x0, x31, x45);
+    addcarryxU64(&x54, &x55, 0x0, x31, x45);
     var x56: u64 = undefined;
     var x57: u1 = undefined;
-    p256AddcarryxU64(&x56, &x57, x55, x33, x47);
+    addcarryxU64(&x56, &x57, x55, x33, x47);
     var x58: u64 = undefined;
     var x59: u1 = undefined;
-    p256AddcarryxU64(&x58, &x59, x57, x35, x49);
+    addcarryxU64(&x58, &x59, x57, x35, x49);
     var x60: u64 = undefined;
     var x61: u1 = undefined;
-    p256AddcarryxU64(&x60, &x61, x59, x37, x51);
+    addcarryxU64(&x60, &x61, x59, x37, x51);
     var x62: u64 = undefined;
     var x63: u1 = undefined;
-    p256AddcarryxU64(&x62, &x63, x61, @intCast(u64, x38), x53);
+    addcarryxU64(&x62, &x63, x61, cast(u64, x38), x53);
     var x64: u64 = undefined;
     var x65: u64 = undefined;
-    p256MulxU64(&x64, &x65, x54, 0xffffffff00000001);
+    mulxU64(&x64, &x65, x54, 0xffffffff00000001);
     var x66: u64 = undefined;
     var x67: u64 = undefined;
-    p256MulxU64(&x66, &x67, x54, 0xffffffff);
+    mulxU64(&x66, &x67, x54, 0xffffffff);
     var x68: u64 = undefined;
     var x69: u64 = undefined;
-    p256MulxU64(&x68, &x69, x54, 0xffffffffffffffff);
+    mulxU64(&x68, &x69, x54, 0xffffffffffffffff);
     var x70: u64 = undefined;
     var x71: u1 = undefined;
-    p256AddcarryxU64(&x70, &x71, 0x0, x69, x66);
-    const x72: u64 = (@intCast(u64, x71) + x67);
+    addcarryxU64(&x70, &x71, 0x0, x69, x66);
+    const x72 = (cast(u64, x71) + x67);
     var x73: u64 = undefined;
     var x74: u1 = undefined;
-    p256AddcarryxU64(&x73, &x74, 0x0, x54, x68);
+    addcarryxU64(&x73, &x74, 0x0, x54, x68);
     var x75: u64 = undefined;
     var x76: u1 = undefined;
-    p256AddcarryxU64(&x75, &x76, x74, x56, x70);
+    addcarryxU64(&x75, &x76, x74, x56, x70);
     var x77: u64 = undefined;
     var x78: u1 = undefined;
-    p256AddcarryxU64(&x77, &x78, x76, x58, x72);
+    addcarryxU64(&x77, &x78, x76, x58, x72);
     var x79: u64 = undefined;
     var x80: u1 = undefined;
-    p256AddcarryxU64(&x79, &x80, x78, x60, x64);
+    addcarryxU64(&x79, &x80, x78, x60, x64);
     var x81: u64 = undefined;
     var x82: u1 = undefined;
-    p256AddcarryxU64(&x81, &x82, x80, x62, x65);
-    const x83: u64 = (@intCast(u64, x82) + @intCast(u64, x63));
+    addcarryxU64(&x81, &x82, x80, x62, x65);
+    const x83 = (cast(u64, x82) + cast(u64, x63));
     var x84: u64 = undefined;
     var x85: u64 = undefined;
-    p256MulxU64(&x84, &x85, x2, (arg1[3]));
+    mulxU64(&x84, &x85, x2, (arg1[3]));
     var x86: u64 = undefined;
     var x87: u64 = undefined;
-    p256MulxU64(&x86, &x87, x2, (arg1[2]));
+    mulxU64(&x86, &x87, x2, (arg1[2]));
     var x88: u64 = undefined;
     var x89: u64 = undefined;
-    p256MulxU64(&x88, &x89, x2, (arg1[1]));
+    mulxU64(&x88, &x89, x2, (arg1[1]));
     var x90: u64 = undefined;
     var x91: u64 = undefined;
-    p256MulxU64(&x90, &x91, x2, (arg1[0]));
+    mulxU64(&x90, &x91, x2, (arg1[0]));
     var x92: u64 = undefined;
     var x93: u1 = undefined;
-    p256AddcarryxU64(&x92, &x93, 0x0, x91, x88);
+    addcarryxU64(&x92, &x93, 0x0, x91, x88);
     var x94: u64 = undefined;
     var x95: u1 = undefined;
-    p256AddcarryxU64(&x94, &x95, x93, x89, x86);
+    addcarryxU64(&x94, &x95, x93, x89, x86);
     var x96: u64 = undefined;
     var x97: u1 = undefined;
-    p256AddcarryxU64(&x96, &x97, x95, x87, x84);
-    const x98: u64 = (@intCast(u64, x97) + x85);
+    addcarryxU64(&x96, &x97, x95, x87, x84);
+    const x98 = (cast(u64, x97) + x85);
     var x99: u64 = undefined;
     var x100: u1 = undefined;
-    p256AddcarryxU64(&x99, &x100, 0x0, x75, x90);
+    addcarryxU64(&x99, &x100, 0x0, x75, x90);
     var x101: u64 = undefined;
     var x102: u1 = undefined;
-    p256AddcarryxU64(&x101, &x102, x100, x77, x92);
+    addcarryxU64(&x101, &x102, x100, x77, x92);
     var x103: u64 = undefined;
     var x104: u1 = undefined;
-    p256AddcarryxU64(&x103, &x104, x102, x79, x94);
+    addcarryxU64(&x103, &x104, x102, x79, x94);
     var x105: u64 = undefined;
     var x106: u1 = undefined;
-    p256AddcarryxU64(&x105, &x106, x104, x81, x96);
+    addcarryxU64(&x105, &x106, x104, x81, x96);
     var x107: u64 = undefined;
     var x108: u1 = undefined;
-    p256AddcarryxU64(&x107, &x108, x106, x83, x98);
+    addcarryxU64(&x107, &x108, x106, x83, x98);
     var x109: u64 = undefined;
     var x110: u64 = undefined;
-    p256MulxU64(&x109, &x110, x99, 0xffffffff00000001);
+    mulxU64(&x109, &x110, x99, 0xffffffff00000001);
     var x111: u64 = undefined;
     var x112: u64 = undefined;
-    p256MulxU64(&x111, &x112, x99, 0xffffffff);
+    mulxU64(&x111, &x112, x99, 0xffffffff);
     var x113: u64 = undefined;
     var x114: u64 = undefined;
-    p256MulxU64(&x113, &x114, x99, 0xffffffffffffffff);
+    mulxU64(&x113, &x114, x99, 0xffffffffffffffff);
     var x115: u64 = undefined;
     var x116: u1 = undefined;
-    p256AddcarryxU64(&x115, &x116, 0x0, x114, x111);
-    const x117: u64 = (@intCast(u64, x116) + x112);
+    addcarryxU64(&x115, &x116, 0x0, x114, x111);
+    const x117 = (cast(u64, x116) + x112);
     var x118: u64 = undefined;
     var x119: u1 = undefined;
-    p256AddcarryxU64(&x118, &x119, 0x0, x99, x113);
+    addcarryxU64(&x118, &x119, 0x0, x99, x113);
     var x120: u64 = undefined;
     var x121: u1 = undefined;
-    p256AddcarryxU64(&x120, &x121, x119, x101, x115);
+    addcarryxU64(&x120, &x121, x119, x101, x115);
     var x122: u64 = undefined;
     var x123: u1 = undefined;
-    p256AddcarryxU64(&x122, &x123, x121, x103, x117);
+    addcarryxU64(&x122, &x123, x121, x103, x117);
     var x124: u64 = undefined;
     var x125: u1 = undefined;
-    p256AddcarryxU64(&x124, &x125, x123, x105, x109);
+    addcarryxU64(&x124, &x125, x123, x105, x109);
     var x126: u64 = undefined;
     var x127: u1 = undefined;
-    p256AddcarryxU64(&x126, &x127, x125, x107, x110);
-    const x128: u64 = (@intCast(u64, x127) + @intCast(u64, x108));
+    addcarryxU64(&x126, &x127, x125, x107, x110);
+    const x128 = (cast(u64, x127) + cast(u64, x108));
     var x129: u64 = undefined;
     var x130: u64 = undefined;
-    p256MulxU64(&x129, &x130, x3, (arg1[3]));
+    mulxU64(&x129, &x130, x3, (arg1[3]));
     var x131: u64 = undefined;
     var x132: u64 = undefined;
-    p256MulxU64(&x131, &x132, x3, (arg1[2]));
+    mulxU64(&x131, &x132, x3, (arg1[2]));
     var x133: u64 = undefined;
     var x134: u64 = undefined;
-    p256MulxU64(&x133, &x134, x3, (arg1[1]));
+    mulxU64(&x133, &x134, x3, (arg1[1]));
     var x135: u64 = undefined;
     var x136: u64 = undefined;
-    p256MulxU64(&x135, &x136, x3, (arg1[0]));
+    mulxU64(&x135, &x136, x3, (arg1[0]));
     var x137: u64 = undefined;
     var x138: u1 = undefined;
-    p256AddcarryxU64(&x137, &x138, 0x0, x136, x133);
+    addcarryxU64(&x137, &x138, 0x0, x136, x133);
     var x139: u64 = undefined;
     var x140: u1 = undefined;
-    p256AddcarryxU64(&x139, &x140, x138, x134, x131);
+    addcarryxU64(&x139, &x140, x138, x134, x131);
     var x141: u64 = undefined;
     var x142: u1 = undefined;
-    p256AddcarryxU64(&x141, &x142, x140, x132, x129);
-    const x143: u64 = (@intCast(u64, x142) + x130);
+    addcarryxU64(&x141, &x142, x140, x132, x129);
+    const x143 = (cast(u64, x142) + x130);
     var x144: u64 = undefined;
     var x145: u1 = undefined;
-    p256AddcarryxU64(&x144, &x145, 0x0, x120, x135);
+    addcarryxU64(&x144, &x145, 0x0, x120, x135);
     var x146: u64 = undefined;
     var x147: u1 = undefined;
-    p256AddcarryxU64(&x146, &x147, x145, x122, x137);
+    addcarryxU64(&x146, &x147, x145, x122, x137);
     var x148: u64 = undefined;
     var x149: u1 = undefined;
-    p256AddcarryxU64(&x148, &x149, x147, x124, x139);
+    addcarryxU64(&x148, &x149, x147, x124, x139);
     var x150: u64 = undefined;
     var x151: u1 = undefined;
-    p256AddcarryxU64(&x150, &x151, x149, x126, x141);
+    addcarryxU64(&x150, &x151, x149, x126, x141);
     var x152: u64 = undefined;
     var x153: u1 = undefined;
-    p256AddcarryxU64(&x152, &x153, x151, x128, x143);
+    addcarryxU64(&x152, &x153, x151, x128, x143);
     var x154: u64 = undefined;
     var x155: u64 = undefined;
-    p256MulxU64(&x154, &x155, x144, 0xffffffff00000001);
+    mulxU64(&x154, &x155, x144, 0xffffffff00000001);
     var x156: u64 = undefined;
     var x157: u64 = undefined;
-    p256MulxU64(&x156, &x157, x144, 0xffffffff);
+    mulxU64(&x156, &x157, x144, 0xffffffff);
     var x158: u64 = undefined;
     var x159: u64 = undefined;
-    p256MulxU64(&x158, &x159, x144, 0xffffffffffffffff);
+    mulxU64(&x158, &x159, x144, 0xffffffffffffffff);
     var x160: u64 = undefined;
     var x161: u1 = undefined;
-    p256AddcarryxU64(&x160, &x161, 0x0, x159, x156);
-    const x162: u64 = (@intCast(u64, x161) + x157);
+    addcarryxU64(&x160, &x161, 0x0, x159, x156);
+    const x162 = (cast(u64, x161) + x157);
     var x163: u64 = undefined;
     var x164: u1 = undefined;
-    p256AddcarryxU64(&x163, &x164, 0x0, x144, x158);
+    addcarryxU64(&x163, &x164, 0x0, x144, x158);
     var x165: u64 = undefined;
     var x166: u1 = undefined;
-    p256AddcarryxU64(&x165, &x166, x164, x146, x160);
+    addcarryxU64(&x165, &x166, x164, x146, x160);
     var x167: u64 = undefined;
     var x168: u1 = undefined;
-    p256AddcarryxU64(&x167, &x168, x166, x148, x162);
+    addcarryxU64(&x167, &x168, x166, x148, x162);
     var x169: u64 = undefined;
     var x170: u1 = undefined;
-    p256AddcarryxU64(&x169, &x170, x168, x150, x154);
+    addcarryxU64(&x169, &x170, x168, x150, x154);
     var x171: u64 = undefined;
     var x172: u1 = undefined;
-    p256AddcarryxU64(&x171, &x172, x170, x152, x155);
-    const x173: u64 = (@intCast(u64, x172) + @intCast(u64, x153));
+    addcarryxU64(&x171, &x172, x170, x152, x155);
+    const x173 = (cast(u64, x172) + cast(u64, x153));
     var x174: u64 = undefined;
     var x175: u1 = undefined;
-    p256SubborrowxU64(&x174, &x175, 0x0, x165, 0xffffffffffffffff);
+    subborrowxU64(&x174, &x175, 0x0, x165, 0xffffffffffffffff);
     var x176: u64 = undefined;
     var x177: u1 = undefined;
-    p256SubborrowxU64(&x176, &x177, x175, x167, 0xffffffff);
+    subborrowxU64(&x176, &x177, x175, x167, 0xffffffff);
     var x178: u64 = undefined;
     var x179: u1 = undefined;
-    p256SubborrowxU64(&x178, &x179, x177, x169, @intCast(u64, 0x0));
+    subborrowxU64(&x178, &x179, x177, x169, cast(u64, 0x0));
     var x180: u64 = undefined;
     var x181: u1 = undefined;
-    p256SubborrowxU64(&x180, &x181, x179, x171, 0xffffffff00000001);
+    subborrowxU64(&x180, &x181, x179, x171, 0xffffffff00000001);
     var x182: u64 = undefined;
     var x183: u1 = undefined;
-    p256SubborrowxU64(&x182, &x183, x181, x173, @intCast(u64, 0x0));
+    subborrowxU64(&x182, &x183, x181, x173, cast(u64, 0x0));
     var x184: u64 = undefined;
-    p256CmovznzU64(&x184, x183, x174, x165);
+    cmovznzU64(&x184, x183, x174, x165);
     var x185: u64 = undefined;
-    p256CmovznzU64(&x185, x183, x176, x167);
+    cmovznzU64(&x185, x183, x176, x167);
     var x186: u64 = undefined;
-    p256CmovznzU64(&x186, x183, x178, x169);
+    cmovznzU64(&x186, x183, x178, x169);
     var x187: u64 = undefined;
-    p256CmovznzU64(&x187, x183, x180, x171);
+    cmovznzU64(&x187, x183, x180, x171);
     out1[0] = x184;
     out1[1] = x185;
     out1[2] = x186;
     out1[3] = x187;
 }
 
-/// The function p256Add adds two field elements in the Montgomery domain.
+/// The function add adds two field elements in the Montgomery domain.
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 ///   0 ≤ eval arg2 < m
@@ -694,49 +706,51 @@ pub fn p256Square(out1: *Limbs, arg1: Limbs) void {
 ///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn p256Add(out1: *Limbs, arg1: Limbs, arg2: Limbs) void {
+pub fn add(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
     var x1: u64 = undefined;
     var x2: u1 = undefined;
-    p256AddcarryxU64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
+    addcarryxU64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
     var x3: u64 = undefined;
     var x4: u1 = undefined;
-    p256AddcarryxU64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
+    addcarryxU64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
     var x5: u64 = undefined;
     var x6: u1 = undefined;
-    p256AddcarryxU64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
+    addcarryxU64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
     var x7: u64 = undefined;
     var x8: u1 = undefined;
-    p256AddcarryxU64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
+    addcarryxU64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
     var x9: u64 = undefined;
     var x10: u1 = undefined;
-    p256SubborrowxU64(&x9, &x10, 0x0, x1, 0xffffffffffffffff);
+    subborrowxU64(&x9, &x10, 0x0, x1, 0xffffffffffffffff);
     var x11: u64 = undefined;
     var x12: u1 = undefined;
-    p256SubborrowxU64(&x11, &x12, x10, x3, 0xffffffff);
+    subborrowxU64(&x11, &x12, x10, x3, 0xffffffff);
     var x13: u64 = undefined;
     var x14: u1 = undefined;
-    p256SubborrowxU64(&x13, &x14, x12, x5, @intCast(u64, 0x0));
+    subborrowxU64(&x13, &x14, x12, x5, cast(u64, 0x0));
     var x15: u64 = undefined;
     var x16: u1 = undefined;
-    p256SubborrowxU64(&x15, &x16, x14, x7, 0xffffffff00000001);
+    subborrowxU64(&x15, &x16, x14, x7, 0xffffffff00000001);
     var x17: u64 = undefined;
     var x18: u1 = undefined;
-    p256SubborrowxU64(&x17, &x18, x16, @intCast(u64, x8), @intCast(u64, 0x0));
+    subborrowxU64(&x17, &x18, x16, cast(u64, x8), cast(u64, 0x0));
     var x19: u64 = undefined;
-    p256CmovznzU64(&x19, x18, x9, x1);
+    cmovznzU64(&x19, x18, x9, x1);
     var x20: u64 = undefined;
-    p256CmovznzU64(&x20, x18, x11, x3);
+    cmovznzU64(&x20, x18, x11, x3);
     var x21: u64 = undefined;
-    p256CmovznzU64(&x21, x18, x13, x5);
+    cmovznzU64(&x21, x18, x13, x5);
     var x22: u64 = undefined;
-    p256CmovznzU64(&x22, x18, x15, x7);
+    cmovznzU64(&x22, x18, x15, x7);
     out1[0] = x19;
     out1[1] = x20;
     out1[2] = x21;
     out1[3] = x22;
 }
 
-/// The function p256Sub subtracts two field elements in the Montgomery domain.
+/// The function sub subtracts two field elements in the Montgomery domain.
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 ///   0 ≤ eval arg2 < m
@@ -749,40 +763,42 @@ pub fn p256Add(out1: *Limbs, arg1: Limbs, arg2: Limbs) void {
 ///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn p256Sub(out1: *Limbs, arg1: Limbs, arg2: Limbs) void {
+pub fn sub(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
     var x1: u64 = undefined;
     var x2: u1 = undefined;
-    p256SubborrowxU64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
+    subborrowxU64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
     var x3: u64 = undefined;
     var x4: u1 = undefined;
-    p256SubborrowxU64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
+    subborrowxU64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
     var x5: u64 = undefined;
     var x6: u1 = undefined;
-    p256SubborrowxU64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
+    subborrowxU64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
     var x7: u64 = undefined;
     var x8: u1 = undefined;
-    p256SubborrowxU64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
+    subborrowxU64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
     var x9: u64 = undefined;
-    p256CmovznzU64(&x9, x8, @intCast(u64, 0x0), 0xffffffffffffffff);
+    cmovznzU64(&x9, x8, cast(u64, 0x0), 0xffffffffffffffff);
     var x10: u64 = undefined;
     var x11: u1 = undefined;
-    p256AddcarryxU64(&x10, &x11, 0x0, x1, x9);
+    addcarryxU64(&x10, &x11, 0x0, x1, x9);
     var x12: u64 = undefined;
     var x13: u1 = undefined;
-    p256AddcarryxU64(&x12, &x13, x11, x3, (x9 & 0xffffffff));
+    addcarryxU64(&x12, &x13, x11, x3, (x9 & 0xffffffff));
     var x14: u64 = undefined;
     var x15: u1 = undefined;
-    p256AddcarryxU64(&x14, &x15, x13, x5, @intCast(u64, 0x0));
+    addcarryxU64(&x14, &x15, x13, x5, cast(u64, 0x0));
     var x16: u64 = undefined;
     var x17: u1 = undefined;
-    p256AddcarryxU64(&x16, &x17, x15, x7, (x9 & 0xffffffff00000001));
+    addcarryxU64(&x16, &x17, x15, x7, (x9 & 0xffffffff00000001));
     out1[0] = x10;
     out1[1] = x12;
     out1[2] = x14;
     out1[3] = x16;
 }
 
-/// The function p256Opp negates a field element in the Montgomery domain.
+/// The function opp negates a field element in the Montgomery domain.
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
@@ -793,40 +809,42 @@ pub fn p256Sub(out1: *Limbs, arg1: Limbs, arg2: Limbs) void {
 ///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn p256Opp(out1: *Limbs, arg1: Limbs) void {
+pub fn opp(out1: *[4]u64, arg1: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
     var x1: u64 = undefined;
     var x2: u1 = undefined;
-    p256SubborrowxU64(&x1, &x2, 0x0, @intCast(u64, 0x0), (arg1[0]));
+    subborrowxU64(&x1, &x2, 0x0, cast(u64, 0x0), (arg1[0]));
     var x3: u64 = undefined;
     var x4: u1 = undefined;
-    p256SubborrowxU64(&x3, &x4, x2, @intCast(u64, 0x0), (arg1[1]));
+    subborrowxU64(&x3, &x4, x2, cast(u64, 0x0), (arg1[1]));
     var x5: u64 = undefined;
     var x6: u1 = undefined;
-    p256SubborrowxU64(&x5, &x6, x4, @intCast(u64, 0x0), (arg1[2]));
+    subborrowxU64(&x5, &x6, x4, cast(u64, 0x0), (arg1[2]));
     var x7: u64 = undefined;
     var x8: u1 = undefined;
-    p256SubborrowxU64(&x7, &x8, x6, @intCast(u64, 0x0), (arg1[3]));
+    subborrowxU64(&x7, &x8, x6, cast(u64, 0x0), (arg1[3]));
     var x9: u64 = undefined;
-    p256CmovznzU64(&x9, x8, @intCast(u64, 0x0), 0xffffffffffffffff);
+    cmovznzU64(&x9, x8, cast(u64, 0x0), 0xffffffffffffffff);
     var x10: u64 = undefined;
     var x11: u1 = undefined;
-    p256AddcarryxU64(&x10, &x11, 0x0, x1, x9);
+    addcarryxU64(&x10, &x11, 0x0, x1, x9);
     var x12: u64 = undefined;
     var x13: u1 = undefined;
-    p256AddcarryxU64(&x12, &x13, x11, x3, (x9 & 0xffffffff));
+    addcarryxU64(&x12, &x13, x11, x3, (x9 & 0xffffffff));
     var x14: u64 = undefined;
     var x15: u1 = undefined;
-    p256AddcarryxU64(&x14, &x15, x13, x5, @intCast(u64, 0x0));
+    addcarryxU64(&x14, &x15, x13, x5, cast(u64, 0x0));
     var x16: u64 = undefined;
     var x17: u1 = undefined;
-    p256AddcarryxU64(&x16, &x17, x15, x7, (x9 & 0xffffffff00000001));
+    addcarryxU64(&x16, &x17, x15, x7, (x9 & 0xffffffff00000001));
     out1[0] = x10;
     out1[1] = x12;
     out1[2] = x14;
     out1[3] = x16;
 }
 
-/// The function p256FromMontgomery translates a field element out of the Montgomery domain.
+/// The function fromMontgomery translates a field element out of the Montgomery domain.
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
@@ -837,150 +855,152 @@ pub fn p256Opp(out1: *Limbs, arg1: Limbs) void {
 ///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn p256FromMontgomery(out1: *Limbs, arg1: Limbs) void {
-    const x1: u64 = (arg1[0]);
+pub fn fromMontgomery(out1: *[4]u64, arg1: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    const x1 = (arg1[0]);
     var x2: u64 = undefined;
     var x3: u64 = undefined;
-    p256MulxU64(&x2, &x3, x1, 0xffffffff00000001);
+    mulxU64(&x2, &x3, x1, 0xffffffff00000001);
     var x4: u64 = undefined;
     var x5: u64 = undefined;
-    p256MulxU64(&x4, &x5, x1, 0xffffffff);
+    mulxU64(&x4, &x5, x1, 0xffffffff);
     var x6: u64 = undefined;
     var x7: u64 = undefined;
-    p256MulxU64(&x6, &x7, x1, 0xffffffffffffffff);
+    mulxU64(&x6, &x7, x1, 0xffffffffffffffff);
     var x8: u64 = undefined;
     var x9: u1 = undefined;
-    p256AddcarryxU64(&x8, &x9, 0x0, x7, x4);
+    addcarryxU64(&x8, &x9, 0x0, x7, x4);
     var x10: u64 = undefined;
     var x11: u1 = undefined;
-    p256AddcarryxU64(&x10, &x11, 0x0, x1, x6);
+    addcarryxU64(&x10, &x11, 0x0, x1, x6);
     var x12: u64 = undefined;
     var x13: u1 = undefined;
-    p256AddcarryxU64(&x12, &x13, x11, @intCast(u64, 0x0), x8);
+    addcarryxU64(&x12, &x13, x11, cast(u64, 0x0), x8);
     var x14: u64 = undefined;
     var x15: u1 = undefined;
-    p256AddcarryxU64(&x14, &x15, 0x0, x12, (arg1[1]));
+    addcarryxU64(&x14, &x15, 0x0, x12, (arg1[1]));
     var x16: u64 = undefined;
     var x17: u64 = undefined;
-    p256MulxU64(&x16, &x17, x14, 0xffffffff00000001);
+    mulxU64(&x16, &x17, x14, 0xffffffff00000001);
     var x18: u64 = undefined;
     var x19: u64 = undefined;
-    p256MulxU64(&x18, &x19, x14, 0xffffffff);
+    mulxU64(&x18, &x19, x14, 0xffffffff);
     var x20: u64 = undefined;
     var x21: u64 = undefined;
-    p256MulxU64(&x20, &x21, x14, 0xffffffffffffffff);
+    mulxU64(&x20, &x21, x14, 0xffffffffffffffff);
     var x22: u64 = undefined;
     var x23: u1 = undefined;
-    p256AddcarryxU64(&x22, &x23, 0x0, x21, x18);
+    addcarryxU64(&x22, &x23, 0x0, x21, x18);
     var x24: u64 = undefined;
     var x25: u1 = undefined;
-    p256AddcarryxU64(&x24, &x25, 0x0, x14, x20);
+    addcarryxU64(&x24, &x25, 0x0, x14, x20);
     var x26: u64 = undefined;
     var x27: u1 = undefined;
-    p256AddcarryxU64(&x26, &x27, x25, (@intCast(u64, x15) + (@intCast(u64, x13) + (@intCast(u64, x9) + x5))), x22);
+    addcarryxU64(&x26, &x27, x25, (cast(u64, x15) + (cast(u64, x13) + (cast(u64, x9) + x5))), x22);
     var x28: u64 = undefined;
     var x29: u1 = undefined;
-    p256AddcarryxU64(&x28, &x29, x27, x2, (@intCast(u64, x23) + x19));
+    addcarryxU64(&x28, &x29, x27, x2, (cast(u64, x23) + x19));
     var x30: u64 = undefined;
     var x31: u1 = undefined;
-    p256AddcarryxU64(&x30, &x31, x29, x3, x16);
+    addcarryxU64(&x30, &x31, x29, x3, x16);
     var x32: u64 = undefined;
     var x33: u1 = undefined;
-    p256AddcarryxU64(&x32, &x33, 0x0, x26, (arg1[2]));
+    addcarryxU64(&x32, &x33, 0x0, x26, (arg1[2]));
     var x34: u64 = undefined;
     var x35: u1 = undefined;
-    p256AddcarryxU64(&x34, &x35, x33, x28, @intCast(u64, 0x0));
+    addcarryxU64(&x34, &x35, x33, x28, cast(u64, 0x0));
     var x36: u64 = undefined;
     var x37: u1 = undefined;
-    p256AddcarryxU64(&x36, &x37, x35, x30, @intCast(u64, 0x0));
+    addcarryxU64(&x36, &x37, x35, x30, cast(u64, 0x0));
     var x38: u64 = undefined;
     var x39: u64 = undefined;
-    p256MulxU64(&x38, &x39, x32, 0xffffffff00000001);
+    mulxU64(&x38, &x39, x32, 0xffffffff00000001);
     var x40: u64 = undefined;
     var x41: u64 = undefined;
-    p256MulxU64(&x40, &x41, x32, 0xffffffff);
+    mulxU64(&x40, &x41, x32, 0xffffffff);
     var x42: u64 = undefined;
     var x43: u64 = undefined;
-    p256MulxU64(&x42, &x43, x32, 0xffffffffffffffff);
+    mulxU64(&x42, &x43, x32, 0xffffffffffffffff);
     var x44: u64 = undefined;
     var x45: u1 = undefined;
-    p256AddcarryxU64(&x44, &x45, 0x0, x43, x40);
+    addcarryxU64(&x44, &x45, 0x0, x43, x40);
     var x46: u64 = undefined;
     var x47: u1 = undefined;
-    p256AddcarryxU64(&x46, &x47, 0x0, x32, x42);
+    addcarryxU64(&x46, &x47, 0x0, x32, x42);
     var x48: u64 = undefined;
     var x49: u1 = undefined;
-    p256AddcarryxU64(&x48, &x49, x47, x34, x44);
+    addcarryxU64(&x48, &x49, x47, x34, x44);
     var x50: u64 = undefined;
     var x51: u1 = undefined;
-    p256AddcarryxU64(&x50, &x51, x49, x36, (@intCast(u64, x45) + x41));
+    addcarryxU64(&x50, &x51, x49, x36, (cast(u64, x45) + x41));
     var x52: u64 = undefined;
     var x53: u1 = undefined;
-    p256AddcarryxU64(&x52, &x53, x51, (@intCast(u64, x37) + (@intCast(u64, x31) + x17)), x38);
+    addcarryxU64(&x52, &x53, x51, (cast(u64, x37) + (cast(u64, x31) + x17)), x38);
     var x54: u64 = undefined;
     var x55: u1 = undefined;
-    p256AddcarryxU64(&x54, &x55, 0x0, x48, (arg1[3]));
+    addcarryxU64(&x54, &x55, 0x0, x48, (arg1[3]));
     var x56: u64 = undefined;
     var x57: u1 = undefined;
-    p256AddcarryxU64(&x56, &x57, x55, x50, @intCast(u64, 0x0));
+    addcarryxU64(&x56, &x57, x55, x50, cast(u64, 0x0));
     var x58: u64 = undefined;
     var x59: u1 = undefined;
-    p256AddcarryxU64(&x58, &x59, x57, x52, @intCast(u64, 0x0));
+    addcarryxU64(&x58, &x59, x57, x52, cast(u64, 0x0));
     var x60: u64 = undefined;
     var x61: u64 = undefined;
-    p256MulxU64(&x60, &x61, x54, 0xffffffff00000001);
+    mulxU64(&x60, &x61, x54, 0xffffffff00000001);
     var x62: u64 = undefined;
     var x63: u64 = undefined;
-    p256MulxU64(&x62, &x63, x54, 0xffffffff);
+    mulxU64(&x62, &x63, x54, 0xffffffff);
     var x64: u64 = undefined;
     var x65: u64 = undefined;
-    p256MulxU64(&x64, &x65, x54, 0xffffffffffffffff);
+    mulxU64(&x64, &x65, x54, 0xffffffffffffffff);
     var x66: u64 = undefined;
     var x67: u1 = undefined;
-    p256AddcarryxU64(&x66, &x67, 0x0, x65, x62);
+    addcarryxU64(&x66, &x67, 0x0, x65, x62);
     var x68: u64 = undefined;
     var x69: u1 = undefined;
-    p256AddcarryxU64(&x68, &x69, 0x0, x54, x64);
+    addcarryxU64(&x68, &x69, 0x0, x54, x64);
     var x70: u64 = undefined;
     var x71: u1 = undefined;
-    p256AddcarryxU64(&x70, &x71, x69, x56, x66);
+    addcarryxU64(&x70, &x71, x69, x56, x66);
     var x72: u64 = undefined;
     var x73: u1 = undefined;
-    p256AddcarryxU64(&x72, &x73, x71, x58, (@intCast(u64, x67) + x63));
+    addcarryxU64(&x72, &x73, x71, x58, (cast(u64, x67) + x63));
     var x74: u64 = undefined;
     var x75: u1 = undefined;
-    p256AddcarryxU64(&x74, &x75, x73, (@intCast(u64, x59) + (@intCast(u64, x53) + x39)), x60);
-    const x76: u64 = (@intCast(u64, x75) + x61);
+    addcarryxU64(&x74, &x75, x73, (cast(u64, x59) + (cast(u64, x53) + x39)), x60);
+    const x76 = (cast(u64, x75) + x61);
     var x77: u64 = undefined;
     var x78: u1 = undefined;
-    p256SubborrowxU64(&x77, &x78, 0x0, x70, 0xffffffffffffffff);
+    subborrowxU64(&x77, &x78, 0x0, x70, 0xffffffffffffffff);
     var x79: u64 = undefined;
     var x80: u1 = undefined;
-    p256SubborrowxU64(&x79, &x80, x78, x72, 0xffffffff);
+    subborrowxU64(&x79, &x80, x78, x72, 0xffffffff);
     var x81: u64 = undefined;
     var x82: u1 = undefined;
-    p256SubborrowxU64(&x81, &x82, x80, x74, @intCast(u64, 0x0));
+    subborrowxU64(&x81, &x82, x80, x74, cast(u64, 0x0));
     var x83: u64 = undefined;
     var x84: u1 = undefined;
-    p256SubborrowxU64(&x83, &x84, x82, x76, 0xffffffff00000001);
+    subborrowxU64(&x83, &x84, x82, x76, 0xffffffff00000001);
     var x85: u64 = undefined;
     var x86: u1 = undefined;
-    p256SubborrowxU64(&x85, &x86, x84, @intCast(u64, 0x0), @intCast(u64, 0x0));
+    subborrowxU64(&x85, &x86, x84, cast(u64, 0x0), cast(u64, 0x0));
     var x87: u64 = undefined;
-    p256CmovznzU64(&x87, x86, x77, x70);
+    cmovznzU64(&x87, x86, x77, x70);
     var x88: u64 = undefined;
-    p256CmovznzU64(&x88, x86, x79, x72);
+    cmovznzU64(&x88, x86, x79, x72);
     var x89: u64 = undefined;
-    p256CmovznzU64(&x89, x86, x81, x74);
+    cmovznzU64(&x89, x86, x81, x74);
     var x90: u64 = undefined;
-    p256CmovznzU64(&x90, x86, x83, x76);
+    cmovznzU64(&x90, x86, x83, x76);
     out1[0] = x87;
     out1[1] = x88;
     out1[2] = x89;
     out1[3] = x90;
 }
 
-/// The function p256ToMontgomery translates a field element into the Montgomery domain.
+/// The function toMontgomery translates a field element into the Montgomery domain.
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
@@ -991,269 +1011,271 @@ pub fn p256FromMontgomery(out1: *Limbs, arg1: Limbs) void {
 ///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 ///
 /// Implementation outline: the limbs arg1[0], arg1[1], arg1[2], arg1[3] are consumed in that order.
 /// Each round multiplies one limb by the same four constants, adds the product to the running
 /// accumulator, and then folds in a multiple of the modulus limbs. A final borrow-tracked
 /// subtraction of the modulus decides which limbs are written to out1.
-pub fn p256ToMontgomery(out1: *Limbs, arg1: Limbs) void {
-    const x1: u64 = (arg1[1]);
-    const x2: u64 = (arg1[2]);
-    const x3: u64 = (arg1[3]);
-    const x4: u64 = (arg1[0]);
+pub fn toMontgomery(out1: *[4]u64, arg1: [4]u64) void {
     // Runtime safety checks stay enabled only when `mode` is `.Debug` (`mode` is declared outside this excerpt).
+    @setRuntimeSafety(mode == .Debug);
+
+    const x1 = (arg1[1]);
+    const x2 = (arg1[2]);
+    const x3 = (arg1[3]);
+    const x4 = (arg1[0]);
     // Round 1, multiply: limb arg1[0] (x4) times the constants 0x4fffffffd, 0xfffffffffffffffe,
     // 0xfffffffbffffffff and 0x3.
     // NOTE(review): these look like the limbs of R^2 mod m, most significant first — confirm against the generator.
     var x5: u64 = undefined;
     var x6: u64 = undefined;
-    p256MulxU64(&x5, &x6, x4, 0x4fffffffd);
+    mulxU64(&x5, &x6, x4, 0x4fffffffd);
     var x7: u64 = undefined;
     var x8: u64 = undefined;
-    p256MulxU64(&x7, &x8, x4, 0xfffffffffffffffe);
+    mulxU64(&x7, &x8, x4, 0xfffffffffffffffe);
     var x9: u64 = undefined;
     var x10: u64 = undefined;
-    p256MulxU64(&x9, &x10, x4, 0xfffffffbffffffff);
+    mulxU64(&x9, &x10, x4, 0xfffffffbffffffff);
     var x11: u64 = undefined;
     var x12: u64 = undefined;
-    p256MulxU64(&x11, &x12, x4, 0x3);
+    mulxU64(&x11, &x12, x4, 0x3);
     // Chain the partial products together with carries.
     var x13: u64 = undefined;
     var x14: u1 = undefined;
-    p256AddcarryxU64(&x13, &x14, 0x0, x12, x9);
+    addcarryxU64(&x13, &x14, 0x0, x12, x9);
     var x15: u64 = undefined;
     var x16: u1 = undefined;
-    p256AddcarryxU64(&x15, &x16, x14, x10, x7);
+    addcarryxU64(&x15, &x16, x14, x10, x7);
     var x17: u64 = undefined;
     var x18: u1 = undefined;
-    p256AddcarryxU64(&x17, &x18, x16, x8, x5);
+    addcarryxU64(&x17, &x18, x16, x8, x5);
     // Round 1, reduce: multiply the low word x11 by 0xffffffff00000001, 0xffffffff and
     // 0xffffffffffffffff (the non-zero modulus limbs that `msat` writes) and add the result in.
     var x19: u64 = undefined;
     var x20: u64 = undefined;
-    p256MulxU64(&x19, &x20, x11, 0xffffffff00000001);
+    mulxU64(&x19, &x20, x11, 0xffffffff00000001);
     var x21: u64 = undefined;
     var x22: u64 = undefined;
-    p256MulxU64(&x21, &x22, x11, 0xffffffff);
+    mulxU64(&x21, &x22, x11, 0xffffffff);
     var x23: u64 = undefined;
     var x24: u64 = undefined;
-    p256MulxU64(&x23, &x24, x11, 0xffffffffffffffff);
+    mulxU64(&x23, &x24, x11, 0xffffffffffffffff);
     var x25: u64 = undefined;
     var x26: u1 = undefined;
-    p256AddcarryxU64(&x25, &x26, 0x0, x24, x21);
+    addcarryxU64(&x25, &x26, 0x0, x24, x21);
     // The sum word x27 is not read again; only its carry x28 is propagated.
     var x27: u64 = undefined;
     var x28: u1 = undefined;
-    p256AddcarryxU64(&x27, &x28, 0x0, x11, x23);
+    addcarryxU64(&x27, &x28, 0x0, x11, x23);
     var x29: u64 = undefined;
     var x30: u1 = undefined;
-    p256AddcarryxU64(&x29, &x30, x28, x13, x25);
+    addcarryxU64(&x29, &x30, x28, x13, x25);
     var x31: u64 = undefined;
     var x32: u1 = undefined;
-    p256AddcarryxU64(&x31, &x32, x30, x15, (@intCast(u64, x26) + x22));
+    addcarryxU64(&x31, &x32, x30, x15, (cast(u64, x26) + x22));
     var x33: u64 = undefined;
     var x34: u1 = undefined;
-    p256AddcarryxU64(&x33, &x34, x32, x17, x19);
+    addcarryxU64(&x33, &x34, x32, x17, x19);
     var x35: u64 = undefined;
     var x36: u1 = undefined;
-    p256AddcarryxU64(&x35, &x36, x34, (@intCast(u64, x18) + x6), x20);
+    addcarryxU64(&x35, &x36, x34, (cast(u64, x18) + x6), x20);
     // Round 2, multiply: limb arg1[1] (x1) times the same four constants.
     var x37: u64 = undefined;
     var x38: u64 = undefined;
-    p256MulxU64(&x37, &x38, x1, 0x4fffffffd);
+    mulxU64(&x37, &x38, x1, 0x4fffffffd);
     var x39: u64 = undefined;
     var x40: u64 = undefined;
-    p256MulxU64(&x39, &x40, x1, 0xfffffffffffffffe);
+    mulxU64(&x39, &x40, x1, 0xfffffffffffffffe);
     var x41: u64 = undefined;
     var x42: u64 = undefined;
-    p256MulxU64(&x41, &x42, x1, 0xfffffffbffffffff);
+    mulxU64(&x41, &x42, x1, 0xfffffffbffffffff);
     var x43: u64 = undefined;
     var x44: u64 = undefined;
-    p256MulxU64(&x43, &x44, x1, 0x3);
+    mulxU64(&x43, &x44, x1, 0x3);
     var x45: u64 = undefined;
     var x46: u1 = undefined;
-    p256AddcarryxU64(&x45, &x46, 0x0, x44, x41);
+    addcarryxU64(&x45, &x46, 0x0, x44, x41);
     var x47: u64 = undefined;
     var x48: u1 = undefined;
-    p256AddcarryxU64(&x47, &x48, x46, x42, x39);
+    addcarryxU64(&x47, &x48, x46, x42, x39);
     var x49: u64 = undefined;
     var x50: u1 = undefined;
-    p256AddcarryxU64(&x49, &x50, x48, x40, x37);
+    addcarryxU64(&x49, &x50, x48, x40, x37);
     // Add this round's product to the accumulator left by round 1 (x29, x31, x33, x35).
     var x51: u64 = undefined;
     var x52: u1 = undefined;
-    p256AddcarryxU64(&x51, &x52, 0x0, x29, x43);
+    addcarryxU64(&x51, &x52, 0x0, x29, x43);
     var x53: u64 = undefined;
     var x54: u1 = undefined;
-    p256AddcarryxU64(&x53, &x54, x52, x31, x45);
+    addcarryxU64(&x53, &x54, x52, x31, x45);
     var x55: u64 = undefined;
     var x56: u1 = undefined;
-    p256AddcarryxU64(&x55, &x56, x54, x33, x47);
+    addcarryxU64(&x55, &x56, x54, x33, x47);
     var x57: u64 = undefined;
     var x58: u1 = undefined;
-    p256AddcarryxU64(&x57, &x58, x56, x35, x49);
+    addcarryxU64(&x57, &x58, x56, x35, x49);
     // Round 2, reduce: same modulus-limb multiplication, driven by the low word x51.
     var x59: u64 = undefined;
     var x60: u64 = undefined;
-    p256MulxU64(&x59, &x60, x51, 0xffffffff00000001);
+    mulxU64(&x59, &x60, x51, 0xffffffff00000001);
     var x61: u64 = undefined;
     var x62: u64 = undefined;
-    p256MulxU64(&x61, &x62, x51, 0xffffffff);
+    mulxU64(&x61, &x62, x51, 0xffffffff);
     var x63: u64 = undefined;
     var x64: u64 = undefined;
-    p256MulxU64(&x63, &x64, x51, 0xffffffffffffffff);
+    mulxU64(&x63, &x64, x51, 0xffffffffffffffff);
     var x65: u64 = undefined;
     var x66: u1 = undefined;
-    p256AddcarryxU64(&x65, &x66, 0x0, x64, x61);
+    addcarryxU64(&x65, &x66, 0x0, x64, x61);
     var x67: u64 = undefined;
     var x68: u1 = undefined;
-    p256AddcarryxU64(&x67, &x68, 0x0, x51, x63);
+    addcarryxU64(&x67, &x68, 0x0, x51, x63);
     var x69: u64 = undefined;
     var x70: u1 = undefined;
-    p256AddcarryxU64(&x69, &x70, x68, x53, x65);
+    addcarryxU64(&x69, &x70, x68, x53, x65);
     var x71: u64 = undefined;
     var x72: u1 = undefined;
-    p256AddcarryxU64(&x71, &x72, x70, x55, (@intCast(u64, x66) + x62));
+    addcarryxU64(&x71, &x72, x70, x55, (cast(u64, x66) + x62));
     var x73: u64 = undefined;
     var x74: u1 = undefined;
-    p256AddcarryxU64(&x73, &x74, x72, x57, x59);
+    addcarryxU64(&x73, &x74, x72, x57, x59);
     var x75: u64 = undefined;
     var x76: u1 = undefined;
-    p256AddcarryxU64(&x75, &x76, x74, ((@intCast(u64, x58) + @intCast(u64, x36)) + (@intCast(u64, x50) + x38)), x60);
+    addcarryxU64(&x75, &x76, x74, ((cast(u64, x58) + cast(u64, x36)) + (cast(u64, x50) + x38)), x60);
     // Round 3, multiply: limb arg1[2] (x2) times the same four constants.
     var x77: u64 = undefined;
     var x78: u64 = undefined;
-    p256MulxU64(&x77, &x78, x2, 0x4fffffffd);
+    mulxU64(&x77, &x78, x2, 0x4fffffffd);
     var x79: u64 = undefined;
     var x80: u64 = undefined;
-    p256MulxU64(&x79, &x80, x2, 0xfffffffffffffffe);
+    mulxU64(&x79, &x80, x2, 0xfffffffffffffffe);
     var x81: u64 = undefined;
     var x82: u64 = undefined;
-    p256MulxU64(&x81, &x82, x2, 0xfffffffbffffffff);
+    mulxU64(&x81, &x82, x2, 0xfffffffbffffffff);
     var x83: u64 = undefined;
     var x84: u64 = undefined;
-    p256MulxU64(&x83, &x84, x2, 0x3);
+    mulxU64(&x83, &x84, x2, 0x3);
     var x85: u64 = undefined;
     var x86: u1 = undefined;
-    p256AddcarryxU64(&x85, &x86, 0x0, x84, x81);
+    addcarryxU64(&x85, &x86, 0x0, x84, x81);
     var x87: u64 = undefined;
     var x88: u1 = undefined;
-    p256AddcarryxU64(&x87, &x88, x86, x82, x79);
+    addcarryxU64(&x87, &x88, x86, x82, x79);
     var x89: u64 = undefined;
     var x90: u1 = undefined;
-    p256AddcarryxU64(&x89, &x90, x88, x80, x77);
+    addcarryxU64(&x89, &x90, x88, x80, x77);
     // Add this round's product to the accumulator left by round 2 (x69, x71, x73, x75).
     var x91: u64 = undefined;
     var x92: u1 = undefined;
-    p256AddcarryxU64(&x91, &x92, 0x0, x69, x83);
+    addcarryxU64(&x91, &x92, 0x0, x69, x83);
     var x93: u64 = undefined;
     var x94: u1 = undefined;
-    p256AddcarryxU64(&x93, &x94, x92, x71, x85);
+    addcarryxU64(&x93, &x94, x92, x71, x85);
     var x95: u64 = undefined;
     var x96: u1 = undefined;
-    p256AddcarryxU64(&x95, &x96, x94, x73, x87);
+    addcarryxU64(&x95, &x96, x94, x73, x87);
     var x97: u64 = undefined;
     var x98: u1 = undefined;
-    p256AddcarryxU64(&x97, &x98, x96, x75, x89);
+    addcarryxU64(&x97, &x98, x96, x75, x89);
     // Round 3, reduce: same modulus-limb multiplication, driven by the low word x91.
     var x99: u64 = undefined;
     var x100: u64 = undefined;
-    p256MulxU64(&x99, &x100, x91, 0xffffffff00000001);
+    mulxU64(&x99, &x100, x91, 0xffffffff00000001);
     var x101: u64 = undefined;
     var x102: u64 = undefined;
-    p256MulxU64(&x101, &x102, x91, 0xffffffff);
+    mulxU64(&x101, &x102, x91, 0xffffffff);
     var x103: u64 = undefined;
     var x104: u64 = undefined;
-    p256MulxU64(&x103, &x104, x91, 0xffffffffffffffff);
+    mulxU64(&x103, &x104, x91, 0xffffffffffffffff);
     var x105: u64 = undefined;
     var x106: u1 = undefined;
-    p256AddcarryxU64(&x105, &x106, 0x0, x104, x101);
+    addcarryxU64(&x105, &x106, 0x0, x104, x101);
     var x107: u64 = undefined;
     var x108: u1 = undefined;
-    p256AddcarryxU64(&x107, &x108, 0x0, x91, x103);
+    addcarryxU64(&x107, &x108, 0x0, x91, x103);
     var x109: u64 = undefined;
     var x110: u1 = undefined;
-    p256AddcarryxU64(&x109, &x110, x108, x93, x105);
+    addcarryxU64(&x109, &x110, x108, x93, x105);
     var x111: u64 = undefined;
     var x112: u1 = undefined;
-    p256AddcarryxU64(&x111, &x112, x110, x95, (@intCast(u64, x106) + x102));
+    addcarryxU64(&x111, &x112, x110, x95, (cast(u64, x106) + x102));
     var x113: u64 = undefined;
     var x114: u1 = undefined;
-    p256AddcarryxU64(&x113, &x114, x112, x97, x99);
+    addcarryxU64(&x113, &x114, x112, x97, x99);
     var x115: u64 = undefined;
     var x116: u1 = undefined;
-    p256AddcarryxU64(&x115, &x116, x114, ((@intCast(u64, x98) + @intCast(u64, x76)) + (@intCast(u64, x90) + x78)), x100);
+    addcarryxU64(&x115, &x116, x114, ((cast(u64, x98) + cast(u64, x76)) + (cast(u64, x90) + x78)), x100);
     // Round 4, multiply: limb arg1[3] (x3) times the same four constants.
     var x117: u64 = undefined;
     var x118: u64 = undefined;
-    p256MulxU64(&x117, &x118, x3, 0x4fffffffd);
+    mulxU64(&x117, &x118, x3, 0x4fffffffd);
     var x119: u64 = undefined;
     var x120: u64 = undefined;
-    p256MulxU64(&x119, &x120, x3, 0xfffffffffffffffe);
+    mulxU64(&x119, &x120, x3, 0xfffffffffffffffe);
     var x121: u64 = undefined;
     var x122: u64 = undefined;
-    p256MulxU64(&x121, &x122, x3, 0xfffffffbffffffff);
+    mulxU64(&x121, &x122, x3, 0xfffffffbffffffff);
     var x123: u64 = undefined;
     var x124: u64 = undefined;
-    p256MulxU64(&x123, &x124, x3, 0x3);
+    mulxU64(&x123, &x124, x3, 0x3);
     var x125: u64 = undefined;
     var x126: u1 = undefined;
-    p256AddcarryxU64(&x125, &x126, 0x0, x124, x121);
+    addcarryxU64(&x125, &x126, 0x0, x124, x121);
     var x127: u64 = undefined;
     var x128: u1 = undefined;
-    p256AddcarryxU64(&x127, &x128, x126, x122, x119);
+    addcarryxU64(&x127, &x128, x126, x122, x119);
     var x129: u64 = undefined;
     var x130: u1 = undefined;
-    p256AddcarryxU64(&x129, &x130, x128, x120, x117);
+    addcarryxU64(&x129, &x130, x128, x120, x117);
     // Add this round's product to the accumulator left by round 3 (x109, x111, x113, x115).
     var x131: u64 = undefined;
     var x132: u1 = undefined;
-    p256AddcarryxU64(&x131, &x132, 0x0, x109, x123);
+    addcarryxU64(&x131, &x132, 0x0, x109, x123);
     var x133: u64 = undefined;
     var x134: u1 = undefined;
-    p256AddcarryxU64(&x133, &x134, x132, x111, x125);
+    addcarryxU64(&x133, &x134, x132, x111, x125);
     var x135: u64 = undefined;
     var x136: u1 = undefined;
-    p256AddcarryxU64(&x135, &x136, x134, x113, x127);
+    addcarryxU64(&x135, &x136, x134, x113, x127);
     var x137: u64 = undefined;
     var x138: u1 = undefined;
-    p256AddcarryxU64(&x137, &x138, x136, x115, x129);
+    addcarryxU64(&x137, &x138, x136, x115, x129);
     // Round 4, reduce: same modulus-limb multiplication, driven by the low word x131.
     var x139: u64 = undefined;
     var x140: u64 = undefined;
-    p256MulxU64(&x139, &x140, x131, 0xffffffff00000001);
+    mulxU64(&x139, &x140, x131, 0xffffffff00000001);
     var x141: u64 = undefined;
     var x142: u64 = undefined;
-    p256MulxU64(&x141, &x142, x131, 0xffffffff);
+    mulxU64(&x141, &x142, x131, 0xffffffff);
     var x143: u64 = undefined;
     var x144: u64 = undefined;
-    p256MulxU64(&x143, &x144, x131, 0xffffffffffffffff);
+    mulxU64(&x143, &x144, x131, 0xffffffffffffffff);
     var x145: u64 = undefined;
     var x146: u1 = undefined;
-    p256AddcarryxU64(&x145, &x146, 0x0, x144, x141);
+    addcarryxU64(&x145, &x146, 0x0, x144, x141);
     var x147: u64 = undefined;
     var x148: u1 = undefined;
-    p256AddcarryxU64(&x147, &x148, 0x0, x131, x143);
+    addcarryxU64(&x147, &x148, 0x0, x131, x143);
     var x149: u64 = undefined;
     var x150: u1 = undefined;
-    p256AddcarryxU64(&x149, &x150, x148, x133, x145);
+    addcarryxU64(&x149, &x150, x148, x133, x145);
     var x151: u64 = undefined;
     var x152: u1 = undefined;
-    p256AddcarryxU64(&x151, &x152, x150, x135, (@intCast(u64, x146) + x142));
+    addcarryxU64(&x151, &x152, x150, x135, (cast(u64, x146) + x142));
     var x153: u64 = undefined;
     var x154: u1 = undefined;
-    p256AddcarryxU64(&x153, &x154, x152, x137, x139);
+    addcarryxU64(&x153, &x154, x152, x137, x139);
     var x155: u64 = undefined;
     var x156: u1 = undefined;
-    p256AddcarryxU64(&x155, &x156, x154, ((@intCast(u64, x138) + @intCast(u64, x116)) + (@intCast(u64, x130) + x118)), x140);
+    addcarryxU64(&x155, &x156, x154, ((cast(u64, x138) + cast(u64, x116)) + (cast(u64, x130) + x118)), x140);
     // Final step: subtract the modulus limbs (0xffffffffffffffff, 0xffffffff, 0x0, 0xffffffff00000001)
     // from the accumulator x149, x151, x153, x155 (plus carry x156), tracking the borrow.
     var x157: u64 = undefined;
     var x158: u1 = undefined;
-    p256SubborrowxU64(&x157, &x158, 0x0, x149, 0xffffffffffffffff);
+    subborrowxU64(&x157, &x158, 0x0, x149, 0xffffffffffffffff);
     var x159: u64 = undefined;
     var x160: u1 = undefined;
-    p256SubborrowxU64(&x159, &x160, x158, x151, 0xffffffff);
+    subborrowxU64(&x159, &x160, x158, x151, 0xffffffff);
     var x161: u64 = undefined;
     var x162: u1 = undefined;
-    p256SubborrowxU64(&x161, &x162, x160, x153, @intCast(u64, 0x0));
+    subborrowxU64(&x161, &x162, x160, x153, cast(u64, 0x0));
     var x163: u64 = undefined;
     var x164: u1 = undefined;
-    p256SubborrowxU64(&x163, &x164, x162, x155, 0xffffffff00000001);
+    subborrowxU64(&x163, &x164, x162, x155, 0xffffffff00000001);
     var x165: u64 = undefined;
     var x166: u1 = undefined;
-    p256SubborrowxU64(&x165, &x166, x164, @intCast(u64, x156), @intCast(u64, 0x0));
+    subborrowxU64(&x165, &x166, x164, cast(u64, x156), cast(u64, 0x0));
     // Select each output limb on the final borrow x166, between the subtracted and unsubtracted values.
     // NOTE(review): assumes cmovznzU64 yields its third argument when the selector is zero, as the
     // selectznz postcondition suggests — confirm against its definition.
     var x167: u64 = undefined;
-    p256CmovznzU64(&x167, x166, x157, x149);
+    cmovznzU64(&x167, x166, x157, x149);
     var x168: u64 = undefined;
-    p256CmovznzU64(&x168, x166, x159, x151);
+    cmovznzU64(&x168, x166, x159, x151);
     var x169: u64 = undefined;
-    p256CmovznzU64(&x169, x166, x161, x153);
+    cmovznzU64(&x169, x166, x161, x153);
     var x170: u64 = undefined;
-    p256CmovznzU64(&x170, x166, x163, x155);
+    cmovznzU64(&x170, x166, x163, x155);
     out1[0] = x167;
     out1[1] = x168;
     out1[2] = x169;
     out1[3] = x170;
 }
 
-/// The function p256Nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
+/// The function nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
@@ -1263,12 +1285,14 @@ pub fn p256ToMontgomery(out1: *Limbs, arg1: Limbs) void {
 ///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 /// Output Bounds:
 ///   out1: [0x0 ~> 0xffffffffffffffff]
 ///
 /// The output word is the bitwise OR of the four limbs, so it is zero only when every limb is zero.
 /// It is not normalized to 0 or 1.
-pub fn p256Nonzero(out1: *u64, arg1: Limbs) void {
-    const x1: u64 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | (arg1[3]))));
+pub fn nonzero(out1: *u64, arg1: [4]u64) void {
     // Runtime safety checks stay enabled only when `mode` is `.Debug` (`mode` is declared outside this excerpt).
+    @setRuntimeSafety(mode == .Debug);
+
     // OR all limbs together: any set bit in any limb survives into x1.
+    const x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | (arg1[3]))));
     out1.* = x1;
 }
 
-/// The function p256Selectznz is a multi-limb conditional select.
+/// The function selectznz is a multi-limb conditional select.
 /// Postconditions:
 ///   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
 ///
@@ -1278,22 +1302,24 @@ pub fn p256Nonzero(out1: *u64, arg1: Limbs) void {
 ///   arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 ///
 /// The same selector bit arg1 is applied to each of the four limb pairs in turn.
-pub fn p256Selectznz(out1: *Limbs, arg1: u1, arg2: Limbs, arg3: Limbs) void {
+pub fn selectznz(out1: *[4]u64, arg1: u1, arg2: [4]u64, arg3: [4]u64) void {
     // Runtime safety checks stay enabled only when `mode` is `.Debug` (`mode` is declared outside this excerpt).
+    @setRuntimeSafety(mode == .Debug);
+
     // One cmovznzU64 call per limb; all results are computed before any is stored to out1.
     var x1: u64 = undefined;
-    p256CmovznzU64(&x1, arg1, (arg2[0]), (arg3[0]));
+    cmovznzU64(&x1, arg1, (arg2[0]), (arg3[0]));
     var x2: u64 = undefined;
-    p256CmovznzU64(&x2, arg1, (arg2[1]), (arg3[1]));
+    cmovznzU64(&x2, arg1, (arg2[1]), (arg3[1]));
     var x3: u64 = undefined;
-    p256CmovznzU64(&x3, arg1, (arg2[2]), (arg3[2]));
+    cmovznzU64(&x3, arg1, (arg2[2]), (arg3[2]));
     var x4: u64 = undefined;
-    p256CmovznzU64(&x4, arg1, (arg2[3]), (arg3[3]));
+    cmovznzU64(&x4, arg1, (arg2[3]), (arg3[3]));
     out1[0] = x1;
     out1[1] = x2;
     out1[2] = x3;
     out1[3] = x4;
 }
 
-/// The function p256ToBytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
+/// The function toBytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
 /// Preconditions:
 ///   0 ≤ eval arg1 < m
 /// Postconditions:
@@ -1303,67 +1329,69 @@ pub fn p256Selectznz(out1: *Limbs, arg1: u1, arg2: Limbs, arg3: Limbs) void {
 ///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
-pub fn p256ToBytes(out1: *[32]u8, arg1: Limbs) void {
-    const x1: u64 = (arg1[3]);
-    const x2: u64 = (arg1[2]);
-    const x3: u64 = (arg1[1]);
-    const x4: u64 = (arg1[0]);
-    const x5: u8 = @intCast(u8, (x4 & @intCast(u64, 0xff)));
-    const x6: u64 = (x4 >> 8);
-    const x7: u8 = @intCast(u8, (x6 & @intCast(u64, 0xff)));
-    const x8: u64 = (x6 >> 8);
-    const x9: u8 = @intCast(u8, (x8 & @intCast(u64, 0xff)));
-    const x10: u64 = (x8 >> 8);
-    const x11: u8 = @intCast(u8, (x10 & @intCast(u64, 0xff)));
-    const x12: u64 = (x10 >> 8);
-    const x13: u8 = @intCast(u8, (x12 & @intCast(u64, 0xff)));
-    const x14: u64 = (x12 >> 8);
-    const x15: u8 = @intCast(u8, (x14 & @intCast(u64, 0xff)));
-    const x16: u64 = (x14 >> 8);
-    const x17: u8 = @intCast(u8, (x16 & @intCast(u64, 0xff)));
-    const x18: u8 = @intCast(u8, (x16 >> 8));
-    const x19: u8 = @intCast(u8, (x3 & @intCast(u64, 0xff)));
-    const x20: u64 = (x3 >> 8);
-    const x21: u8 = @intCast(u8, (x20 & @intCast(u64, 0xff)));
-    const x22: u64 = (x20 >> 8);
-    const x23: u8 = @intCast(u8, (x22 & @intCast(u64, 0xff)));
-    const x24: u64 = (x22 >> 8);
-    const x25: u8 = @intCast(u8, (x24 & @intCast(u64, 0xff)));
-    const x26: u64 = (x24 >> 8);
-    const x27: u8 = @intCast(u8, (x26 & @intCast(u64, 0xff)));
-    const x28: u64 = (x26 >> 8);
-    const x29: u8 = @intCast(u8, (x28 & @intCast(u64, 0xff)));
-    const x30: u64 = (x28 >> 8);
-    const x31: u8 = @intCast(u8, (x30 & @intCast(u64, 0xff)));
-    const x32: u8 = @intCast(u8, (x30 >> 8));
-    const x33: u8 = @intCast(u8, (x2 & @intCast(u64, 0xff)));
-    const x34: u64 = (x2 >> 8);
-    const x35: u8 = @intCast(u8, (x34 & @intCast(u64, 0xff)));
-    const x36: u64 = (x34 >> 8);
-    const x37: u8 = @intCast(u8, (x36 & @intCast(u64, 0xff)));
-    const x38: u64 = (x36 >> 8);
-    const x39: u8 = @intCast(u8, (x38 & @intCast(u64, 0xff)));
-    const x40: u64 = (x38 >> 8);
-    const x41: u8 = @intCast(u8, (x40 & @intCast(u64, 0xff)));
-    const x42: u64 = (x40 >> 8);
-    const x43: u8 = @intCast(u8, (x42 & @intCast(u64, 0xff)));
-    const x44: u64 = (x42 >> 8);
-    const x45: u8 = @intCast(u8, (x44 & @intCast(u64, 0xff)));
-    const x46: u8 = @intCast(u8, (x44 >> 8));
-    const x47: u8 = @intCast(u8, (x1 & @intCast(u64, 0xff)));
-    const x48: u64 = (x1 >> 8);
-    const x49: u8 = @intCast(u8, (x48 & @intCast(u64, 0xff)));
-    const x50: u64 = (x48 >> 8);
-    const x51: u8 = @intCast(u8, (x50 & @intCast(u64, 0xff)));
-    const x52: u64 = (x50 >> 8);
-    const x53: u8 = @intCast(u8, (x52 & @intCast(u64, 0xff)));
-    const x54: u64 = (x52 >> 8);
-    const x55: u8 = @intCast(u8, (x54 & @intCast(u64, 0xff)));
-    const x56: u64 = (x54 >> 8);
-    const x57: u8 = @intCast(u8, (x56 & @intCast(u64, 0xff)));
-    const x58: u64 = (x56 >> 8);
-    const x59: u8 = @intCast(u8, (x58 & @intCast(u64, 0xff)));
-    const x60: u8 = @intCast(u8, (x58 >> 8));
+pub fn toBytes(out1: *[32]u8, arg1: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    const x1 = (arg1[3]);
+    const x2 = (arg1[2]);
+    const x3 = (arg1[1]);
+    const x4 = (arg1[0]);
+    const x5 = cast(u8, (x4 & cast(u64, 0xff)));
+    const x6 = (x4 >> 8);
+    const x7 = cast(u8, (x6 & cast(u64, 0xff)));
+    const x8 = (x6 >> 8);
+    const x9 = cast(u8, (x8 & cast(u64, 0xff)));
+    const x10 = (x8 >> 8);
+    const x11 = cast(u8, (x10 & cast(u64, 0xff)));
+    const x12 = (x10 >> 8);
+    const x13 = cast(u8, (x12 & cast(u64, 0xff)));
+    const x14 = (x12 >> 8);
+    const x15 = cast(u8, (x14 & cast(u64, 0xff)));
+    const x16 = (x14 >> 8);
+    const x17 = cast(u8, (x16 & cast(u64, 0xff)));
+    const x18 = cast(u8, (x16 >> 8));
+    const x19 = cast(u8, (x3 & cast(u64, 0xff)));
+    const x20 = (x3 >> 8);
+    const x21 = cast(u8, (x20 & cast(u64, 0xff)));
+    const x22 = (x20 >> 8);
+    const x23 = cast(u8, (x22 & cast(u64, 0xff)));
+    const x24 = (x22 >> 8);
+    const x25 = cast(u8, (x24 & cast(u64, 0xff)));
+    const x26 = (x24 >> 8);
+    const x27 = cast(u8, (x26 & cast(u64, 0xff)));
+    const x28 = (x26 >> 8);
+    const x29 = cast(u8, (x28 & cast(u64, 0xff)));
+    const x30 = (x28 >> 8);
+    const x31 = cast(u8, (x30 & cast(u64, 0xff)));
+    const x32 = cast(u8, (x30 >> 8));
+    const x33 = cast(u8, (x2 & cast(u64, 0xff)));
+    const x34 = (x2 >> 8);
+    const x35 = cast(u8, (x34 & cast(u64, 0xff)));
+    const x36 = (x34 >> 8);
+    const x37 = cast(u8, (x36 & cast(u64, 0xff)));
+    const x38 = (x36 >> 8);
+    const x39 = cast(u8, (x38 & cast(u64, 0xff)));
+    const x40 = (x38 >> 8);
+    const x41 = cast(u8, (x40 & cast(u64, 0xff)));
+    const x42 = (x40 >> 8);
+    const x43 = cast(u8, (x42 & cast(u64, 0xff)));
+    const x44 = (x42 >> 8);
+    const x45 = cast(u8, (x44 & cast(u64, 0xff)));
+    const x46 = cast(u8, (x44 >> 8));
+    const x47 = cast(u8, (x1 & cast(u64, 0xff)));
+    const x48 = (x1 >> 8);
+    const x49 = cast(u8, (x48 & cast(u64, 0xff)));
+    const x50 = (x48 >> 8);
+    const x51 = cast(u8, (x50 & cast(u64, 0xff)));
+    const x52 = (x50 >> 8);
+    const x53 = cast(u8, (x52 & cast(u64, 0xff)));
+    const x54 = (x52 >> 8);
+    const x55 = cast(u8, (x54 & cast(u64, 0xff)));
+    const x56 = (x54 >> 8);
+    const x57 = cast(u8, (x56 & cast(u64, 0xff)));
+    const x58 = (x56 >> 8);
+    const x59 = cast(u8, (x58 & cast(u64, 0xff)));
+    const x60 = cast(u8, (x58 >> 8));
     out1[0] = x5;
     out1[1] = x7;
     out1[2] = x9;
@@ -1398,7 +1426,7 @@ pub fn p256ToBytes(out1: *[32]u8, arg1: Limbs) void {
     out1[31] = x60;
 }
 
-/// The function p256FromBytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
+/// The function fromBytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
 /// Preconditions:
 ///   0 ≤ bytes_eval arg1 < m
 /// Postconditions:
@@ -1409,74 +1437,76 @@ pub fn p256ToBytes(out1: *[32]u8, arg1: Limbs) void {
 ///   arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn p256FromBytes(out1: *Limbs, arg1: [32]u8) void {
-    const x1: u64 = (@intCast(u64, (arg1[31])) << 56);
-    const x2: u64 = (@intCast(u64, (arg1[30])) << 48);
-    const x3: u64 = (@intCast(u64, (arg1[29])) << 40);
-    const x4: u64 = (@intCast(u64, (arg1[28])) << 32);
-    const x5: u64 = (@intCast(u64, (arg1[27])) << 24);
-    const x6: u64 = (@intCast(u64, (arg1[26])) << 16);
-    const x7: u64 = (@intCast(u64, (arg1[25])) << 8);
-    const x8: u8 = (arg1[24]);
-    const x9: u64 = (@intCast(u64, (arg1[23])) << 56);
-    const x10: u64 = (@intCast(u64, (arg1[22])) << 48);
-    const x11: u64 = (@intCast(u64, (arg1[21])) << 40);
-    const x12: u64 = (@intCast(u64, (arg1[20])) << 32);
-    const x13: u64 = (@intCast(u64, (arg1[19])) << 24);
-    const x14: u64 = (@intCast(u64, (arg1[18])) << 16);
-    const x15: u64 = (@intCast(u64, (arg1[17])) << 8);
-    const x16: u8 = (arg1[16]);
-    const x17: u64 = (@intCast(u64, (arg1[15])) << 56);
-    const x18: u64 = (@intCast(u64, (arg1[14])) << 48);
-    const x19: u64 = (@intCast(u64, (arg1[13])) << 40);
-    const x20: u64 = (@intCast(u64, (arg1[12])) << 32);
-    const x21: u64 = (@intCast(u64, (arg1[11])) << 24);
-    const x22: u64 = (@intCast(u64, (arg1[10])) << 16);
-    const x23: u64 = (@intCast(u64, (arg1[9])) << 8);
-    const x24: u8 = (arg1[8]);
-    const x25: u64 = (@intCast(u64, (arg1[7])) << 56);
-    const x26: u64 = (@intCast(u64, (arg1[6])) << 48);
-    const x27: u64 = (@intCast(u64, (arg1[5])) << 40);
-    const x28: u64 = (@intCast(u64, (arg1[4])) << 32);
-    const x29: u64 = (@intCast(u64, (arg1[3])) << 24);
-    const x30: u64 = (@intCast(u64, (arg1[2])) << 16);
-    const x31: u64 = (@intCast(u64, (arg1[1])) << 8);
-    const x32: u8 = (arg1[0]);
-    const x33: u64 = (x31 + @intCast(u64, x32));
-    const x34: u64 = (x30 + x33);
-    const x35: u64 = (x29 + x34);
-    const x36: u64 = (x28 + x35);
-    const x37: u64 = (x27 + x36);
-    const x38: u64 = (x26 + x37);
-    const x39: u64 = (x25 + x38);
-    const x40: u64 = (x23 + @intCast(u64, x24));
-    const x41: u64 = (x22 + x40);
-    const x42: u64 = (x21 + x41);
-    const x43: u64 = (x20 + x42);
-    const x44: u64 = (x19 + x43);
-    const x45: u64 = (x18 + x44);
-    const x46: u64 = (x17 + x45);
-    const x47: u64 = (x15 + @intCast(u64, x16));
-    const x48: u64 = (x14 + x47);
-    const x49: u64 = (x13 + x48);
-    const x50: u64 = (x12 + x49);
-    const x51: u64 = (x11 + x50);
-    const x52: u64 = (x10 + x51);
-    const x53: u64 = (x9 + x52);
-    const x54: u64 = (x7 + @intCast(u64, x8));
-    const x55: u64 = (x6 + x54);
-    const x56: u64 = (x5 + x55);
-    const x57: u64 = (x4 + x56);
-    const x58: u64 = (x3 + x57);
-    const x59: u64 = (x2 + x58);
-    const x60: u64 = (x1 + x59);
+pub fn fromBytes(out1: *[4]u64, arg1: [32]u8) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    const x1 = (cast(u64, (arg1[31])) << 56);
+    const x2 = (cast(u64, (arg1[30])) << 48);
+    const x3 = (cast(u64, (arg1[29])) << 40);
+    const x4 = (cast(u64, (arg1[28])) << 32);
+    const x5 = (cast(u64, (arg1[27])) << 24);
+    const x6 = (cast(u64, (arg1[26])) << 16);
+    const x7 = (cast(u64, (arg1[25])) << 8);
+    const x8 = (arg1[24]);
+    const x9 = (cast(u64, (arg1[23])) << 56);
+    const x10 = (cast(u64, (arg1[22])) << 48);
+    const x11 = (cast(u64, (arg1[21])) << 40);
+    const x12 = (cast(u64, (arg1[20])) << 32);
+    const x13 = (cast(u64, (arg1[19])) << 24);
+    const x14 = (cast(u64, (arg1[18])) << 16);
+    const x15 = (cast(u64, (arg1[17])) << 8);
+    const x16 = (arg1[16]);
+    const x17 = (cast(u64, (arg1[15])) << 56);
+    const x18 = (cast(u64, (arg1[14])) << 48);
+    const x19 = (cast(u64, (arg1[13])) << 40);
+    const x20 = (cast(u64, (arg1[12])) << 32);
+    const x21 = (cast(u64, (arg1[11])) << 24);
+    const x22 = (cast(u64, (arg1[10])) << 16);
+    const x23 = (cast(u64, (arg1[9])) << 8);
+    const x24 = (arg1[8]);
+    const x25 = (cast(u64, (arg1[7])) << 56);
+    const x26 = (cast(u64, (arg1[6])) << 48);
+    const x27 = (cast(u64, (arg1[5])) << 40);
+    const x28 = (cast(u64, (arg1[4])) << 32);
+    const x29 = (cast(u64, (arg1[3])) << 24);
+    const x30 = (cast(u64, (arg1[2])) << 16);
+    const x31 = (cast(u64, (arg1[1])) << 8);
+    const x32 = (arg1[0]);
+    const x33 = (x31 + cast(u64, x32));
+    const x34 = (x30 + x33);
+    const x35 = (x29 + x34);
+    const x36 = (x28 + x35);
+    const x37 = (x27 + x36);
+    const x38 = (x26 + x37);
+    const x39 = (x25 + x38);
+    const x40 = (x23 + cast(u64, x24));
+    const x41 = (x22 + x40);
+    const x42 = (x21 + x41);
+    const x43 = (x20 + x42);
+    const x44 = (x19 + x43);
+    const x45 = (x18 + x44);
+    const x46 = (x17 + x45);
+    const x47 = (x15 + cast(u64, x16));
+    const x48 = (x14 + x47);
+    const x49 = (x13 + x48);
+    const x50 = (x12 + x49);
+    const x51 = (x11 + x50);
+    const x52 = (x10 + x51);
+    const x53 = (x9 + x52);
+    const x54 = (x7 + cast(u64, x8));
+    const x55 = (x6 + x54);
+    const x56 = (x5 + x55);
+    const x57 = (x4 + x56);
+    const x58 = (x3 + x57);
+    const x59 = (x2 + x58);
+    const x60 = (x1 + x59);
     out1[0] = x39;
     out1[1] = x46;
     out1[2] = x53;
     out1[3] = x60;
 }
 
-/// The function p256SetOne returns the field element one in the Montgomery domain.
+/// The function setOne returns the field element one in the Montgomery domain.
 /// Postconditions:
 ///   eval (from_montgomery out1) mod m = 1 mod m
 ///   0 ≤ eval out1 < m
@@ -1484,14 +1514,16 @@ pub fn p256FromBytes(out1: *Limbs, arg1: [32]u8) void {
 /// Input Bounds:
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 ///
 /// The result is written through out1 as four fixed limbs; nothing is read from out1.
-pub fn p256SetOne(out1: *Limbs) void {
-    out1[0] = @intCast(u64, 0x1);
+pub fn setOne(out1: *[4]u64) void {
     // Runtime safety checks stay enabled only when `mode` is `.Debug` (`mode` is declared outside this excerpt).
+    @setRuntimeSafety(mode == .Debug);
+
     // Read with out1[0] as the least-significant limb, these constants equal 2^224 - 2^192 - 2^96 + 1,
     // i.e. 2^256 minus the modulus whose limbs `msat` writes.
+    out1[0] = cast(u64, 0x1);
     out1[1] = 0xffffffff00000000;
     out1[2] = 0xffffffffffffffff;
     out1[3] = 0xfffffffe;
 }
 
-/// The function p256Msat returns the saturated representation of the prime modulus.
+/// The function msat returns the saturated representation of the prime modulus.
 /// Postconditions:
 ///   twos_complement_eval out1 = m
 ///   0 ≤ eval out1 < m
@@ -1499,15 +1531,17 @@ pub fn p256SetOne(out1: *Limbs) void {
 /// Input Bounds:
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 ///
 /// The output has five limbs, one more than a field element; the extra top limb is written as zero.
-pub fn p256Msat(out1: *[5]u64) void {
+pub fn msat(out1: *[5]u64) void {
     // Runtime safety checks stay enabled only when `mode` is `.Debug` (`mode` is declared outside this excerpt).
+    @setRuntimeSafety(mode == .Debug);
+
     // Read with out1[0] as the least-significant limb, limbs 0..3 equal 2^256 - 2^224 + 2^192 + 2^96 - 1.
     out1[0] = 0xffffffffffffffff;
     out1[1] = 0xffffffff;
-    out1[2] = @intCast(u64, 0x0);
+    out1[2] = cast(u64, 0x0);
     out1[3] = 0xffffffff00000001;
-    out1[4] = @intCast(u64, 0x0);
+    out1[4] = cast(u64, 0x0);
 }
 
-/// The function p256Divstep computes a divstep.
+/// The function divstep computes a divstep.
 /// Preconditions:
 ///   0 ≤ eval arg4 < m
 ///   0 ≤ eval arg5 < m
@@ -1534,209 +1568,211 @@ pub fn p256Msat(out1: *[5]u64) void {
 ///   out3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 ///   out4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
 ///   out5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn p256Divstep(out1: *u64, out2: *[5]u64, out3: *[5]u64, out4: *Limbs, out5: *Limbs, arg1: u64, arg2: [5]u64, arg3: [5]u64, arg4: Limbs, arg5: Limbs) void {
+pub fn divstep(out1: *u64, out2: *[5]u64, out3: *[5]u64, out4: *[4]u64, out5: *[4]u64, arg1: u64, arg2: [5]u64, arg3: [5]u64, arg4: [4]u64, arg5: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
     var x1: u64 = undefined;
     var x2: u1 = undefined;
-    p256AddcarryxU64(&x1, &x2, 0x0, (~arg1), @intCast(u64, 0x1));
-    const x3: u1 = (@intCast(u1, (x1 >> 63)) & @intCast(u1, ((arg3[0]) & @intCast(u64, 0x1))));
+    addcarryxU64(&x1, &x2, 0x0, (~arg1), cast(u64, 0x1));
+    const x3 = (cast(u1, (x1 >> 63)) & cast(u1, ((arg3[0]) & cast(u64, 0x1))));
     var x4: u64 = undefined;
     var x5: u1 = undefined;
-    p256AddcarryxU64(&x4, &x5, 0x0, (~arg1), @intCast(u64, 0x1));
+    addcarryxU64(&x4, &x5, 0x0, (~arg1), cast(u64, 0x1));
     var x6: u64 = undefined;
-    p256CmovznzU64(&x6, x3, arg1, x4);
+    cmovznzU64(&x6, x3, arg1, x4);
     var x7: u64 = undefined;
-    p256CmovznzU64(&x7, x3, (arg2[0]), (arg3[0]));
+    cmovznzU64(&x7, x3, (arg2[0]), (arg3[0]));
     var x8: u64 = undefined;
-    p256CmovznzU64(&x8, x3, (arg2[1]), (arg3[1]));
+    cmovznzU64(&x8, x3, (arg2[1]), (arg3[1]));
     var x9: u64 = undefined;
-    p256CmovznzU64(&x9, x3, (arg2[2]), (arg3[2]));
+    cmovznzU64(&x9, x3, (arg2[2]), (arg3[2]));
     var x10: u64 = undefined;
-    p256CmovznzU64(&x10, x3, (arg2[3]), (arg3[3]));
+    cmovznzU64(&x10, x3, (arg2[3]), (arg3[3]));
     var x11: u64 = undefined;
-    p256CmovznzU64(&x11, x3, (arg2[4]), (arg3[4]));
+    cmovznzU64(&x11, x3, (arg2[4]), (arg3[4]));
     var x12: u64 = undefined;
     var x13: u1 = undefined;
-    p256AddcarryxU64(&x12, &x13, 0x0, @intCast(u64, 0x1), (~(arg2[0])));
+    addcarryxU64(&x12, &x13, 0x0, cast(u64, 0x1), (~(arg2[0])));
     var x14: u64 = undefined;
     var x15: u1 = undefined;
-    p256AddcarryxU64(&x14, &x15, x13, @intCast(u64, 0x0), (~(arg2[1])));
+    addcarryxU64(&x14, &x15, x13, cast(u64, 0x0), (~(arg2[1])));
     var x16: u64 = undefined;
     var x17: u1 = undefined;
-    p256AddcarryxU64(&x16, &x17, x15, @intCast(u64, 0x0), (~(arg2[2])));
+    addcarryxU64(&x16, &x17, x15, cast(u64, 0x0), (~(arg2[2])));
     var x18: u64 = undefined;
     var x19: u1 = undefined;
-    p256AddcarryxU64(&x18, &x19, x17, @intCast(u64, 0x0), (~(arg2[3])));
+    addcarryxU64(&x18, &x19, x17, cast(u64, 0x0), (~(arg2[3])));
     var x20: u64 = undefined;
     var x21: u1 = undefined;
-    p256AddcarryxU64(&x20, &x21, x19, @intCast(u64, 0x0), (~(arg2[4])));
+    addcarryxU64(&x20, &x21, x19, cast(u64, 0x0), (~(arg2[4])));
     var x22: u64 = undefined;
-    p256CmovznzU64(&x22, x3, (arg3[0]), x12);
+    cmovznzU64(&x22, x3, (arg3[0]), x12);
     var x23: u64 = undefined;
-    p256CmovznzU64(&x23, x3, (arg3[1]), x14);
+    cmovznzU64(&x23, x3, (arg3[1]), x14);
     var x24: u64 = undefined;
-    p256CmovznzU64(&x24, x3, (arg3[2]), x16);
+    cmovznzU64(&x24, x3, (arg3[2]), x16);
     var x25: u64 = undefined;
-    p256CmovznzU64(&x25, x3, (arg3[3]), x18);
+    cmovznzU64(&x25, x3, (arg3[3]), x18);
     var x26: u64 = undefined;
-    p256CmovznzU64(&x26, x3, (arg3[4]), x20);
+    cmovznzU64(&x26, x3, (arg3[4]), x20);
     var x27: u64 = undefined;
-    p256CmovznzU64(&x27, x3, (arg4[0]), (arg5[0]));
+    cmovznzU64(&x27, x3, (arg4[0]), (arg5[0]));
     var x28: u64 = undefined;
-    p256CmovznzU64(&x28, x3, (arg4[1]), (arg5[1]));
+    cmovznzU64(&x28, x3, (arg4[1]), (arg5[1]));
     var x29: u64 = undefined;
-    p256CmovznzU64(&x29, x3, (arg4[2]), (arg5[2]));
+    cmovznzU64(&x29, x3, (arg4[2]), (arg5[2]));
     var x30: u64 = undefined;
-    p256CmovznzU64(&x30, x3, (arg4[3]), (arg5[3]));
+    cmovznzU64(&x30, x3, (arg4[3]), (arg5[3]));
     var x31: u64 = undefined;
     var x32: u1 = undefined;
-    p256AddcarryxU64(&x31, &x32, 0x0, x27, x27);
+    addcarryxU64(&x31, &x32, 0x0, x27, x27);
     var x33: u64 = undefined;
     var x34: u1 = undefined;
-    p256AddcarryxU64(&x33, &x34, x32, x28, x28);
+    addcarryxU64(&x33, &x34, x32, x28, x28);
     var x35: u64 = undefined;
     var x36: u1 = undefined;
-    p256AddcarryxU64(&x35, &x36, x34, x29, x29);
+    addcarryxU64(&x35, &x36, x34, x29, x29);
     var x37: u64 = undefined;
     var x38: u1 = undefined;
-    p256AddcarryxU64(&x37, &x38, x36, x30, x30);
+    addcarryxU64(&x37, &x38, x36, x30, x30);
     var x39: u64 = undefined;
     var x40: u1 = undefined;
-    p256SubborrowxU64(&x39, &x40, 0x0, x31, 0xffffffffffffffff);
+    subborrowxU64(&x39, &x40, 0x0, x31, 0xffffffffffffffff);
     var x41: u64 = undefined;
     var x42: u1 = undefined;
-    p256SubborrowxU64(&x41, &x42, x40, x33, 0xffffffff);
+    subborrowxU64(&x41, &x42, x40, x33, 0xffffffff);
     var x43: u64 = undefined;
     var x44: u1 = undefined;
-    p256SubborrowxU64(&x43, &x44, x42, x35, @intCast(u64, 0x0));
+    subborrowxU64(&x43, &x44, x42, x35, cast(u64, 0x0));
     var x45: u64 = undefined;
     var x46: u1 = undefined;
-    p256SubborrowxU64(&x45, &x46, x44, x37, 0xffffffff00000001);
+    subborrowxU64(&x45, &x46, x44, x37, 0xffffffff00000001);
     var x47: u64 = undefined;
     var x48: u1 = undefined;
-    p256SubborrowxU64(&x47, &x48, x46, @intCast(u64, x38), @intCast(u64, 0x0));
-    const x49: u64 = (arg4[3]);
-    const x50: u64 = (arg4[2]);
-    const x51: u64 = (arg4[1]);
-    const x52: u64 = (arg4[0]);
+    subborrowxU64(&x47, &x48, x46, cast(u64, x38), cast(u64, 0x0));
+    const x49 = (arg4[3]);
+    const x50 = (arg4[2]);
+    const x51 = (arg4[1]);
+    const x52 = (arg4[0]);
     var x53: u64 = undefined;
     var x54: u1 = undefined;
-    p256SubborrowxU64(&x53, &x54, 0x0, @intCast(u64, 0x0), x52);
+    subborrowxU64(&x53, &x54, 0x0, cast(u64, 0x0), x52);
     var x55: u64 = undefined;
     var x56: u1 = undefined;
-    p256SubborrowxU64(&x55, &x56, x54, @intCast(u64, 0x0), x51);
+    subborrowxU64(&x55, &x56, x54, cast(u64, 0x0), x51);
     var x57: u64 = undefined;
     var x58: u1 = undefined;
-    p256SubborrowxU64(&x57, &x58, x56, @intCast(u64, 0x0), x50);
+    subborrowxU64(&x57, &x58, x56, cast(u64, 0x0), x50);
     var x59: u64 = undefined;
     var x60: u1 = undefined;
-    p256SubborrowxU64(&x59, &x60, x58, @intCast(u64, 0x0), x49);
+    subborrowxU64(&x59, &x60, x58, cast(u64, 0x0), x49);
     var x61: u64 = undefined;
-    p256CmovznzU64(&x61, x60, @intCast(u64, 0x0), 0xffffffffffffffff);
+    cmovznzU64(&x61, x60, cast(u64, 0x0), 0xffffffffffffffff);
     var x62: u64 = undefined;
     var x63: u1 = undefined;
-    p256AddcarryxU64(&x62, &x63, 0x0, x53, x61);
+    addcarryxU64(&x62, &x63, 0x0, x53, x61);
     var x64: u64 = undefined;
     var x65: u1 = undefined;
-    p256AddcarryxU64(&x64, &x65, x63, x55, (x61 & 0xffffffff));
+    addcarryxU64(&x64, &x65, x63, x55, (x61 & 0xffffffff));
     var x66: u64 = undefined;
     var x67: u1 = undefined;
-    p256AddcarryxU64(&x66, &x67, x65, x57, @intCast(u64, 0x0));
+    addcarryxU64(&x66, &x67, x65, x57, cast(u64, 0x0));
     var x68: u64 = undefined;
     var x69: u1 = undefined;
-    p256AddcarryxU64(&x68, &x69, x67, x59, (x61 & 0xffffffff00000001));
+    addcarryxU64(&x68, &x69, x67, x59, (x61 & 0xffffffff00000001));
     var x70: u64 = undefined;
-    p256CmovznzU64(&x70, x3, (arg5[0]), x62);
+    cmovznzU64(&x70, x3, (arg5[0]), x62);
     var x71: u64 = undefined;
-    p256CmovznzU64(&x71, x3, (arg5[1]), x64);
+    cmovznzU64(&x71, x3, (arg5[1]), x64);
     var x72: u64 = undefined;
-    p256CmovznzU64(&x72, x3, (arg5[2]), x66);
+    cmovznzU64(&x72, x3, (arg5[2]), x66);
     var x73: u64 = undefined;
-    p256CmovznzU64(&x73, x3, (arg5[3]), x68);
-    const x74: u1 = @intCast(u1, (x22 & @intCast(u64, 0x1)));
+    cmovznzU64(&x73, x3, (arg5[3]), x68);
+    const x74 = cast(u1, (x22 & cast(u64, 0x1)));
     var x75: u64 = undefined;
-    p256CmovznzU64(&x75, x74, @intCast(u64, 0x0), x7);
+    cmovznzU64(&x75, x74, cast(u64, 0x0), x7);
     var x76: u64 = undefined;
-    p256CmovznzU64(&x76, x74, @intCast(u64, 0x0), x8);
+    cmovznzU64(&x76, x74, cast(u64, 0x0), x8);
     var x77: u64 = undefined;
-    p256CmovznzU64(&x77, x74, @intCast(u64, 0x0), x9);
+    cmovznzU64(&x77, x74, cast(u64, 0x0), x9);
     var x78: u64 = undefined;
-    p256CmovznzU64(&x78, x74, @intCast(u64, 0x0), x10);
+    cmovznzU64(&x78, x74, cast(u64, 0x0), x10);
     var x79: u64 = undefined;
-    p256CmovznzU64(&x79, x74, @intCast(u64, 0x0), x11);
+    cmovznzU64(&x79, x74, cast(u64, 0x0), x11);
     var x80: u64 = undefined;
     var x81: u1 = undefined;
-    p256AddcarryxU64(&x80, &x81, 0x0, x22, x75);
+    addcarryxU64(&x80, &x81, 0x0, x22, x75);
     var x82: u64 = undefined;
     var x83: u1 = undefined;
-    p256AddcarryxU64(&x82, &x83, x81, x23, x76);
+    addcarryxU64(&x82, &x83, x81, x23, x76);
     var x84: u64 = undefined;
     var x85: u1 = undefined;
-    p256AddcarryxU64(&x84, &x85, x83, x24, x77);
+    addcarryxU64(&x84, &x85, x83, x24, x77);
     var x86: u64 = undefined;
     var x87: u1 = undefined;
-    p256AddcarryxU64(&x86, &x87, x85, x25, x78);
+    addcarryxU64(&x86, &x87, x85, x25, x78);
     var x88: u64 = undefined;
     var x89: u1 = undefined;
-    p256AddcarryxU64(&x88, &x89, x87, x26, x79);
+    addcarryxU64(&x88, &x89, x87, x26, x79);
     var x90: u64 = undefined;
-    p256CmovznzU64(&x90, x74, @intCast(u64, 0x0), x27);
+    cmovznzU64(&x90, x74, cast(u64, 0x0), x27);
     var x91: u64 = undefined;
-    p256CmovznzU64(&x91, x74, @intCast(u64, 0x0), x28);
+    cmovznzU64(&x91, x74, cast(u64, 0x0), x28);
     var x92: u64 = undefined;
-    p256CmovznzU64(&x92, x74, @intCast(u64, 0x0), x29);
+    cmovznzU64(&x92, x74, cast(u64, 0x0), x29);
     var x93: u64 = undefined;
-    p256CmovznzU64(&x93, x74, @intCast(u64, 0x0), x30);
+    cmovznzU64(&x93, x74, cast(u64, 0x0), x30);
     var x94: u64 = undefined;
     var x95: u1 = undefined;
-    p256AddcarryxU64(&x94, &x95, 0x0, x70, x90);
+    addcarryxU64(&x94, &x95, 0x0, x70, x90);
     var x96: u64 = undefined;
     var x97: u1 = undefined;
-    p256AddcarryxU64(&x96, &x97, x95, x71, x91);
+    addcarryxU64(&x96, &x97, x95, x71, x91);
     var x98: u64 = undefined;
     var x99: u1 = undefined;
-    p256AddcarryxU64(&x98, &x99, x97, x72, x92);
+    addcarryxU64(&x98, &x99, x97, x72, x92);
     var x100: u64 = undefined;
     var x101: u1 = undefined;
-    p256AddcarryxU64(&x100, &x101, x99, x73, x93);
+    addcarryxU64(&x100, &x101, x99, x73, x93);
     var x102: u64 = undefined;
     var x103: u1 = undefined;
-    p256SubborrowxU64(&x102, &x103, 0x0, x94, 0xffffffffffffffff);
+    subborrowxU64(&x102, &x103, 0x0, x94, 0xffffffffffffffff);
     var x104: u64 = undefined;
     var x105: u1 = undefined;
-    p256SubborrowxU64(&x104, &x105, x103, x96, 0xffffffff);
+    subborrowxU64(&x104, &x105, x103, x96, 0xffffffff);
     var x106: u64 = undefined;
     var x107: u1 = undefined;
-    p256SubborrowxU64(&x106, &x107, x105, x98, @intCast(u64, 0x0));
+    subborrowxU64(&x106, &x107, x105, x98, cast(u64, 0x0));
     var x108: u64 = undefined;
     var x109: u1 = undefined;
-    p256SubborrowxU64(&x108, &x109, x107, x100, 0xffffffff00000001);
+    subborrowxU64(&x108, &x109, x107, x100, 0xffffffff00000001);
     var x110: u64 = undefined;
     var x111: u1 = undefined;
-    p256SubborrowxU64(&x110, &x111, x109, @intCast(u64, x101), @intCast(u64, 0x0));
+    subborrowxU64(&x110, &x111, x109, cast(u64, x101), cast(u64, 0x0));
     var x112: u64 = undefined;
     var x113: u1 = undefined;
-    p256AddcarryxU64(&x112, &x113, 0x0, x6, @intCast(u64, 0x1));
-    const x114: u64 = ((x80 >> 1) | ((x82 << 63) & 0xffffffffffffffff));
-    const x115: u64 = ((x82 >> 1) | ((x84 << 63) & 0xffffffffffffffff));
-    const x116: u64 = ((x84 >> 1) | ((x86 << 63) & 0xffffffffffffffff));
-    const x117: u64 = ((x86 >> 1) | ((x88 << 63) & 0xffffffffffffffff));
-    const x118: u64 = ((x88 & 0x8000000000000000) | (x88 >> 1));
+    addcarryxU64(&x112, &x113, 0x0, x6, cast(u64, 0x1));
+    const x114 = ((x80 >> 1) | ((x82 << 63) & 0xffffffffffffffff));
+    const x115 = ((x82 >> 1) | ((x84 << 63) & 0xffffffffffffffff));
+    const x116 = ((x84 >> 1) | ((x86 << 63) & 0xffffffffffffffff));
+    const x117 = ((x86 >> 1) | ((x88 << 63) & 0xffffffffffffffff));
+    const x118 = ((x88 & 0x8000000000000000) | (x88 >> 1));
     var x119: u64 = undefined;
-    p256CmovznzU64(&x119, x48, x39, x31);
+    cmovznzU64(&x119, x48, x39, x31);
     var x120: u64 = undefined;
-    p256CmovznzU64(&x120, x48, x41, x33);
+    cmovznzU64(&x120, x48, x41, x33);
     var x121: u64 = undefined;
-    p256CmovznzU64(&x121, x48, x43, x35);
+    cmovznzU64(&x121, x48, x43, x35);
     var x122: u64 = undefined;
-    p256CmovznzU64(&x122, x48, x45, x37);
+    cmovznzU64(&x122, x48, x45, x37);
     var x123: u64 = undefined;
-    p256CmovznzU64(&x123, x111, x102, x94);
+    cmovznzU64(&x123, x111, x102, x94);
     var x124: u64 = undefined;
-    p256CmovznzU64(&x124, x111, x104, x96);
+    cmovznzU64(&x124, x111, x104, x96);
     var x125: u64 = undefined;
-    p256CmovznzU64(&x125, x111, x106, x98);
+    cmovznzU64(&x125, x111, x106, x98);
     var x126: u64 = undefined;
-    p256CmovznzU64(&x126, x111, x108, x100);
+    cmovznzU64(&x126, x111, x108, x100);
     out1.* = x112;
     out2[0] = x7;
     out2[1] = x8;
@@ -1758,7 +1794,7 @@ pub fn p256Divstep(out1: *u64, out2: *[5]u64, out3: *[5]u64, out4: *Limbs, out5:
     out5[3] = x126;
 }
 
-/// The function p256DivstepPrecomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form).
+/// The function divstepPrecomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form).
 /// Postconditions:
 ///   eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if (log2 m) + 1 < 46 then ⌊(49 * ((log2 m) + 1) + 80) / 17⌋ else ⌊(49 * ((log2 m) + 1) + 57) / 17⌋)
 ///   0 ≤ eval out1 < m
@@ -1766,7 +1802,9 @@ pub fn p256Divstep(out1: *u64, out2: *[5]u64, out3: *[5]u64, out4: *Limbs, out5:
 /// Input Bounds:
 /// Output Bounds:
 ///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
-pub fn p256DivstepPrecomp(out1: *Limbs) void {
+pub fn divstepPrecomp(out1: *[4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
     out1[0] = 0x67ffffffb8000000;
     out1[1] = 0xc000000038000000;
     out1[2] = 0xd80000007fffffff;
lib/std/crypto/pcurves/p256/p256_scalar_64.zig
@@ -0,0 +1,2016 @@
+// Autogenerated: './src/ExtractionOCaml/word_by_word_montgomery' --lang Zig --internal-static --public-function-case camelCase --private-function-case camelCase --no-prefix-fiat --package-name p256-scalar '' 64 115792089210356248762697446949407573529996955224135760342422259061068512044369
+// curve description (via package name): p256-scalar
+// machine_wordsize = 64 (from "64")
+// requested operations: (all)
+// m = 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551 (from "115792089210356248762697446949407573529996955224135760342422259061068512044369")
+//
+// NOTE: In addition to the bounds specified above each function, all
+//   functions synthesized for this Montgomery arithmetic require the
+//   input to be strictly less than the prime modulus (m), and also
+//   require the input to be in the unique saturated representation.
+//   All functions also ensure that these two properties are true of
+//   return values.
+//
+// Computed values:
+// eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192)
+// bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248)
+// twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in
+//                          if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256
+
+const std = @import("std");
+const cast = std.meta.cast;
+const mode = std.builtin.mode; // Checked arithmetic is disabled in non-debug modes to avoid side channels
+
+pub const Limbs = [4]u64; // four 64-bit limbs; the file header defines eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192)
+
+/// The function addcarryxU64 is an addition with carry: it adds arg2, arg3 and the incoming carry bit arg1.
+/// Postconditions:
+///   out1 = (arg1 + arg2 + arg3) mod 2^64
+///   out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
+/// The sum is written through out1 and the carry-out bit through out2; nothing is returned.
+/// Input Bounds:
+///   arg1: [0x0 ~> 0x1]
+///   arg2: [0x0 ~> 0xffffffffffffffff]
+///   arg3: [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+///   out2: [0x0 ~> 0x1]
+fn addcarryxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+    @setRuntimeSafety(mode == .Debug); // runtime safety checks are enabled only in Debug builds
+
+    var t: u64 = undefined; // receives the wrapped value of arg2 + arg3
+    const carry1 = @addWithOverflow(u64, arg2, arg3, &t); // true when arg2 + arg3 does not fit in 64 bits
+    const carry2 = @addWithOverflow(u64, t, arg1, out1); // adds the incoming carry and stores the final sum in out1
+    out2.* = @boolToInt(carry1) | @boolToInt(carry2); // the two additions cannot both overflow, so OR gives the carry-out bit
+}
+
+/// The function subborrowxU64 is a subtraction with borrow: it computes arg2 - arg3 - arg1, where arg1 is the incoming borrow bit.
+/// Postconditions:
+///   out1 = (-arg1 + arg2 + -arg3) mod 2^64
+///   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
+/// The difference is written through out1 and the borrow-out bit through out2; nothing is returned.
+/// Input Bounds:
+///   arg1: [0x0 ~> 0x1]
+///   arg2: [0x0 ~> 0xffffffffffffffff]
+///   arg3: [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+///   out2: [0x0 ~> 0x1]
+fn subborrowxU64(out1: *u64, out2: *u1, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+    @setRuntimeSafety(mode == .Debug); // runtime safety checks are enabled only in Debug builds
+
+    var t: u64 = undefined; // receives the wrapped value of arg2 - arg3
+    const carry1 = @subWithOverflow(u64, arg2, arg3, &t); // true when arg2 < arg3
+    const carry2 = @subWithOverflow(u64, t, arg1, out1); // subtracts the incoming borrow and stores the final difference in out1
+    out2.* = @boolToInt(carry1) | @boolToInt(carry2); // the two subtractions cannot both underflow, so OR gives the borrow-out bit
+}
+
+/// The function mulxU64 is a multiplication, returning the full double-width result.
+/// Postconditions:
+///   out1 = (arg1 * arg2) mod 2^64
+///   out2 = ⌊arg1 * arg2 / 2^64⌋
+/// The low half is written through out1 and the high half through out2; nothing is returned.
+/// Input Bounds:
+///   arg1: [0x0 ~> 0xffffffffffffffff]
+///   arg2: [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+///   out2: [0x0 ~> 0xffffffffffffffff]
+fn mulxU64(out1: *u64, out2: *u64, arg1: u64, arg2: u64) callconv(.Inline) void {
+    @setRuntimeSafety(mode == .Debug); // runtime safety checks are enabled only in Debug builds
+
+    const x = @as(u128, arg1) * @as(u128, arg2); // both operands are widened first, so the 128-bit product cannot overflow
+    out1.* = @truncate(u64, x); // low 64 bits of the product
+    out2.* = @truncate(u64, x >> 64); // high 64 bits of the product
+}
+
+/// The function cmovznzU64 is a single-word conditional move, implemented with a bit mask instead of a branch.
+/// Postconditions:
+///   out1 = (if arg1 = 0 then arg2 else arg3)
+/// The selected word is written through out1; nothing is returned.
+/// Input Bounds:
+///   arg1: [0x0 ~> 0x1]
+///   arg2: [0x0 ~> 0xffffffffffffffff]
+///   arg3: [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+fn cmovznzU64(out1: *u64, arg1: u1, arg2: u64, arg3: u64) callconv(.Inline) void {
+    @setRuntimeSafety(mode == .Debug); // runtime safety checks are enabled only in Debug builds
+
+    const mask = 0 -% @as(u64, arg1); // wrapping negate: all zeros when arg1 == 0, all ones when arg1 == 1
+    out1.* = (mask & arg3) | ((~mask) & arg2); // keeps arg3 where mask is set, arg2 otherwise
+}
+
+/// The function mul multiplies two field elements in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+///   0 ≤ eval arg2 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
+///   0 ≤ eval out1 < m
+/// The body runs four rounds, one per limb of arg1 (arg1[0], arg1[1], arg1[2], arg1[3] in that order): each round multiplies that limb by every limb of arg2, adds the products into the running sum, then adds a multiple of m built from the four limbs of m. A final subtraction of m is kept only when it does not borrow.
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub fn mul(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug); // runtime safety checks are enabled only in Debug builds
+
+    const x1 = (arg1[1]); // limb used by round 2
+    const x2 = (arg1[2]); // limb used by round 3
+    const x3 = (arg1[3]); // limb used by round 4
+    const x4 = (arg1[0]); // limb used by round 1
+    var x5: u64 = undefined;
+    var x6: u64 = undefined;
+    mulxU64(&x5, &x6, x4, (arg2[3])); // round 1: partial products of arg1[0] with each limb of arg2
+    var x7: u64 = undefined;
+    var x8: u64 = undefined;
+    mulxU64(&x7, &x8, x4, (arg2[2]));
+    var x9: u64 = undefined;
+    var x10: u64 = undefined;
+    mulxU64(&x9, &x10, x4, (arg2[1]));
+    var x11: u64 = undefined;
+    var x12: u64 = undefined;
+    mulxU64(&x11, &x12, x4, (arg2[0]));
+    var x13: u64 = undefined;
+    var x14: u1 = undefined;
+    addcarryxU64(&x13, &x14, 0x0, x12, x9);
+    var x15: u64 = undefined;
+    var x16: u1 = undefined;
+    addcarryxU64(&x15, &x16, x14, x10, x7);
+    var x17: u64 = undefined;
+    var x18: u1 = undefined;
+    addcarryxU64(&x17, &x18, x16, x8, x5);
+    const x19 = (cast(u64, x18) + x6); // highest word of the five-word product x4 * arg2 (words x11, x13, x15, x17, x19)
+    var x20: u64 = undefined;
+    var x21: u64 = undefined;
+    mulxU64(&x20, &x21, x11, 0xccd1c8aaee00bc4f); // NOTE(review): constant looks like -m^-1 mod 2^64 (Montgomery reduction factor) — confirm against the generator
+    var x22: u64 = undefined;
+    var x23: u64 = undefined;
+    mulxU64(&x22, &x23, x20, 0xffffffff00000000); // x20 times limb 3 (most significant) of m
+    var x24: u64 = undefined;
+    var x25: u64 = undefined;
+    mulxU64(&x24, &x25, x20, 0xffffffffffffffff); // x20 times limb 2 of m
+    var x26: u64 = undefined;
+    var x27: u64 = undefined;
+    mulxU64(&x26, &x27, x20, 0xbce6faada7179e84); // x20 times limb 1 of m
+    var x28: u64 = undefined;
+    var x29: u64 = undefined;
+    mulxU64(&x28, &x29, x20, 0xf3b9cac2fc632551); // x20 times limb 0 (least significant) of m
+    var x30: u64 = undefined;
+    var x31: u1 = undefined;
+    addcarryxU64(&x30, &x31, 0x0, x29, x26);
+    var x32: u64 = undefined;
+    var x33: u1 = undefined;
+    addcarryxU64(&x32, &x33, x31, x27, x24);
+    var x34: u64 = undefined;
+    var x35: u1 = undefined;
+    addcarryxU64(&x34, &x35, x33, x25, x22);
+    const x36 = (cast(u64, x35) + x23);
+    var x37: u64 = undefined;
+    var x38: u1 = undefined;
+    addcarryxU64(&x37, &x38, 0x0, x11, x28); // x37 is not read again; only the carry x38 feeds the next addition
+    var x39: u64 = undefined;
+    var x40: u1 = undefined;
+    addcarryxU64(&x39, &x40, x38, x13, x30);
+    var x41: u64 = undefined;
+    var x42: u1 = undefined;
+    addcarryxU64(&x41, &x42, x40, x15, x32);
+    var x43: u64 = undefined;
+    var x44: u1 = undefined;
+    addcarryxU64(&x43, &x44, x42, x17, x34);
+    var x45: u64 = undefined;
+    var x46: u1 = undefined;
+    addcarryxU64(&x45, &x46, x44, x19, x36);
+    var x47: u64 = undefined;
+    var x48: u64 = undefined;
+    mulxU64(&x47, &x48, x1, (arg2[3])); // round 2: partial products of arg1[1] with each limb of arg2
+    var x49: u64 = undefined;
+    var x50: u64 = undefined;
+    mulxU64(&x49, &x50, x1, (arg2[2]));
+    var x51: u64 = undefined;
+    var x52: u64 = undefined;
+    mulxU64(&x51, &x52, x1, (arg2[1]));
+    var x53: u64 = undefined;
+    var x54: u64 = undefined;
+    mulxU64(&x53, &x54, x1, (arg2[0]));
+    var x55: u64 = undefined;
+    var x56: u1 = undefined;
+    addcarryxU64(&x55, &x56, 0x0, x54, x51);
+    var x57: u64 = undefined;
+    var x58: u1 = undefined;
+    addcarryxU64(&x57, &x58, x56, x52, x49);
+    var x59: u64 = undefined;
+    var x60: u1 = undefined;
+    addcarryxU64(&x59, &x60, x58, x50, x47);
+    const x61 = (cast(u64, x60) + x48);
+    var x62: u64 = undefined;
+    var x63: u1 = undefined;
+    addcarryxU64(&x62, &x63, 0x0, x39, x53);
+    var x64: u64 = undefined;
+    var x65: u1 = undefined;
+    addcarryxU64(&x64, &x65, x63, x41, x55);
+    var x66: u64 = undefined;
+    var x67: u1 = undefined;
+    addcarryxU64(&x66, &x67, x65, x43, x57);
+    var x68: u64 = undefined;
+    var x69: u1 = undefined;
+    addcarryxU64(&x68, &x69, x67, x45, x59);
+    var x70: u64 = undefined;
+    var x71: u1 = undefined;
+    addcarryxU64(&x70, &x71, x69, cast(u64, x46), x61);
+    var x72: u64 = undefined;
+    var x73: u64 = undefined;
+    mulxU64(&x72, &x73, x62, 0xccd1c8aaee00bc4f); // same constant as in round 1, applied to the new low word x62
+    var x74: u64 = undefined;
+    var x75: u64 = undefined;
+    mulxU64(&x74, &x75, x72, 0xffffffff00000000);
+    var x76: u64 = undefined;
+    var x77: u64 = undefined;
+    mulxU64(&x76, &x77, x72, 0xffffffffffffffff);
+    var x78: u64 = undefined;
+    var x79: u64 = undefined;
+    mulxU64(&x78, &x79, x72, 0xbce6faada7179e84);
+    var x80: u64 = undefined;
+    var x81: u64 = undefined;
+    mulxU64(&x80, &x81, x72, 0xf3b9cac2fc632551);
+    var x82: u64 = undefined;
+    var x83: u1 = undefined;
+    addcarryxU64(&x82, &x83, 0x0, x81, x78);
+    var x84: u64 = undefined;
+    var x85: u1 = undefined;
+    addcarryxU64(&x84, &x85, x83, x79, x76);
+    var x86: u64 = undefined;
+    var x87: u1 = undefined;
+    addcarryxU64(&x86, &x87, x85, x77, x74);
+    const x88 = (cast(u64, x87) + x75);
+    var x89: u64 = undefined;
+    var x90: u1 = undefined;
+    addcarryxU64(&x89, &x90, 0x0, x62, x80);
+    var x91: u64 = undefined;
+    var x92: u1 = undefined;
+    addcarryxU64(&x91, &x92, x90, x64, x82);
+    var x93: u64 = undefined;
+    var x94: u1 = undefined;
+    addcarryxU64(&x93, &x94, x92, x66, x84);
+    var x95: u64 = undefined;
+    var x96: u1 = undefined;
+    addcarryxU64(&x95, &x96, x94, x68, x86);
+    var x97: u64 = undefined;
+    var x98: u1 = undefined;
+    addcarryxU64(&x97, &x98, x96, x70, x88);
+    const x99 = (cast(u64, x98) + cast(u64, x71));
+    var x100: u64 = undefined;
+    var x101: u64 = undefined;
+    mulxU64(&x100, &x101, x2, (arg2[3])); // round 3: partial products of arg1[2] with each limb of arg2
+    var x102: u64 = undefined;
+    var x103: u64 = undefined;
+    mulxU64(&x102, &x103, x2, (arg2[2]));
+    var x104: u64 = undefined;
+    var x105: u64 = undefined;
+    mulxU64(&x104, &x105, x2, (arg2[1]));
+    var x106: u64 = undefined;
+    var x107: u64 = undefined;
+    mulxU64(&x106, &x107, x2, (arg2[0]));
+    var x108: u64 = undefined;
+    var x109: u1 = undefined;
+    addcarryxU64(&x108, &x109, 0x0, x107, x104);
+    var x110: u64 = undefined;
+    var x111: u1 = undefined;
+    addcarryxU64(&x110, &x111, x109, x105, x102);
+    var x112: u64 = undefined;
+    var x113: u1 = undefined;
+    addcarryxU64(&x112, &x113, x111, x103, x100);
+    const x114 = (cast(u64, x113) + x101);
+    var x115: u64 = undefined;
+    var x116: u1 = undefined;
+    addcarryxU64(&x115, &x116, 0x0, x91, x106);
+    var x117: u64 = undefined;
+    var x118: u1 = undefined;
+    addcarryxU64(&x117, &x118, x116, x93, x108);
+    var x119: u64 = undefined;
+    var x120: u1 = undefined;
+    addcarryxU64(&x119, &x120, x118, x95, x110);
+    var x121: u64 = undefined;
+    var x122: u1 = undefined;
+    addcarryxU64(&x121, &x122, x120, x97, x112);
+    var x123: u64 = undefined;
+    var x124: u1 = undefined;
+    addcarryxU64(&x123, &x124, x122, x99, x114);
+    var x125: u64 = undefined;
+    var x126: u64 = undefined;
+    mulxU64(&x125, &x126, x115, 0xccd1c8aaee00bc4f); // same constant again, applied to the new low word x115
+    var x127: u64 = undefined;
+    var x128: u64 = undefined;
+    mulxU64(&x127, &x128, x125, 0xffffffff00000000);
+    var x129: u64 = undefined;
+    var x130: u64 = undefined;
+    mulxU64(&x129, &x130, x125, 0xffffffffffffffff);
+    var x131: u64 = undefined;
+    var x132: u64 = undefined;
+    mulxU64(&x131, &x132, x125, 0xbce6faada7179e84);
+    var x133: u64 = undefined;
+    var x134: u64 = undefined;
+    mulxU64(&x133, &x134, x125, 0xf3b9cac2fc632551);
+    var x135: u64 = undefined;
+    var x136: u1 = undefined;
+    addcarryxU64(&x135, &x136, 0x0, x134, x131);
+    var x137: u64 = undefined;
+    var x138: u1 = undefined;
+    addcarryxU64(&x137, &x138, x136, x132, x129);
+    var x139: u64 = undefined;
+    var x140: u1 = undefined;
+    addcarryxU64(&x139, &x140, x138, x130, x127);
+    const x141 = (cast(u64, x140) + x128);
+    var x142: u64 = undefined;
+    var x143: u1 = undefined;
+    addcarryxU64(&x142, &x143, 0x0, x115, x133);
+    var x144: u64 = undefined;
+    var x145: u1 = undefined;
+    addcarryxU64(&x144, &x145, x143, x117, x135);
+    var x146: u64 = undefined;
+    var x147: u1 = undefined;
+    addcarryxU64(&x146, &x147, x145, x119, x137);
+    var x148: u64 = undefined;
+    var x149: u1 = undefined;
+    addcarryxU64(&x148, &x149, x147, x121, x139);
+    var x150: u64 = undefined;
+    var x151: u1 = undefined;
+    addcarryxU64(&x150, &x151, x149, x123, x141);
+    const x152 = (cast(u64, x151) + cast(u64, x124));
+    var x153: u64 = undefined;
+    var x154: u64 = undefined;
+    mulxU64(&x153, &x154, x3, (arg2[3])); // round 4: partial products of arg1[3] with each limb of arg2
+    var x155: u64 = undefined;
+    var x156: u64 = undefined;
+    mulxU64(&x155, &x156, x3, (arg2[2]));
+    var x157: u64 = undefined;
+    var x158: u64 = undefined;
+    mulxU64(&x157, &x158, x3, (arg2[1]));
+    var x159: u64 = undefined;
+    var x160: u64 = undefined;
+    mulxU64(&x159, &x160, x3, (arg2[0]));
+    var x161: u64 = undefined;
+    var x162: u1 = undefined;
+    addcarryxU64(&x161, &x162, 0x0, x160, x157);
+    var x163: u64 = undefined;
+    var x164: u1 = undefined;
+    addcarryxU64(&x163, &x164, x162, x158, x155);
+    var x165: u64 = undefined;
+    var x166: u1 = undefined;
+    addcarryxU64(&x165, &x166, x164, x156, x153);
+    const x167 = (cast(u64, x166) + x154);
+    var x168: u64 = undefined;
+    var x169: u1 = undefined;
+    addcarryxU64(&x168, &x169, 0x0, x144, x159);
+    var x170: u64 = undefined;
+    var x171: u1 = undefined;
+    addcarryxU64(&x170, &x171, x169, x146, x161);
+    var x172: u64 = undefined;
+    var x173: u1 = undefined;
+    addcarryxU64(&x172, &x173, x171, x148, x163);
+    var x174: u64 = undefined;
+    var x175: u1 = undefined;
+    addcarryxU64(&x174, &x175, x173, x150, x165);
+    var x176: u64 = undefined;
+    var x177: u1 = undefined;
+    addcarryxU64(&x176, &x177, x175, x152, x167);
+    var x178: u64 = undefined;
+    var x179: u64 = undefined;
+    mulxU64(&x178, &x179, x168, 0xccd1c8aaee00bc4f); // same constant again, applied to the new low word x168
+    var x180: u64 = undefined;
+    var x181: u64 = undefined;
+    mulxU64(&x180, &x181, x178, 0xffffffff00000000);
+    var x182: u64 = undefined;
+    var x183: u64 = undefined;
+    mulxU64(&x182, &x183, x178, 0xffffffffffffffff);
+    var x184: u64 = undefined;
+    var x185: u64 = undefined;
+    mulxU64(&x184, &x185, x178, 0xbce6faada7179e84);
+    var x186: u64 = undefined;
+    var x187: u64 = undefined;
+    mulxU64(&x186, &x187, x178, 0xf3b9cac2fc632551);
+    var x188: u64 = undefined;
+    var x189: u1 = undefined;
+    addcarryxU64(&x188, &x189, 0x0, x187, x184);
+    var x190: u64 = undefined;
+    var x191: u1 = undefined;
+    addcarryxU64(&x190, &x191, x189, x185, x182);
+    var x192: u64 = undefined;
+    var x193: u1 = undefined;
+    addcarryxU64(&x192, &x193, x191, x183, x180);
+    const x194 = (cast(u64, x193) + x181);
+    var x195: u64 = undefined;
+    var x196: u1 = undefined;
+    addcarryxU64(&x195, &x196, 0x0, x168, x186);
+    var x197: u64 = undefined;
+    var x198: u1 = undefined;
+    addcarryxU64(&x197, &x198, x196, x170, x188);
+    var x199: u64 = undefined;
+    var x200: u1 = undefined;
+    addcarryxU64(&x199, &x200, x198, x172, x190);
+    var x201: u64 = undefined;
+    var x202: u1 = undefined;
+    addcarryxU64(&x201, &x202, x200, x174, x192);
+    var x203: u64 = undefined;
+    var x204: u1 = undefined;
+    addcarryxU64(&x203, &x204, x202, x176, x194);
+    const x205 = (cast(u64, x204) + cast(u64, x177));
+    var x206: u64 = undefined;
+    var x207: u1 = undefined;
+    subborrowxU64(&x206, &x207, 0x0, x197, 0xf3b9cac2fc632551); // trial subtraction of m from (x197, x199, x201, x203, x205), least significant limb first
+    var x208: u64 = undefined;
+    var x209: u1 = undefined;
+    subborrowxU64(&x208, &x209, x207, x199, 0xbce6faada7179e84);
+    var x210: u64 = undefined;
+    var x211: u1 = undefined;
+    subborrowxU64(&x210, &x211, x209, x201, 0xffffffffffffffff);
+    var x212: u64 = undefined;
+    var x213: u1 = undefined;
+    subborrowxU64(&x212, &x213, x211, x203, 0xffffffff00000000);
+    var x214: u64 = undefined;
+    var x215: u1 = undefined;
+    subborrowxU64(&x214, &x215, x213, x205, cast(u64, 0x0)); // x215 is the final borrow: 1 when the value was smaller than m
+    var x216: u64 = undefined;
+    cmovznzU64(&x216, x215, x206, x197); // no borrow: take the subtracted limb; borrow: keep the unsubtracted limb
+    var x217: u64 = undefined;
+    cmovznzU64(&x217, x215, x208, x199);
+    var x218: u64 = undefined;
+    cmovznzU64(&x218, x215, x210, x201);
+    var x219: u64 = undefined;
+    cmovznzU64(&x219, x215, x212, x203);
+    out1[0] = x216; // the four selected limbs are the result
+    out1[1] = x217;
+    out1[2] = x218;
+    out1[3] = x219;
+}
+
+/// The function square squares a field element in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///
+/// Implementation notes:
+///   The body is straight-line code made of four rounds, one per limb of arg1
+///   (taken in the order arg1[0], arg1[1], arg1[2], arg1[3]). Each round
+///   multiplies that limb by all four limbs of arg1, adds the product to a
+///   running accumulator, and then adds a multiple of the constant whose
+///   64-bit limbs, least significant first, are 0xf3b9cac2fc632551,
+///   0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000. After the
+///   last round the same constant is subtracted once, and the subtracted or
+///   unsubtracted limbs are selected according to the final borrow.
+///   NOTE(review): that constant is presumably the modulus m, and
+///   0xccd1c8aaee00bc4f presumably -m^-1 mod 2^64 (word-by-word Montgomery
+///   reduction) -- confirm against the generator parameters.
+///   The helpers mulxU64, addcarryxU64, subborrowxU64, cmovznzU64 and cast are
+///   defined outside this function; their use here is consistent with
+///   mulxU64 returning (low word, high word) in its first two arguments.
+pub fn square(out1: *[4]u64, arg1: [4]u64) void {
+    // Runtime safety checks are only requested when `mode` is .Debug.
+    @setRuntimeSafety(mode == .Debug);
+
+    // Limbs used as the per-round multiplier: x4 (limb 0) first, then x1, x2, x3.
+    const x1 = (arg1[1]);
+    const x2 = (arg1[2]);
+    const x3 = (arg1[3]);
+    const x4 = (arg1[0]);
+    // Round 0: products of limb 0 with every limb of arg1.
+    var x5: u64 = undefined;
+    var x6: u64 = undefined;
+    mulxU64(&x5, &x6, x4, (arg1[3]));
+    var x7: u64 = undefined;
+    var x8: u64 = undefined;
+    mulxU64(&x7, &x8, x4, (arg1[2]));
+    var x9: u64 = undefined;
+    var x10: u64 = undefined;
+    mulxU64(&x9, &x10, x4, (arg1[1]));
+    var x11: u64 = undefined;
+    var x12: u64 = undefined;
+    mulxU64(&x11, &x12, x4, (arg1[0]));
+    // Chain the partial products into the five-limb value (x11, x13, x15, x17, x19).
+    var x13: u64 = undefined;
+    var x14: u1 = undefined;
+    addcarryxU64(&x13, &x14, 0x0, x12, x9);
+    var x15: u64 = undefined;
+    var x16: u1 = undefined;
+    addcarryxU64(&x15, &x16, x14, x10, x7);
+    var x17: u64 = undefined;
+    var x18: u1 = undefined;
+    addcarryxU64(&x17, &x18, x16, x8, x5);
+    const x19 = (cast(u64, x18) + x6);
+    // Reduction for round 0: x20 is derived from the lowest limb x11 (x21 is never read),
+    // then x20 times the four constant limbs is assembled as (x28, x30, x32, x34, x36).
+    var x20: u64 = undefined;
+    var x21: u64 = undefined;
+    mulxU64(&x20, &x21, x11, 0xccd1c8aaee00bc4f);
+    var x22: u64 = undefined;
+    var x23: u64 = undefined;
+    mulxU64(&x22, &x23, x20, 0xffffffff00000000);
+    var x24: u64 = undefined;
+    var x25: u64 = undefined;
+    mulxU64(&x24, &x25, x20, 0xffffffffffffffff);
+    var x26: u64 = undefined;
+    var x27: u64 = undefined;
+    mulxU64(&x26, &x27, x20, 0xbce6faada7179e84);
+    var x28: u64 = undefined;
+    var x29: u64 = undefined;
+    mulxU64(&x28, &x29, x20, 0xf3b9cac2fc632551);
+    var x30: u64 = undefined;
+    var x31: u1 = undefined;
+    addcarryxU64(&x30, &x31, 0x0, x29, x26);
+    var x32: u64 = undefined;
+    var x33: u1 = undefined;
+    addcarryxU64(&x32, &x33, x31, x27, x24);
+    var x34: u64 = undefined;
+    var x35: u1 = undefined;
+    addcarryxU64(&x34, &x35, x33, x25, x22);
+    const x36 = (cast(u64, x35) + x23);
+    // Add the reduction term to the accumulator. The lowest sum limb x37 is never read;
+    // only its carry x38 is used. Result: (x39, x41, x43, x45) with top carry x46.
+    var x37: u64 = undefined;
+    var x38: u1 = undefined;
+    addcarryxU64(&x37, &x38, 0x0, x11, x28);
+    var x39: u64 = undefined;
+    var x40: u1 = undefined;
+    addcarryxU64(&x39, &x40, x38, x13, x30);
+    var x41: u64 = undefined;
+    var x42: u1 = undefined;
+    addcarryxU64(&x41, &x42, x40, x15, x32);
+    var x43: u64 = undefined;
+    var x44: u1 = undefined;
+    addcarryxU64(&x43, &x44, x42, x17, x34);
+    var x45: u64 = undefined;
+    var x46: u1 = undefined;
+    addcarryxU64(&x45, &x46, x44, x19, x36);
+    // Round 1: products of limb 1 (x1) with every limb of arg1 -> (x53, x55, x57, x59, x61).
+    var x47: u64 = undefined;
+    var x48: u64 = undefined;
+    mulxU64(&x47, &x48, x1, (arg1[3]));
+    var x49: u64 = undefined;
+    var x50: u64 = undefined;
+    mulxU64(&x49, &x50, x1, (arg1[2]));
+    var x51: u64 = undefined;
+    var x52: u64 = undefined;
+    mulxU64(&x51, &x52, x1, (arg1[1]));
+    var x53: u64 = undefined;
+    var x54: u64 = undefined;
+    mulxU64(&x53, &x54, x1, (arg1[0]));
+    var x55: u64 = undefined;
+    var x56: u1 = undefined;
+    addcarryxU64(&x55, &x56, 0x0, x54, x51);
+    var x57: u64 = undefined;
+    var x58: u1 = undefined;
+    addcarryxU64(&x57, &x58, x56, x52, x49);
+    var x59: u64 = undefined;
+    var x60: u1 = undefined;
+    addcarryxU64(&x59, &x60, x58, x50, x47);
+    const x61 = (cast(u64, x60) + x48);
+    // Accumulate the round-1 product onto the previous result -> (x62, x64, x66, x68, x70), carry x71.
+    var x62: u64 = undefined;
+    var x63: u1 = undefined;
+    addcarryxU64(&x62, &x63, 0x0, x39, x53);
+    var x64: u64 = undefined;
+    var x65: u1 = undefined;
+    addcarryxU64(&x64, &x65, x63, x41, x55);
+    var x66: u64 = undefined;
+    var x67: u1 = undefined;
+    addcarryxU64(&x66, &x67, x65, x43, x57);
+    var x68: u64 = undefined;
+    var x69: u1 = undefined;
+    addcarryxU64(&x68, &x69, x67, x45, x59);
+    var x70: u64 = undefined;
+    var x71: u1 = undefined;
+    addcarryxU64(&x70, &x71, x69, cast(u64, x46), x61);
+    // Reduction for round 1, same shape as round 0, driven by x72 (derived from x62).
+    var x72: u64 = undefined;
+    var x73: u64 = undefined;
+    mulxU64(&x72, &x73, x62, 0xccd1c8aaee00bc4f);
+    var x74: u64 = undefined;
+    var x75: u64 = undefined;
+    mulxU64(&x74, &x75, x72, 0xffffffff00000000);
+    var x76: u64 = undefined;
+    var x77: u64 = undefined;
+    mulxU64(&x76, &x77, x72, 0xffffffffffffffff);
+    var x78: u64 = undefined;
+    var x79: u64 = undefined;
+    mulxU64(&x78, &x79, x72, 0xbce6faada7179e84);
+    var x80: u64 = undefined;
+    var x81: u64 = undefined;
+    mulxU64(&x80, &x81, x72, 0xf3b9cac2fc632551);
+    var x82: u64 = undefined;
+    var x83: u1 = undefined;
+    addcarryxU64(&x82, &x83, 0x0, x81, x78);
+    var x84: u64 = undefined;
+    var x85: u1 = undefined;
+    addcarryxU64(&x84, &x85, x83, x79, x76);
+    var x86: u64 = undefined;
+    var x87: u1 = undefined;
+    addcarryxU64(&x86, &x87, x85, x77, x74);
+    const x88 = (cast(u64, x87) + x75);
+    // Lowest sum limb x89 is never read. Result after round 1: (x91, x93, x95, x97, x99).
+    var x89: u64 = undefined;
+    var x90: u1 = undefined;
+    addcarryxU64(&x89, &x90, 0x0, x62, x80);
+    var x91: u64 = undefined;
+    var x92: u1 = undefined;
+    addcarryxU64(&x91, &x92, x90, x64, x82);
+    var x93: u64 = undefined;
+    var x94: u1 = undefined;
+    addcarryxU64(&x93, &x94, x92, x66, x84);
+    var x95: u64 = undefined;
+    var x96: u1 = undefined;
+    addcarryxU64(&x95, &x96, x94, x68, x86);
+    var x97: u64 = undefined;
+    var x98: u1 = undefined;
+    addcarryxU64(&x97, &x98, x96, x70, x88);
+    const x99 = (cast(u64, x98) + cast(u64, x71));
+    // Round 2: products of limb 2 (x2) with every limb of arg1 -> (x106, x108, x110, x112, x114).
+    var x100: u64 = undefined;
+    var x101: u64 = undefined;
+    mulxU64(&x100, &x101, x2, (arg1[3]));
+    var x102: u64 = undefined;
+    var x103: u64 = undefined;
+    mulxU64(&x102, &x103, x2, (arg1[2]));
+    var x104: u64 = undefined;
+    var x105: u64 = undefined;
+    mulxU64(&x104, &x105, x2, (arg1[1]));
+    var x106: u64 = undefined;
+    var x107: u64 = undefined;
+    mulxU64(&x106, &x107, x2, (arg1[0]));
+    var x108: u64 = undefined;
+    var x109: u1 = undefined;
+    addcarryxU64(&x108, &x109, 0x0, x107, x104);
+    var x110: u64 = undefined;
+    var x111: u1 = undefined;
+    addcarryxU64(&x110, &x111, x109, x105, x102);
+    var x112: u64 = undefined;
+    var x113: u1 = undefined;
+    addcarryxU64(&x112, &x113, x111, x103, x100);
+    const x114 = (cast(u64, x113) + x101);
+    // Accumulate the round-2 product -> (x115, x117, x119, x121, x123), carry x124.
+    var x115: u64 = undefined;
+    var x116: u1 = undefined;
+    addcarryxU64(&x115, &x116, 0x0, x91, x106);
+    var x117: u64 = undefined;
+    var x118: u1 = undefined;
+    addcarryxU64(&x117, &x118, x116, x93, x108);
+    var x119: u64 = undefined;
+    var x120: u1 = undefined;
+    addcarryxU64(&x119, &x120, x118, x95, x110);
+    var x121: u64 = undefined;
+    var x122: u1 = undefined;
+    addcarryxU64(&x121, &x122, x120, x97, x112);
+    var x123: u64 = undefined;
+    var x124: u1 = undefined;
+    addcarryxU64(&x123, &x124, x122, x99, x114);
+    // Reduction for round 2, driven by x125 (derived from x115).
+    var x125: u64 = undefined;
+    var x126: u64 = undefined;
+    mulxU64(&x125, &x126, x115, 0xccd1c8aaee00bc4f);
+    var x127: u64 = undefined;
+    var x128: u64 = undefined;
+    mulxU64(&x127, &x128, x125, 0xffffffff00000000);
+    var x129: u64 = undefined;
+    var x130: u64 = undefined;
+    mulxU64(&x129, &x130, x125, 0xffffffffffffffff);
+    var x131: u64 = undefined;
+    var x132: u64 = undefined;
+    mulxU64(&x131, &x132, x125, 0xbce6faada7179e84);
+    var x133: u64 = undefined;
+    var x134: u64 = undefined;
+    mulxU64(&x133, &x134, x125, 0xf3b9cac2fc632551);
+    var x135: u64 = undefined;
+    var x136: u1 = undefined;
+    addcarryxU64(&x135, &x136, 0x0, x134, x131);
+    var x137: u64 = undefined;
+    var x138: u1 = undefined;
+    addcarryxU64(&x137, &x138, x136, x132, x129);
+    var x139: u64 = undefined;
+    var x140: u1 = undefined;
+    addcarryxU64(&x139, &x140, x138, x130, x127);
+    const x141 = (cast(u64, x140) + x128);
+    // Lowest sum limb x142 is never read. Result after round 2: (x144, x146, x148, x150, x152).
+    var x142: u64 = undefined;
+    var x143: u1 = undefined;
+    addcarryxU64(&x142, &x143, 0x0, x115, x133);
+    var x144: u64 = undefined;
+    var x145: u1 = undefined;
+    addcarryxU64(&x144, &x145, x143, x117, x135);
+    var x146: u64 = undefined;
+    var x147: u1 = undefined;
+    addcarryxU64(&x146, &x147, x145, x119, x137);
+    var x148: u64 = undefined;
+    var x149: u1 = undefined;
+    addcarryxU64(&x148, &x149, x147, x121, x139);
+    var x150: u64 = undefined;
+    var x151: u1 = undefined;
+    addcarryxU64(&x150, &x151, x149, x123, x141);
+    const x152 = (cast(u64, x151) + cast(u64, x124));
+    // Round 3: products of limb 3 (x3) with every limb of arg1 -> (x159, x161, x163, x165, x167).
+    var x153: u64 = undefined;
+    var x154: u64 = undefined;
+    mulxU64(&x153, &x154, x3, (arg1[3]));
+    var x155: u64 = undefined;
+    var x156: u64 = undefined;
+    mulxU64(&x155, &x156, x3, (arg1[2]));
+    var x157: u64 = undefined;
+    var x158: u64 = undefined;
+    mulxU64(&x157, &x158, x3, (arg1[1]));
+    var x159: u64 = undefined;
+    var x160: u64 = undefined;
+    mulxU64(&x159, &x160, x3, (arg1[0]));
+    var x161: u64 = undefined;
+    var x162: u1 = undefined;
+    addcarryxU64(&x161, &x162, 0x0, x160, x157);
+    var x163: u64 = undefined;
+    var x164: u1 = undefined;
+    addcarryxU64(&x163, &x164, x162, x158, x155);
+    var x165: u64 = undefined;
+    var x166: u1 = undefined;
+    addcarryxU64(&x165, &x166, x164, x156, x153);
+    const x167 = (cast(u64, x166) + x154);
+    // Accumulate the round-3 product -> (x168, x170, x172, x174, x176), carry x177.
+    var x168: u64 = undefined;
+    var x169: u1 = undefined;
+    addcarryxU64(&x168, &x169, 0x0, x144, x159);
+    var x170: u64 = undefined;
+    var x171: u1 = undefined;
+    addcarryxU64(&x170, &x171, x169, x146, x161);
+    var x172: u64 = undefined;
+    var x173: u1 = undefined;
+    addcarryxU64(&x172, &x173, x171, x148, x163);
+    var x174: u64 = undefined;
+    var x175: u1 = undefined;
+    addcarryxU64(&x174, &x175, x173, x150, x165);
+    var x176: u64 = undefined;
+    var x177: u1 = undefined;
+    addcarryxU64(&x176, &x177, x175, x152, x167);
+    // Reduction for round 3, driven by x178 (derived from x168).
+    var x178: u64 = undefined;
+    var x179: u64 = undefined;
+    mulxU64(&x178, &x179, x168, 0xccd1c8aaee00bc4f);
+    var x180: u64 = undefined;
+    var x181: u64 = undefined;
+    mulxU64(&x180, &x181, x178, 0xffffffff00000000);
+    var x182: u64 = undefined;
+    var x183: u64 = undefined;
+    mulxU64(&x182, &x183, x178, 0xffffffffffffffff);
+    var x184: u64 = undefined;
+    var x185: u64 = undefined;
+    mulxU64(&x184, &x185, x178, 0xbce6faada7179e84);
+    var x186: u64 = undefined;
+    var x187: u64 = undefined;
+    mulxU64(&x186, &x187, x178, 0xf3b9cac2fc632551);
+    var x188: u64 = undefined;
+    var x189: u1 = undefined;
+    addcarryxU64(&x188, &x189, 0x0, x187, x184);
+    var x190: u64 = undefined;
+    var x191: u1 = undefined;
+    addcarryxU64(&x190, &x191, x189, x185, x182);
+    var x192: u64 = undefined;
+    var x193: u1 = undefined;
+    addcarryxU64(&x192, &x193, x191, x183, x180);
+    const x194 = (cast(u64, x193) + x181);
+    // Lowest sum limb x195 is never read. Result after round 3: (x197, x199, x201, x203, x205).
+    var x195: u64 = undefined;
+    var x196: u1 = undefined;
+    addcarryxU64(&x195, &x196, 0x0, x168, x186);
+    var x197: u64 = undefined;
+    var x198: u1 = undefined;
+    addcarryxU64(&x197, &x198, x196, x170, x188);
+    var x199: u64 = undefined;
+    var x200: u1 = undefined;
+    addcarryxU64(&x199, &x200, x198, x172, x190);
+    var x201: u64 = undefined;
+    var x202: u1 = undefined;
+    addcarryxU64(&x201, &x202, x200, x174, x192);
+    var x203: u64 = undefined;
+    var x204: u1 = undefined;
+    addcarryxU64(&x203, &x204, x202, x176, x194);
+    const x205 = (cast(u64, x204) + cast(u64, x177));
+    // Final step: subtract the constant once from the five-limb result; x215 is the last borrow.
+    var x206: u64 = undefined;
+    var x207: u1 = undefined;
+    subborrowxU64(&x206, &x207, 0x0, x197, 0xf3b9cac2fc632551);
+    var x208: u64 = undefined;
+    var x209: u1 = undefined;
+    subborrowxU64(&x208, &x209, x207, x199, 0xbce6faada7179e84);
+    var x210: u64 = undefined;
+    var x211: u1 = undefined;
+    subborrowxU64(&x210, &x211, x209, x201, 0xffffffffffffffff);
+    var x212: u64 = undefined;
+    var x213: u1 = undefined;
+    subborrowxU64(&x212, &x213, x211, x203, 0xffffffff00000000);
+    var x214: u64 = undefined;
+    var x215: u1 = undefined;
+    subborrowxU64(&x214, &x215, x213, x205, cast(u64, 0x0));
+    // Choose between the subtracted limbs and the unsubtracted ones based on x215.
+    // NOTE(review): cmovznzU64 is defined elsewhere; presumably it yields its last
+    // argument when the flag is non-zero, i.e. the unsubtracted limb after a borrow.
+    var x216: u64 = undefined;
+    cmovznzU64(&x216, x215, x206, x197);
+    var x217: u64 = undefined;
+    cmovznzU64(&x217, x215, x208, x199);
+    var x218: u64 = undefined;
+    cmovznzU64(&x218, x215, x210, x201);
+    var x219: u64 = undefined;
+    cmovznzU64(&x219, x215, x212, x203);
+    out1[0] = x216;
+    out1[1] = x217;
+    out1[2] = x218;
+    out1[3] = x219;
+}
+
+/// Computes the modular sum of two field elements given in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+///   0 ≤ eval arg2 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub fn add(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    // 64-bit limbs of the constant used for the trial subtraction, least significant first.
+    const m0: u64 = 0xf3b9cac2fc632551;
+    const m1: u64 = 0xbce6faada7179e84;
+    const m2: u64 = 0xffffffffffffffff;
+    const m3: u64 = 0xffffffff00000000;
+
+    // Raw limb-wise sum arg1 + arg2; carry3 serves as a fifth limb.
+    var sum0: u64 = undefined;
+    var carry0: u1 = undefined;
+    addcarryxU64(&sum0, &carry0, 0x0, arg1[0], arg2[0]);
+    var sum1: u64 = undefined;
+    var carry1: u1 = undefined;
+    addcarryxU64(&sum1, &carry1, carry0, arg1[1], arg2[1]);
+    var sum2: u64 = undefined;
+    var carry2: u1 = undefined;
+    addcarryxU64(&sum2, &carry2, carry1, arg1[2], arg2[2]);
+    var sum3: u64 = undefined;
+    var carry3: u1 = undefined;
+    addcarryxU64(&sum3, &carry3, carry2, arg1[3], arg2[3]);
+
+    // Trial subtraction of the constant from the five-limb sum.
+    var diff0: u64 = undefined;
+    var borrow0: u1 = undefined;
+    subborrowxU64(&diff0, &borrow0, 0x0, sum0, m0);
+    var diff1: u64 = undefined;
+    var borrow1: u1 = undefined;
+    subborrowxU64(&diff1, &borrow1, borrow0, sum1, m1);
+    var diff2: u64 = undefined;
+    var borrow2: u1 = undefined;
+    subborrowxU64(&diff2, &borrow2, borrow1, sum2, m2);
+    var diff3: u64 = undefined;
+    var borrow3: u1 = undefined;
+    subborrowxU64(&diff3, &borrow3, borrow2, sum3, m3);
+    // Only the borrow out of the top limb matters; diff4 itself is not used.
+    var diff4: u64 = undefined;
+    var borrow4: u1 = undefined;
+    subborrowxU64(&diff4, &borrow4, borrow3, cast(u64, carry3), cast(u64, 0x0));
+
+    // Pick, per limb, between the subtracted value and the raw sum according to borrow4.
+    var res0: u64 = undefined;
+    cmovznzU64(&res0, borrow4, diff0, sum0);
+    var res1: u64 = undefined;
+    cmovznzU64(&res1, borrow4, diff1, sum1);
+    var res2: u64 = undefined;
+    cmovznzU64(&res2, borrow4, diff2, sum2);
+    var res3: u64 = undefined;
+    cmovznzU64(&res3, borrow4, diff3, sum3);
+
+    out1.* = [4]u64{ res0, res1, res2, res3 };
+}
+
+/// Computes the modular difference arg1 - arg2 of two field elements given in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+///   0 ≤ eval arg2 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub fn sub(out1: *[4]u64, arg1: [4]u64, arg2: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    // Limbs 0, 1 and 3 of the constant that is added back; limb 2 is all ones,
+    // so the mask itself is used for that position.
+    const m0: u64 = 0xf3b9cac2fc632551;
+    const m1: u64 = 0xbce6faada7179e84;
+    const m3: u64 = 0xffffffff00000000;
+
+    // Raw limb-wise difference arg1 - arg2 with borrow propagation.
+    var diff0: u64 = undefined;
+    var borrow0: u1 = undefined;
+    subborrowxU64(&diff0, &borrow0, 0x0, arg1[0], arg2[0]);
+    var diff1: u64 = undefined;
+    var borrow1: u1 = undefined;
+    subborrowxU64(&diff1, &borrow1, borrow0, arg1[1], arg2[1]);
+    var diff2: u64 = undefined;
+    var borrow2: u1 = undefined;
+    subborrowxU64(&diff2, &borrow2, borrow1, arg1[2], arg2[2]);
+    var diff3: u64 = undefined;
+    var borrow3: u1 = undefined;
+    subborrowxU64(&diff3, &borrow3, borrow2, arg1[3], arg2[3]);
+
+    // Mask chosen from the final borrow: either zero or all ones.
+    var mask: u64 = undefined;
+    cmovznzU64(&mask, borrow3, cast(u64, 0x0), 0xffffffffffffffff);
+
+    // Add the masked constant back onto the raw difference; the last carry is dropped.
+    var res0: u64 = undefined;
+    var carry0: u1 = undefined;
+    addcarryxU64(&res0, &carry0, 0x0, diff0, (mask & m0));
+    var res1: u64 = undefined;
+    var carry1: u1 = undefined;
+    addcarryxU64(&res1, &carry1, carry0, diff1, (mask & m1));
+    var res2: u64 = undefined;
+    var carry2: u1 = undefined;
+    addcarryxU64(&res2, &carry2, carry1, diff2, mask);
+    var res3: u64 = undefined;
+    var carry3: u1 = undefined;
+    addcarryxU64(&res3, &carry3, carry2, diff3, (mask & m3));
+
+    out1.* = [4]u64{ res0, res1, res2, res3 };
+}
+
+/// Computes the additive inverse of a field element given in the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub fn opp(out1: *[4]u64, arg1: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    // Limbs 0, 1 and 3 of the constant that is added back; limb 2 is all ones,
+    // so the mask itself is used for that position.
+    const m0: u64 = 0xf3b9cac2fc632551;
+    const m1: u64 = 0xbce6faada7179e84;
+    const m3: u64 = 0xffffffff00000000;
+    const zero = cast(u64, 0x0);
+
+    // Raw limb-wise difference 0 - arg1 with borrow propagation.
+    var neg0: u64 = undefined;
+    var borrow0: u1 = undefined;
+    subborrowxU64(&neg0, &borrow0, 0x0, zero, arg1[0]);
+    var neg1: u64 = undefined;
+    var borrow1: u1 = undefined;
+    subborrowxU64(&neg1, &borrow1, borrow0, zero, arg1[1]);
+    var neg2: u64 = undefined;
+    var borrow2: u1 = undefined;
+    subborrowxU64(&neg2, &borrow2, borrow1, zero, arg1[2]);
+    var neg3: u64 = undefined;
+    var borrow3: u1 = undefined;
+    subborrowxU64(&neg3, &borrow3, borrow2, zero, arg1[3]);
+
+    // Mask chosen from the final borrow: either zero or all ones.
+    var mask: u64 = undefined;
+    cmovznzU64(&mask, borrow3, zero, 0xffffffffffffffff);
+
+    // Add the masked constant back onto the raw difference; the last carry is dropped.
+    var res0: u64 = undefined;
+    var carry0: u1 = undefined;
+    addcarryxU64(&res0, &carry0, 0x0, neg0, (mask & m0));
+    var res1: u64 = undefined;
+    var carry1: u1 = undefined;
+    addcarryxU64(&res1, &carry1, carry0, neg1, (mask & m1));
+    var res2: u64 = undefined;
+    var carry2: u1 = undefined;
+    addcarryxU64(&res2, &carry2, carry1, neg2, mask);
+    var res3: u64 = undefined;
+    var carry3: u1 = undefined;
+    addcarryxU64(&res3, &carry3, carry2, neg3, (mask & m3));
+
+    out1.* = [4]u64{ res0, res1, res2, res3 };
+}
+
+/// The function fromMontgomery translates a field element out of the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///
+/// Implementation notes:
+///   The body is straight-line code made of four reduction rounds. Each round
+///   derives a factor from the current lowest limb (times 0xccd1c8aaee00bc4f),
+///   multiplies it by the constant whose 64-bit limbs, least significant
+///   first, are 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff,
+///   0xffffffff00000000, and adds the product to the accumulator; the lowest
+///   limb of that sum is never read. Between rounds the next limb of arg1 is
+///   added in. After the last round the constant is subtracted once and the
+///   subtracted or unsubtracted limbs are selected by the final borrow.
+///   NOTE(review): the constant is presumably the modulus m and
+///   0xccd1c8aaee00bc4f presumably -m^-1 mod 2^64 -- confirm against the
+///   generator parameters. mulxU64, addcarryxU64, subborrowxU64, cmovznzU64
+///   and cast are defined outside this function.
+pub fn fromMontgomery(out1: *[4]u64, arg1: [4]u64) void {
+    // Runtime safety checks are only requested when `mode` is .Debug.
+    @setRuntimeSafety(mode == .Debug);
+
+    // Round 0: start from limb 0 of the input; x2 is the reduction factor (x3 is never read).
+    const x1 = (arg1[0]);
+    var x2: u64 = undefined;
+    var x3: u64 = undefined;
+    mulxU64(&x2, &x3, x1, 0xccd1c8aaee00bc4f);
+    var x4: u64 = undefined;
+    var x5: u64 = undefined;
+    mulxU64(&x4, &x5, x2, 0xffffffff00000000);
+    var x6: u64 = undefined;
+    var x7: u64 = undefined;
+    mulxU64(&x6, &x7, x2, 0xffffffffffffffff);
+    var x8: u64 = undefined;
+    var x9: u64 = undefined;
+    mulxU64(&x8, &x9, x2, 0xbce6faada7179e84);
+    var x10: u64 = undefined;
+    var x11: u64 = undefined;
+    mulxU64(&x10, &x11, x2, 0xf3b9cac2fc632551);
+    // Chain the partial products of x2 times the constant: (x10, x12, x14, x16), top part x17 + x5.
+    var x12: u64 = undefined;
+    var x13: u1 = undefined;
+    addcarryxU64(&x12, &x13, 0x0, x11, x8);
+    var x14: u64 = undefined;
+    var x15: u1 = undefined;
+    addcarryxU64(&x14, &x15, x13, x9, x6);
+    var x16: u64 = undefined;
+    var x17: u1 = undefined;
+    addcarryxU64(&x16, &x17, x15, x7, x4);
+    // Add the product to the single-limb accumulator x1; the lowest sum limb x18 is never read.
+    var x18: u64 = undefined;
+    var x19: u1 = undefined;
+    addcarryxU64(&x18, &x19, 0x0, x1, x10);
+    var x20: u64 = undefined;
+    var x21: u1 = undefined;
+    addcarryxU64(&x20, &x21, x19, cast(u64, 0x0), x12);
+    var x22: u64 = undefined;
+    var x23: u1 = undefined;
+    addcarryxU64(&x22, &x23, x21, cast(u64, 0x0), x14);
+    var x24: u64 = undefined;
+    var x25: u1 = undefined;
+    addcarryxU64(&x24, &x25, x23, cast(u64, 0x0), x16);
+    // Round 1: bring in limb 1 of the input, then reduce with factor x32.
+    var x26: u64 = undefined;
+    var x27: u1 = undefined;
+    addcarryxU64(&x26, &x27, 0x0, x20, (arg1[1]));
+    var x28: u64 = undefined;
+    var x29: u1 = undefined;
+    addcarryxU64(&x28, &x29, x27, x22, cast(u64, 0x0));
+    var x30: u64 = undefined;
+    var x31: u1 = undefined;
+    addcarryxU64(&x30, &x31, x29, x24, cast(u64, 0x0));
+    var x32: u64 = undefined;
+    var x33: u64 = undefined;
+    mulxU64(&x32, &x33, x26, 0xccd1c8aaee00bc4f);
+    var x34: u64 = undefined;
+    var x35: u64 = undefined;
+    mulxU64(&x34, &x35, x32, 0xffffffff00000000);
+    var x36: u64 = undefined;
+    var x37: u64 = undefined;
+    mulxU64(&x36, &x37, x32, 0xffffffffffffffff);
+    var x38: u64 = undefined;
+    var x39: u64 = undefined;
+    mulxU64(&x38, &x39, x32, 0xbce6faada7179e84);
+    var x40: u64 = undefined;
+    var x41: u64 = undefined;
+    mulxU64(&x40, &x41, x32, 0xf3b9cac2fc632551);
+    var x42: u64 = undefined;
+    var x43: u1 = undefined;
+    addcarryxU64(&x42, &x43, 0x0, x41, x38);
+    var x44: u64 = undefined;
+    var x45: u1 = undefined;
+    addcarryxU64(&x44, &x45, x43, x39, x36);
+    var x46: u64 = undefined;
+    var x47: u1 = undefined;
+    addcarryxU64(&x46, &x47, x45, x37, x34);
+    // Lowest sum limb x48 is never read. The top limb also folds in the carries and
+    // high word left over from round 0 (x31, x25, x17, x5).
+    var x48: u64 = undefined;
+    var x49: u1 = undefined;
+    addcarryxU64(&x48, &x49, 0x0, x26, x40);
+    var x50: u64 = undefined;
+    var x51: u1 = undefined;
+    addcarryxU64(&x50, &x51, x49, x28, x42);
+    var x52: u64 = undefined;
+    var x53: u1 = undefined;
+    addcarryxU64(&x52, &x53, x51, x30, x44);
+    var x54: u64 = undefined;
+    var x55: u1 = undefined;
+    addcarryxU64(&x54, &x55, x53, (cast(u64, x31) + (cast(u64, x25) + (cast(u64, x17) + x5))), x46);
+    // Round 2: bring in limb 2 of the input, then reduce with factor x62.
+    var x56: u64 = undefined;
+    var x57: u1 = undefined;
+    addcarryxU64(&x56, &x57, 0x0, x50, (arg1[2]));
+    var x58: u64 = undefined;
+    var x59: u1 = undefined;
+    addcarryxU64(&x58, &x59, x57, x52, cast(u64, 0x0));
+    var x60: u64 = undefined;
+    var x61: u1 = undefined;
+    addcarryxU64(&x60, &x61, x59, x54, cast(u64, 0x0));
+    var x62: u64 = undefined;
+    var x63: u64 = undefined;
+    mulxU64(&x62, &x63, x56, 0xccd1c8aaee00bc4f);
+    var x64: u64 = undefined;
+    var x65: u64 = undefined;
+    mulxU64(&x64, &x65, x62, 0xffffffff00000000);
+    var x66: u64 = undefined;
+    var x67: u64 = undefined;
+    mulxU64(&x66, &x67, x62, 0xffffffffffffffff);
+    var x68: u64 = undefined;
+    var x69: u64 = undefined;
+    mulxU64(&x68, &x69, x62, 0xbce6faada7179e84);
+    var x70: u64 = undefined;
+    var x71: u64 = undefined;
+    mulxU64(&x70, &x71, x62, 0xf3b9cac2fc632551);
+    var x72: u64 = undefined;
+    var x73: u1 = undefined;
+    addcarryxU64(&x72, &x73, 0x0, x71, x68);
+    var x74: u64 = undefined;
+    var x75: u1 = undefined;
+    addcarryxU64(&x74, &x75, x73, x69, x66);
+    var x76: u64 = undefined;
+    var x77: u1 = undefined;
+    addcarryxU64(&x76, &x77, x75, x67, x64);
+    // Lowest sum limb x78 is never read. The top limb folds in the leftovers of round 1
+    // (x61, x55, x47, x35).
+    var x78: u64 = undefined;
+    var x79: u1 = undefined;
+    addcarryxU64(&x78, &x79, 0x0, x56, x70);
+    var x80: u64 = undefined;
+    var x81: u1 = undefined;
+    addcarryxU64(&x80, &x81, x79, x58, x72);
+    var x82: u64 = undefined;
+    var x83: u1 = undefined;
+    addcarryxU64(&x82, &x83, x81, x60, x74);
+    var x84: u64 = undefined;
+    var x85: u1 = undefined;
+    addcarryxU64(&x84, &x85, x83, (cast(u64, x61) + (cast(u64, x55) + (cast(u64, x47) + x35))), x76);
+    // Round 3: bring in limb 3 of the input, then reduce with factor x92.
+    var x86: u64 = undefined;
+    var x87: u1 = undefined;
+    addcarryxU64(&x86, &x87, 0x0, x80, (arg1[3]));
+    var x88: u64 = undefined;
+    var x89: u1 = undefined;
+    addcarryxU64(&x88, &x89, x87, x82, cast(u64, 0x0));
+    var x90: u64 = undefined;
+    var x91: u1 = undefined;
+    addcarryxU64(&x90, &x91, x89, x84, cast(u64, 0x0));
+    var x92: u64 = undefined;
+    var x93: u64 = undefined;
+    mulxU64(&x92, &x93, x86, 0xccd1c8aaee00bc4f);
+    var x94: u64 = undefined;
+    var x95: u64 = undefined;
+    mulxU64(&x94, &x95, x92, 0xffffffff00000000);
+    var x96: u64 = undefined;
+    var x97: u64 = undefined;
+    mulxU64(&x96, &x97, x92, 0xffffffffffffffff);
+    var x98: u64 = undefined;
+    var x99: u64 = undefined;
+    mulxU64(&x98, &x99, x92, 0xbce6faada7179e84);
+    var x100: u64 = undefined;
+    var x101: u64 = undefined;
+    mulxU64(&x100, &x101, x92, 0xf3b9cac2fc632551);
+    var x102: u64 = undefined;
+    var x103: u1 = undefined;
+    addcarryxU64(&x102, &x103, 0x0, x101, x98);
+    var x104: u64 = undefined;
+    var x105: u1 = undefined;
+    addcarryxU64(&x104, &x105, x103, x99, x96);
+    var x106: u64 = undefined;
+    var x107: u1 = undefined;
+    addcarryxU64(&x106, &x107, x105, x97, x94);
+    // Lowest sum limb x108 is never read. Result after round 3: (x110, x112, x114, x116).
+    var x108: u64 = undefined;
+    var x109: u1 = undefined;
+    addcarryxU64(&x108, &x109, 0x0, x86, x100);
+    var x110: u64 = undefined;
+    var x111: u1 = undefined;
+    addcarryxU64(&x110, &x111, x109, x88, x102);
+    var x112: u64 = undefined;
+    var x113: u1 = undefined;
+    addcarryxU64(&x112, &x113, x111, x90, x104);
+    var x114: u64 = undefined;
+    var x115: u1 = undefined;
+    addcarryxU64(&x114, &x115, x113, (cast(u64, x91) + (cast(u64, x85) + (cast(u64, x77) + x65))), x106);
+    const x116 = (cast(u64, x115) + (cast(u64, x107) + x95));
+    // Final step: subtract the constant once; x126 is the last borrow (x125 is never read).
+    var x117: u64 = undefined;
+    var x118: u1 = undefined;
+    subborrowxU64(&x117, &x118, 0x0, x110, 0xf3b9cac2fc632551);
+    var x119: u64 = undefined;
+    var x120: u1 = undefined;
+    subborrowxU64(&x119, &x120, x118, x112, 0xbce6faada7179e84);
+    var x121: u64 = undefined;
+    var x122: u1 = undefined;
+    subborrowxU64(&x121, &x122, x120, x114, 0xffffffffffffffff);
+    var x123: u64 = undefined;
+    var x124: u1 = undefined;
+    subborrowxU64(&x123, &x124, x122, x116, 0xffffffff00000000);
+    var x125: u64 = undefined;
+    var x126: u1 = undefined;
+    subborrowxU64(&x125, &x126, x124, cast(u64, 0x0), cast(u64, 0x0));
+    // Choose between the subtracted limbs and the unsubtracted ones based on x126.
+    // NOTE(review): cmovznzU64 is defined elsewhere; presumably it yields its last
+    // argument when the flag is non-zero, i.e. the unsubtracted limb after a borrow.
+    var x127: u64 = undefined;
+    cmovznzU64(&x127, x126, x117, x110);
+    var x128: u64 = undefined;
+    cmovznzU64(&x128, x126, x119, x112);
+    var x129: u64 = undefined;
+    cmovznzU64(&x129, x126, x121, x114);
+    var x130: u64 = undefined;
+    cmovznzU64(&x130, x126, x123, x116);
+    out1[0] = x127;
+    out1[1] = x128;
+    out1[2] = x129;
+    out1[3] = x130;
+}
+
+/// The function toMontgomery translates a field element into the Montgomery domain.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = eval arg1 mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub fn toMontgomery(out1: *[4]u64, arg1: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    const x1 = (arg1[1]);
+    const x2 = (arg1[2]);
+    const x3 = (arg1[3]);
+    const x4 = (arg1[0]);
+    var x5: u64 = undefined;
+    var x6: u64 = undefined;
+    mulxU64(&x5, &x6, x4, 0x66e12d94f3d95620);
+    var x7: u64 = undefined;
+    var x8: u64 = undefined;
+    mulxU64(&x7, &x8, x4, 0x2845b2392b6bec59);
+    var x9: u64 = undefined;
+    var x10: u64 = undefined;
+    mulxU64(&x9, &x10, x4, 0x4699799c49bd6fa6);
+    var x11: u64 = undefined;
+    var x12: u64 = undefined;
+    mulxU64(&x11, &x12, x4, 0x83244c95be79eea2);
+    var x13: u64 = undefined;
+    var x14: u1 = undefined;
+    addcarryxU64(&x13, &x14, 0x0, x12, x9);
+    var x15: u64 = undefined;
+    var x16: u1 = undefined;
+    addcarryxU64(&x15, &x16, x14, x10, x7);
+    var x17: u64 = undefined;
+    var x18: u1 = undefined;
+    addcarryxU64(&x17, &x18, x16, x8, x5);
+    var x19: u64 = undefined;
+    var x20: u64 = undefined;
+    mulxU64(&x19, &x20, x11, 0xccd1c8aaee00bc4f);
+    var x21: u64 = undefined;
+    var x22: u64 = undefined;
+    mulxU64(&x21, &x22, x19, 0xffffffff00000000);
+    var x23: u64 = undefined;
+    var x24: u64 = undefined;
+    mulxU64(&x23, &x24, x19, 0xffffffffffffffff);
+    var x25: u64 = undefined;
+    var x26: u64 = undefined;
+    mulxU64(&x25, &x26, x19, 0xbce6faada7179e84);
+    var x27: u64 = undefined;
+    var x28: u64 = undefined;
+    mulxU64(&x27, &x28, x19, 0xf3b9cac2fc632551);
+    var x29: u64 = undefined;
+    var x30: u1 = undefined;
+    addcarryxU64(&x29, &x30, 0x0, x28, x25);
+    var x31: u64 = undefined;
+    var x32: u1 = undefined;
+    addcarryxU64(&x31, &x32, x30, x26, x23);
+    var x33: u64 = undefined;
+    var x34: u1 = undefined;
+    addcarryxU64(&x33, &x34, x32, x24, x21);
+    var x35: u64 = undefined;
+    var x36: u1 = undefined;
+    addcarryxU64(&x35, &x36, 0x0, x11, x27);
+    var x37: u64 = undefined;
+    var x38: u1 = undefined;
+    addcarryxU64(&x37, &x38, x36, x13, x29);
+    var x39: u64 = undefined;
+    var x40: u1 = undefined;
+    addcarryxU64(&x39, &x40, x38, x15, x31);
+    var x41: u64 = undefined;
+    var x42: u1 = undefined;
+    addcarryxU64(&x41, &x42, x40, x17, x33);
+    var x43: u64 = undefined;
+    var x44: u1 = undefined;
+    addcarryxU64(&x43, &x44, x42, (cast(u64, x18) + x6), (cast(u64, x34) + x22));
+    var x45: u64 = undefined;
+    var x46: u64 = undefined;
+    mulxU64(&x45, &x46, x1, 0x66e12d94f3d95620);
+    var x47: u64 = undefined;
+    var x48: u64 = undefined;
+    mulxU64(&x47, &x48, x1, 0x2845b2392b6bec59);
+    var x49: u64 = undefined;
+    var x50: u64 = undefined;
+    mulxU64(&x49, &x50, x1, 0x4699799c49bd6fa6);
+    var x51: u64 = undefined;
+    var x52: u64 = undefined;
+    mulxU64(&x51, &x52, x1, 0x83244c95be79eea2);
+    var x53: u64 = undefined;
+    var x54: u1 = undefined;
+    addcarryxU64(&x53, &x54, 0x0, x52, x49);
+    var x55: u64 = undefined;
+    var x56: u1 = undefined;
+    addcarryxU64(&x55, &x56, x54, x50, x47);
+    var x57: u64 = undefined;
+    var x58: u1 = undefined;
+    addcarryxU64(&x57, &x58, x56, x48, x45);
+    var x59: u64 = undefined;
+    var x60: u1 = undefined;
+    addcarryxU64(&x59, &x60, 0x0, x37, x51);
+    var x61: u64 = undefined;
+    var x62: u1 = undefined;
+    addcarryxU64(&x61, &x62, x60, x39, x53);
+    var x63: u64 = undefined;
+    var x64: u1 = undefined;
+    addcarryxU64(&x63, &x64, x62, x41, x55);
+    var x65: u64 = undefined;
+    var x66: u1 = undefined;
+    addcarryxU64(&x65, &x66, x64, x43, x57);
+    var x67: u64 = undefined;
+    var x68: u64 = undefined;
+    mulxU64(&x67, &x68, x59, 0xccd1c8aaee00bc4f);
+    var x69: u64 = undefined;
+    var x70: u64 = undefined;
+    mulxU64(&x69, &x70, x67, 0xffffffff00000000);
+    var x71: u64 = undefined;
+    var x72: u64 = undefined;
+    mulxU64(&x71, &x72, x67, 0xffffffffffffffff);
+    var x73: u64 = undefined;
+    var x74: u64 = undefined;
+    mulxU64(&x73, &x74, x67, 0xbce6faada7179e84);
+    var x75: u64 = undefined;
+    var x76: u64 = undefined;
+    mulxU64(&x75, &x76, x67, 0xf3b9cac2fc632551);
+    var x77: u64 = undefined;
+    var x78: u1 = undefined;
+    addcarryxU64(&x77, &x78, 0x0, x76, x73);
+    var x79: u64 = undefined;
+    var x80: u1 = undefined;
+    addcarryxU64(&x79, &x80, x78, x74, x71);
+    var x81: u64 = undefined;
+    var x82: u1 = undefined;
+    addcarryxU64(&x81, &x82, x80, x72, x69);
+    var x83: u64 = undefined;
+    var x84: u1 = undefined;
+    addcarryxU64(&x83, &x84, 0x0, x59, x75);
+    var x85: u64 = undefined;
+    var x86: u1 = undefined;
+    addcarryxU64(&x85, &x86, x84, x61, x77);
+    var x87: u64 = undefined;
+    var x88: u1 = undefined;
+    addcarryxU64(&x87, &x88, x86, x63, x79);
+    var x89: u64 = undefined;
+    var x90: u1 = undefined;
+    addcarryxU64(&x89, &x90, x88, x65, x81);
+    var x91: u64 = undefined;
+    var x92: u1 = undefined;
+    addcarryxU64(&x91, &x92, x90, ((cast(u64, x66) + cast(u64, x44)) + (cast(u64, x58) + x46)), (cast(u64, x82) + x70));
+    var x93: u64 = undefined;
+    var x94: u64 = undefined;
+    mulxU64(&x93, &x94, x2, 0x66e12d94f3d95620);
+    var x95: u64 = undefined;
+    var x96: u64 = undefined;
+    mulxU64(&x95, &x96, x2, 0x2845b2392b6bec59);
+    var x97: u64 = undefined;
+    var x98: u64 = undefined;
+    mulxU64(&x97, &x98, x2, 0x4699799c49bd6fa6);
+    var x99: u64 = undefined;
+    var x100: u64 = undefined;
+    mulxU64(&x99, &x100, x2, 0x83244c95be79eea2);
+    var x101: u64 = undefined;
+    var x102: u1 = undefined;
+    addcarryxU64(&x101, &x102, 0x0, x100, x97);
+    var x103: u64 = undefined;
+    var x104: u1 = undefined;
+    addcarryxU64(&x103, &x104, x102, x98, x95);
+    var x105: u64 = undefined;
+    var x106: u1 = undefined;
+    addcarryxU64(&x105, &x106, x104, x96, x93);
+    var x107: u64 = undefined;
+    var x108: u1 = undefined;
+    addcarryxU64(&x107, &x108, 0x0, x85, x99);
+    var x109: u64 = undefined;
+    var x110: u1 = undefined;
+    addcarryxU64(&x109, &x110, x108, x87, x101);
+    var x111: u64 = undefined;
+    var x112: u1 = undefined;
+    addcarryxU64(&x111, &x112, x110, x89, x103);
+    var x113: u64 = undefined;
+    var x114: u1 = undefined;
+    addcarryxU64(&x113, &x114, x112, x91, x105);
+    var x115: u64 = undefined;
+    var x116: u64 = undefined;
+    mulxU64(&x115, &x116, x107, 0xccd1c8aaee00bc4f);
+    var x117: u64 = undefined;
+    var x118: u64 = undefined;
+    mulxU64(&x117, &x118, x115, 0xffffffff00000000);
+    var x119: u64 = undefined;
+    var x120: u64 = undefined;
+    mulxU64(&x119, &x120, x115, 0xffffffffffffffff);
+    var x121: u64 = undefined;
+    var x122: u64 = undefined;
+    mulxU64(&x121, &x122, x115, 0xbce6faada7179e84);
+    var x123: u64 = undefined;
+    var x124: u64 = undefined;
+    mulxU64(&x123, &x124, x115, 0xf3b9cac2fc632551);
+    var x125: u64 = undefined;
+    var x126: u1 = undefined;
+    addcarryxU64(&x125, &x126, 0x0, x124, x121);
+    var x127: u64 = undefined;
+    var x128: u1 = undefined;
+    addcarryxU64(&x127, &x128, x126, x122, x119);
+    var x129: u64 = undefined;
+    var x130: u1 = undefined;
+    addcarryxU64(&x129, &x130, x128, x120, x117);
+    var x131: u64 = undefined;
+    var x132: u1 = undefined;
+    addcarryxU64(&x131, &x132, 0x0, x107, x123);
+    var x133: u64 = undefined;
+    var x134: u1 = undefined;
+    addcarryxU64(&x133, &x134, x132, x109, x125);
+    var x135: u64 = undefined;
+    var x136: u1 = undefined;
+    addcarryxU64(&x135, &x136, x134, x111, x127);
+    var x137: u64 = undefined;
+    var x138: u1 = undefined;
+    addcarryxU64(&x137, &x138, x136, x113, x129);
+    var x139: u64 = undefined;
+    var x140: u1 = undefined;
+    addcarryxU64(&x139, &x140, x138, ((cast(u64, x114) + cast(u64, x92)) + (cast(u64, x106) + x94)), (cast(u64, x130) + x118));
+    var x141: u64 = undefined;
+    var x142: u64 = undefined;
+    mulxU64(&x141, &x142, x3, 0x66e12d94f3d95620);
+    var x143: u64 = undefined;
+    var x144: u64 = undefined;
+    mulxU64(&x143, &x144, x3, 0x2845b2392b6bec59);
+    var x145: u64 = undefined;
+    var x146: u64 = undefined;
+    mulxU64(&x145, &x146, x3, 0x4699799c49bd6fa6);
+    var x147: u64 = undefined;
+    var x148: u64 = undefined;
+    mulxU64(&x147, &x148, x3, 0x83244c95be79eea2);
+    var x149: u64 = undefined;
+    var x150: u1 = undefined;
+    addcarryxU64(&x149, &x150, 0x0, x148, x145);
+    var x151: u64 = undefined;
+    var x152: u1 = undefined;
+    addcarryxU64(&x151, &x152, x150, x146, x143);
+    var x153: u64 = undefined;
+    var x154: u1 = undefined;
+    addcarryxU64(&x153, &x154, x152, x144, x141);
+    var x155: u64 = undefined;
+    var x156: u1 = undefined;
+    addcarryxU64(&x155, &x156, 0x0, x133, x147);
+    var x157: u64 = undefined;
+    var x158: u1 = undefined;
+    addcarryxU64(&x157, &x158, x156, x135, x149);
+    var x159: u64 = undefined;
+    var x160: u1 = undefined;
+    addcarryxU64(&x159, &x160, x158, x137, x151);
+    var x161: u64 = undefined;
+    var x162: u1 = undefined;
+    addcarryxU64(&x161, &x162, x160, x139, x153);
+    var x163: u64 = undefined;
+    var x164: u64 = undefined;
+    mulxU64(&x163, &x164, x155, 0xccd1c8aaee00bc4f);
+    var x165: u64 = undefined;
+    var x166: u64 = undefined;
+    mulxU64(&x165, &x166, x163, 0xffffffff00000000);
+    var x167: u64 = undefined;
+    var x168: u64 = undefined;
+    mulxU64(&x167, &x168, x163, 0xffffffffffffffff);
+    var x169: u64 = undefined;
+    var x170: u64 = undefined;
+    mulxU64(&x169, &x170, x163, 0xbce6faada7179e84);
+    var x171: u64 = undefined;
+    var x172: u64 = undefined;
+    mulxU64(&x171, &x172, x163, 0xf3b9cac2fc632551);
+    var x173: u64 = undefined;
+    var x174: u1 = undefined;
+    addcarryxU64(&x173, &x174, 0x0, x172, x169);
+    var x175: u64 = undefined;
+    var x176: u1 = undefined;
+    addcarryxU64(&x175, &x176, x174, x170, x167);
+    var x177: u64 = undefined;
+    var x178: u1 = undefined;
+    addcarryxU64(&x177, &x178, x176, x168, x165);
+    var x179: u64 = undefined;
+    var x180: u1 = undefined;
+    addcarryxU64(&x179, &x180, 0x0, x155, x171);
+    var x181: u64 = undefined;
+    var x182: u1 = undefined;
+    addcarryxU64(&x181, &x182, x180, x157, x173);
+    var x183: u64 = undefined;
+    var x184: u1 = undefined;
+    addcarryxU64(&x183, &x184, x182, x159, x175);
+    var x185: u64 = undefined;
+    var x186: u1 = undefined;
+    addcarryxU64(&x185, &x186, x184, x161, x177);
+    var x187: u64 = undefined;
+    var x188: u1 = undefined;
+    addcarryxU64(&x187, &x188, x186, ((cast(u64, x162) + cast(u64, x140)) + (cast(u64, x154) + x142)), (cast(u64, x178) + x166));
+    var x189: u64 = undefined;
+    var x190: u1 = undefined;
+    subborrowxU64(&x189, &x190, 0x0, x181, 0xf3b9cac2fc632551);
+    var x191: u64 = undefined;
+    var x192: u1 = undefined;
+    subborrowxU64(&x191, &x192, x190, x183, 0xbce6faada7179e84);
+    var x193: u64 = undefined;
+    var x194: u1 = undefined;
+    subborrowxU64(&x193, &x194, x192, x185, 0xffffffffffffffff);
+    var x195: u64 = undefined;
+    var x196: u1 = undefined;
+    subborrowxU64(&x195, &x196, x194, x187, 0xffffffff00000000);
+    var x197: u64 = undefined;
+    var x198: u1 = undefined;
+    subborrowxU64(&x197, &x198, x196, cast(u64, x188), cast(u64, 0x0));
+    var x199: u64 = undefined;
+    cmovznzU64(&x199, x198, x189, x181);
+    var x200: u64 = undefined;
+    cmovznzU64(&x200, x198, x191, x183);
+    var x201: u64 = undefined;
+    cmovznzU64(&x201, x198, x193, x185);
+    var x202: u64 = undefined;
+    cmovznzU64(&x202, x198, x195, x187);
+    out1[0] = x199;
+    out1[1] = x200;
+    out1[2] = x201;
+    out1[3] = x202;
+}
+
+/// The function nonzero writes the bitwise OR of the four limbs of arg1 to out1,
+/// so out1 is zero exactly when every limb of arg1 is zero.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
+///
+/// Input Bounds:
+///   arg1: four limbs, each in [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+pub fn nonzero(out1: *u64, arg1: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    out1.* = arg1[0] | arg1[1] | arg1[2] | arg1[3];
+}
+
+/// The function selectznz picks one of two four-limb values, limb by limb,
+/// using cmovznzU64 with the same condition bit for every limb.
+/// Postconditions:
+///   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+///
+/// Input Bounds:
+///   arg1: [0x0 ~> 0x1]
+///   arg2: four limbs, each in [0x0 ~> 0xffffffffffffffff]
+///   arg3: four limbs, each in [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: four limbs, each in [0x0 ~> 0xffffffffffffffff]
+pub fn selectznz(out1: *[4]u64, arg1: u1, arg2: [4]u64, arg3: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    // All four selections are made before anything is stored to out1.
+    var selected: [4]u64 = undefined;
+    cmovznzU64(&selected[0], arg1, arg2[0], arg3[0]);
+    cmovznzU64(&selected[1], arg1, arg2[1], arg3[1]);
+    cmovznzU64(&selected[2], arg1, arg2[2], arg3[2]);
+    cmovznzU64(&selected[3], arg1, arg2[3], arg3[3]);
+    out1.* = selected;
+}
+
+/// The function toBytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
+/// Limb i of arg1 is written to out1[8*i .. 8*i+7], least-significant byte first.
+/// Preconditions:
+///   0 ≤ eval arg1 < m
+/// Postconditions:
+///   out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
+///
+/// Input Bounds:
+///   arg1: four limbs, each in [0x0 ~> 0xffffffffffffffff]
+/// Output Bounds:
+///   out1: 32 bytes, each in [0x0 ~> 0xff]
+pub fn toBytes(out1: *[32]u8, arg1: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    // Read every limb before the first store.
+    const limb0 = arg1[0];
+    const limb1 = arg1[1];
+    const limb2 = arg1[2];
+    const limb3 = arg1[3];
+
+    // Limb 0 -> bytes 0..7. The top byte needs no mask: the shift leaves 8 bits.
+    out1[0] = cast(u8, limb0 & 0xff);
+    out1[1] = cast(u8, (limb0 >> 8) & 0xff);
+    out1[2] = cast(u8, (limb0 >> 16) & 0xff);
+    out1[3] = cast(u8, (limb0 >> 24) & 0xff);
+    out1[4] = cast(u8, (limb0 >> 32) & 0xff);
+    out1[5] = cast(u8, (limb0 >> 40) & 0xff);
+    out1[6] = cast(u8, (limb0 >> 48) & 0xff);
+    out1[7] = cast(u8, limb0 >> 56);
+
+    // Limb 1 -> bytes 8..15.
+    out1[8] = cast(u8, limb1 & 0xff);
+    out1[9] = cast(u8, (limb1 >> 8) & 0xff);
+    out1[10] = cast(u8, (limb1 >> 16) & 0xff);
+    out1[11] = cast(u8, (limb1 >> 24) & 0xff);
+    out1[12] = cast(u8, (limb1 >> 32) & 0xff);
+    out1[13] = cast(u8, (limb1 >> 40) & 0xff);
+    out1[14] = cast(u8, (limb1 >> 48) & 0xff);
+    out1[15] = cast(u8, limb1 >> 56);
+
+    // Limb 2 -> bytes 16..23.
+    out1[16] = cast(u8, limb2 & 0xff);
+    out1[17] = cast(u8, (limb2 >> 8) & 0xff);
+    out1[18] = cast(u8, (limb2 >> 16) & 0xff);
+    out1[19] = cast(u8, (limb2 >> 24) & 0xff);
+    out1[20] = cast(u8, (limb2 >> 32) & 0xff);
+    out1[21] = cast(u8, (limb2 >> 40) & 0xff);
+    out1[22] = cast(u8, (limb2 >> 48) & 0xff);
+    out1[23] = cast(u8, limb2 >> 56);
+
+    // Limb 3 -> bytes 24..31.
+    out1[24] = cast(u8, limb3 & 0xff);
+    out1[25] = cast(u8, (limb3 >> 8) & 0xff);
+    out1[26] = cast(u8, (limb3 >> 16) & 0xff);
+    out1[27] = cast(u8, (limb3 >> 24) & 0xff);
+    out1[28] = cast(u8, (limb3 >> 32) & 0xff);
+    out1[29] = cast(u8, (limb3 >> 40) & 0xff);
+    out1[30] = cast(u8, (limb3 >> 48) & 0xff);
+    out1[31] = cast(u8, limb3 >> 56);
+}
+
+/// The function fromBytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
+/// Bytes arg1[8*i .. 8*i+7] are assembled into limb i of out1, least-significant byte first.
+/// Preconditions:
+///   0 ≤ bytes_eval arg1 < m
+/// Postconditions:
+///   eval out1 mod m = bytes_eval arg1 mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+///   arg1: 32 bytes, each in [0x0 ~> 0xff]
+/// Output Bounds:
+///   out1: four limbs, each in [0x0 ~> 0xffffffffffffffff]
+pub fn fromBytes(out1: *[4]u64, arg1: [32]u8) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    // Each byte lands in its own 8-bit field of the limb, so OR-ing the
+    // shifted bytes yields the same value as summing them.
+    const limb0 = cast(u64, arg1[0]) |
+        (cast(u64, arg1[1]) << 8) |
+        (cast(u64, arg1[2]) << 16) |
+        (cast(u64, arg1[3]) << 24) |
+        (cast(u64, arg1[4]) << 32) |
+        (cast(u64, arg1[5]) << 40) |
+        (cast(u64, arg1[6]) << 48) |
+        (cast(u64, arg1[7]) << 56);
+    const limb1 = cast(u64, arg1[8]) |
+        (cast(u64, arg1[9]) << 8) |
+        (cast(u64, arg1[10]) << 16) |
+        (cast(u64, arg1[11]) << 24) |
+        (cast(u64, arg1[12]) << 32) |
+        (cast(u64, arg1[13]) << 40) |
+        (cast(u64, arg1[14]) << 48) |
+        (cast(u64, arg1[15]) << 56);
+    const limb2 = cast(u64, arg1[16]) |
+        (cast(u64, arg1[17]) << 8) |
+        (cast(u64, arg1[18]) << 16) |
+        (cast(u64, arg1[19]) << 24) |
+        (cast(u64, arg1[20]) << 32) |
+        (cast(u64, arg1[21]) << 40) |
+        (cast(u64, arg1[22]) << 48) |
+        (cast(u64, arg1[23]) << 56);
+    const limb3 = cast(u64, arg1[24]) |
+        (cast(u64, arg1[25]) << 8) |
+        (cast(u64, arg1[26]) << 16) |
+        (cast(u64, arg1[27]) << 24) |
+        (cast(u64, arg1[28]) << 32) |
+        (cast(u64, arg1[29]) << 40) |
+        (cast(u64, arg1[30]) << 48) |
+        (cast(u64, arg1[31]) << 56);
+
+    out1[0] = limb0;
+    out1[1] = limb1;
+    out1[2] = limb2;
+    out1[3] = limb3;
+}
+
+/// The function setOne stores the Montgomery-domain representation of the field element one into out1.
+/// Postconditions:
+///   eval (from_montgomery out1) mod m = 1 mod m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+/// Output Bounds:
+///   out1: four limbs, each in [0x0 ~> 0xffffffffffffffff]
+pub fn setOne(out1: *[4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    // Limbs are listed from index 0 upwards.
+    out1.* = [4]u64{
+        0xc46353d039cdaaf,
+        0x4319055258e8617b,
+        0x0,
+        0xffffffff,
+    };
+}
+
+/// The function msat stores the saturated (five-limb) representation of the prime modulus into out1.
+/// Postconditions:
+///   twos_complement_eval out1 = m
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+/// Output Bounds:
+///   out1: five limbs, each in [0x0 ~> 0xffffffffffffffff]
+pub fn msat(out1: *[5]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    // Limbs are listed from index 0 upwards; the extra fifth limb is zero.
+    out1.* = [5]u64{
+        0xf3b9cac2fc632551,
+        0xbce6faada7179e84,
+        0xffffffffffffffff,
+        0xffffffff00000000,
+        0x0,
+    };
+}
+
+/// The function divstepPrecomp stores the precomputed constant used by Bernstein-Yang inversion (in montgomery form) into out1.
+/// Postconditions:
+///   eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if (log2 m) + 1 < 46 then ⌊(49 * ((log2 m) + 1) + 80) / 17⌋ else ⌊(49 * ((log2 m) + 1) + 57) / 17⌋)
+///   0 ≤ eval out1 < m
+///
+/// Input Bounds:
+/// Output Bounds:
+///   out1: four limbs, each in [0x0 ~> 0xffffffffffffffff]
+pub fn divstepPrecomp(out1: *[4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    // Limbs are listed from index 0 upwards.
+    out1.* = [4]u64{
+        0xd739262fb7fcfbb5,
+        0x8ac6f75d20074414,
+        0xc67428bfb5e3c256,
+        0x444962f2eda7aedf,
+    };
+}
+
+/// The function divstep computes a divstep (the step of the Bernstein-Yang inversion that divstepPrecomp refers to).
+/// Preconditions:
+///   0 ≤ eval arg4 < m
+///   0 ≤ eval arg5 < m
+/// Postconditions:
+///   out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1)
+///   twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2)
+///   twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋)
+///   eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m)
+///   eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg5) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m)
+///   0 ≤ eval out4 < m
+///   0 ≤ eval out5 < m
+///   0 ≤ eval out2 < m
+///   0 ≤ eval out3 < m
+///
+/// Input Bounds:
+///   arg1: [0x0 ~> 0xffffffffffffffff]
+///   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   arg4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   arg5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+/// Output Bounds:
+///   out1: [0x0 ~> 0xffffffffffffffff]
+///   out2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   out3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   out4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+///   out5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+pub fn divstep(out1: *u64, out2: *[5]u64, out3: *[5]u64, out4: *[4]u64, out5: *[4]u64, arg1: u64, arg2: [5]u64, arg3: [5]u64, arg4: [4]u64, arg5: [4]u64) void {
+    @setRuntimeSafety(mode == .Debug);
+
+    var x1: u64 = undefined;
+    var x2: u1 = undefined;
+    addcarryxU64(&x1, &x2, 0x0, (~arg1), cast(u64, 0x1)); // x1 = ~arg1 + 1, i.e. -arg1 in two's complement
+    const x3 = (cast(u1, (x1 >> 63)) & cast(u1, ((arg3[0]) & cast(u64, 0x1)))); // x3 = (top bit of -arg1) & (low bit of arg3): selects the `0 < arg1 ∧ arg3 is odd` branch of the postconditions
+    var x4: u64 = undefined;
+    var x5: u1 = undefined;
+    addcarryxU64(&x4, &x5, 0x0, (~arg1), cast(u64, 0x1)); // x4 = -arg1 again (same computation as x1)
+    var x6: u64 = undefined;
+    cmovznzU64(&x6, x3, arg1, x4); // x6 = if x3 then -arg1 else arg1
+    var x7: u64 = undefined;
+    cmovznzU64(&x7, x3, (arg2[0]), (arg3[0])); // x7..x11 = if x3 then arg3 else arg2 (stored to out2)
+    var x8: u64 = undefined;
+    cmovznzU64(&x8, x3, (arg2[1]), (arg3[1]));
+    var x9: u64 = undefined;
+    cmovznzU64(&x9, x3, (arg2[2]), (arg3[2]));
+    var x10: u64 = undefined;
+    cmovznzU64(&x10, x3, (arg2[3]), (arg3[3]));
+    var x11: u64 = undefined;
+    cmovznzU64(&x11, x3, (arg2[4]), (arg3[4]));
+    var x12: u64 = undefined;
+    var x13: u1 = undefined;
+    addcarryxU64(&x12, &x13, 0x0, cast(u64, 0x1), (~(arg2[0]))); // x12..x20 = 1 + ~arg2, i.e. -arg2 over five limbs
+    var x14: u64 = undefined;
+    var x15: u1 = undefined;
+    addcarryxU64(&x14, &x15, x13, cast(u64, 0x0), (~(arg2[1])));
+    var x16: u64 = undefined;
+    var x17: u1 = undefined;
+    addcarryxU64(&x16, &x17, x15, cast(u64, 0x0), (~(arg2[2])));
+    var x18: u64 = undefined;
+    var x19: u1 = undefined;
+    addcarryxU64(&x18, &x19, x17, cast(u64, 0x0), (~(arg2[3])));
+    var x20: u64 = undefined;
+    var x21: u1 = undefined;
+    addcarryxU64(&x20, &x21, x19, cast(u64, 0x0), (~(arg2[4])));
+    var x22: u64 = undefined;
+    cmovznzU64(&x22, x3, (arg3[0]), x12); // x22..x26 = if x3 then -arg2 else arg3
+    var x23: u64 = undefined;
+    cmovznzU64(&x23, x3, (arg3[1]), x14);
+    var x24: u64 = undefined;
+    cmovznzU64(&x24, x3, (arg3[2]), x16);
+    var x25: u64 = undefined;
+    cmovznzU64(&x25, x3, (arg3[3]), x18);
+    var x26: u64 = undefined;
+    cmovznzU64(&x26, x3, (arg3[4]), x20);
+    var x27: u64 = undefined;
+    cmovznzU64(&x27, x3, (arg4[0]), (arg5[0])); // x27..x30 = if x3 then arg5 else arg4
+    var x28: u64 = undefined;
+    cmovznzU64(&x28, x3, (arg4[1]), (arg5[1]));
+    var x29: u64 = undefined;
+    cmovznzU64(&x29, x3, (arg4[2]), (arg5[2]));
+    var x30: u64 = undefined;
+    cmovznzU64(&x30, x3, (arg4[3]), (arg5[3]));
+    var x31: u64 = undefined;
+    var x32: u1 = undefined;
+    addcarryxU64(&x31, &x32, 0x0, x27, x27); // x31..x37 (carry x38) = 2 * (x27..x30)
+    var x33: u64 = undefined;
+    var x34: u1 = undefined;
+    addcarryxU64(&x33, &x34, x32, x28, x28);
+    var x35: u64 = undefined;
+    var x36: u1 = undefined;
+    addcarryxU64(&x35, &x36, x34, x29, x29);
+    var x37: u64 = undefined;
+    var x38: u1 = undefined;
+    addcarryxU64(&x37, &x38, x36, x30, x30);
+    var x39: u64 = undefined;
+    var x40: u1 = undefined;
+    subborrowxU64(&x39, &x40, 0x0, x31, 0xf3b9cac2fc632551); // x39..x45 = doubled value minus the modulus limbs; final borrow ends up in x48
+    var x41: u64 = undefined;
+    var x42: u1 = undefined;
+    subborrowxU64(&x41, &x42, x40, x33, 0xbce6faada7179e84);
+    var x43: u64 = undefined;
+    var x44: u1 = undefined;
+    subborrowxU64(&x43, &x44, x42, x35, 0xffffffffffffffff);
+    var x45: u64 = undefined;
+    var x46: u1 = undefined;
+    subborrowxU64(&x45, &x46, x44, x37, 0xffffffff00000000);
+    var x47: u64 = undefined;
+    var x48: u1 = undefined;
+    subborrowxU64(&x47, &x48, x46, cast(u64, x38), cast(u64, 0x0));
+    const x49 = (arg4[3]);
+    const x50 = (arg4[2]);
+    const x51 = (arg4[1]);
+    const x52 = (arg4[0]);
+    var x53: u64 = undefined;
+    var x54: u1 = undefined;
+    subborrowxU64(&x53, &x54, 0x0, cast(u64, 0x0), x52); // x53..x59 = 0 - arg4; final borrow in x60
+    var x55: u64 = undefined;
+    var x56: u1 = undefined;
+    subborrowxU64(&x55, &x56, x54, cast(u64, 0x0), x51);
+    var x57: u64 = undefined;
+    var x58: u1 = undefined;
+    subborrowxU64(&x57, &x58, x56, cast(u64, 0x0), x50);
+    var x59: u64 = undefined;
+    var x60: u1 = undefined;
+    subborrowxU64(&x59, &x60, x58, cast(u64, 0x0), x49);
+    var x61: u64 = undefined;
+    cmovznzU64(&x61, x60, cast(u64, 0x0), 0xffffffffffffffff); // x61 = all-ones mask if the negation borrowed, else 0
+    var x62: u64 = undefined;
+    var x63: u1 = undefined;
+    addcarryxU64(&x62, &x63, 0x0, x53, (x61 & 0xf3b9cac2fc632551)); // x62..x68 = (0 - arg4) plus the modulus limbs when the mask is set
+    var x64: u64 = undefined;
+    var x65: u1 = undefined;
+    addcarryxU64(&x64, &x65, x63, x55, (x61 & 0xbce6faada7179e84));
+    var x66: u64 = undefined;
+    var x67: u1 = undefined;
+    addcarryxU64(&x66, &x67, x65, x57, x61);
+    var x68: u64 = undefined;
+    var x69: u1 = undefined;
+    addcarryxU64(&x68, &x69, x67, x59, (x61 & 0xffffffff00000000));
+    var x70: u64 = undefined;
+    cmovznzU64(&x70, x3, (arg5[0]), x62); // x70..x73 = if x3 then x62..x68 else arg5
+    var x71: u64 = undefined;
+    cmovznzU64(&x71, x3, (arg5[1]), x64);
+    var x72: u64 = undefined;
+    cmovznzU64(&x72, x3, (arg5[2]), x66);
+    var x73: u64 = undefined;
+    cmovznzU64(&x73, x3, (arg5[3]), x68);
+    const x74 = cast(u1, (x22 & cast(u64, 0x1))); // x74 = low bit of x22..x26
+    var x75: u64 = undefined;
+    cmovznzU64(&x75, x74, cast(u64, 0x0), x7); // x75..x79 = if x74 then x7..x11 else 0
+    var x76: u64 = undefined;
+    cmovznzU64(&x76, x74, cast(u64, 0x0), x8);
+    var x77: u64 = undefined;
+    cmovznzU64(&x77, x74, cast(u64, 0x0), x9);
+    var x78: u64 = undefined;
+    cmovznzU64(&x78, x74, cast(u64, 0x0), x10);
+    var x79: u64 = undefined;
+    cmovznzU64(&x79, x74, cast(u64, 0x0), x11);
+    var x80: u64 = undefined;
+    var x81: u1 = undefined;
+    addcarryxU64(&x80, &x81, 0x0, x22, x75); // x80..x88 = (x22..x26) + (x75..x79)
+    var x82: u64 = undefined;
+    var x83: u1 = undefined;
+    addcarryxU64(&x82, &x83, x81, x23, x76);
+    var x84: u64 = undefined;
+    var x85: u1 = undefined;
+    addcarryxU64(&x84, &x85, x83, x24, x77);
+    var x86: u64 = undefined;
+    var x87: u1 = undefined;
+    addcarryxU64(&x86, &x87, x85, x25, x78);
+    var x88: u64 = undefined;
+    var x89: u1 = undefined;
+    addcarryxU64(&x88, &x89, x87, x26, x79);
+    var x90: u64 = undefined;
+    cmovznzU64(&x90, x74, cast(u64, 0x0), x27); // x90..x93 = if x74 then x27..x30 else 0
+    var x91: u64 = undefined;
+    cmovznzU64(&x91, x74, cast(u64, 0x0), x28);
+    var x92: u64 = undefined;
+    cmovznzU64(&x92, x74, cast(u64, 0x0), x29);
+    var x93: u64 = undefined;
+    cmovznzU64(&x93, x74, cast(u64, 0x0), x30);
+    var x94: u64 = undefined;
+    var x95: u1 = undefined;
+    addcarryxU64(&x94, &x95, 0x0, x70, x90); // x94..x100 (carry x101) = (x70..x73) + (x90..x93)
+    var x96: u64 = undefined;
+    var x97: u1 = undefined;
+    addcarryxU64(&x96, &x97, x95, x71, x91);
+    var x98: u64 = undefined;
+    var x99: u1 = undefined;
+    addcarryxU64(&x98, &x99, x97, x72, x92);
+    var x100: u64 = undefined;
+    var x101: u1 = undefined;
+    addcarryxU64(&x100, &x101, x99, x73, x93);
+    var x102: u64 = undefined;
+    var x103: u1 = undefined;
+    subborrowxU64(&x102, &x103, 0x0, x94, 0xf3b9cac2fc632551); // x102..x108 = that sum minus the modulus limbs; final borrow ends up in x111
+    var x104: u64 = undefined;
+    var x105: u1 = undefined;
+    subborrowxU64(&x104, &x105, x103, x96, 0xbce6faada7179e84);
+    var x106: u64 = undefined;
+    var x107: u1 = undefined;
+    subborrowxU64(&x106, &x107, x105, x98, 0xffffffffffffffff);
+    var x108: u64 = undefined;
+    var x109: u1 = undefined;
+    subborrowxU64(&x108, &x109, x107, x100, 0xffffffff00000000);
+    var x110: u64 = undefined;
+    var x111: u1 = undefined;
+    subborrowxU64(&x110, &x111, x109, cast(u64, x101), cast(u64, 0x0));
+    var x112: u64 = undefined;
+    var x113: u1 = undefined;
+    addcarryxU64(&x112, &x113, 0x0, x6, cast(u64, 0x1)); // x112 = x6 + 1 (stored to out1)
+    const x114 = ((x80 >> 1) | ((x82 << 63) & 0xffffffffffffffff)); // x114..x118 = x80..x88 shifted right by one bit across limbs
+    const x115 = ((x82 >> 1) | ((x84 << 63) & 0xffffffffffffffff));
+    const x116 = ((x84 >> 1) | ((x86 << 63) & 0xffffffffffffffff));
+    const x117 = ((x86 >> 1) | ((x88 << 63) & 0xffffffffffffffff));
+    const x118 = ((x88 & 0x8000000000000000) | (x88 >> 1)); // top limb keeps its highest bit, so the sign is preserved
+    var x119: u64 = undefined;
+    cmovznzU64(&x119, x48, x39, x31); // x119..x122 = if x48 (borrow) then the unreduced x31..x37 else the reduced x39..x45
+    var x120: u64 = undefined;
+    cmovznzU64(&x120, x48, x41, x33);
+    var x121: u64 = undefined;
+    cmovznzU64(&x121, x48, x43, x35);
+    var x122: u64 = undefined;
+    cmovznzU64(&x122, x48, x45, x37);
+    var x123: u64 = undefined;
+    cmovznzU64(&x123, x111, x102, x94); // x123..x126 = if x111 (borrow) then the unreduced x94..x100 else the reduced x102..x108
+    var x124: u64 = undefined;
+    cmovznzU64(&x124, x111, x104, x96);
+    var x125: u64 = undefined;
+    cmovznzU64(&x125, x111, x106, x98);
+    var x126: u64 = undefined;
+    cmovznzU64(&x126, x111, x108, x100);
+    out1.* = x112;
+    out2[0] = x7;
+    out2[1] = x8;
+    out2[2] = x9;
+    out2[3] = x10;
+    out2[4] = x11;
+    out3[0] = x114;
+    out3[1] = x115;
+    out3[2] = x116;
+    out3[3] = x117;
+    out3[4] = x118;
+    out4[0] = x119;
+    out4[1] = x120;
+    out4[2] = x121;
+    out4[3] = x122;
+    out5[0] = x123;
+    out5[1] = x124;
+    out5[2] = x125;
+    out5[3] = x126;
+}
lib/std/crypto/pcurves/p256/scalar.zig
@@ -6,32 +6,44 @@
 
 const std = @import("std");
 const builtin = std.builtin;
+const common = @import("../common.zig");
 const crypto = std.crypto;
 const debug = std.debug;
 const math = std.math;
 const mem = std.mem;
 
-const Fe = @import("field.zig").Fe;
+const Field = common.Field;
 
 const NonCanonicalError = std.crypto.errors.NonCanonicalError;
 const NotSquareError = std.crypto.errors.NotSquareError;
 
+/// Number of bytes required to encode a scalar.
+pub const encoded_length = 32;
+
 /// A compressed scalar, in canonical form.
-pub const CompressedScalar = [32]u8;
+pub const CompressedScalar = [encoded_length]u8;
+
+const Fe = Field(.{
+    .fiat = @import("p256_scalar_64.zig"), // implementation namespace handed to the generic Field
+    .field_order = 115792089210356248762697446949407573529996955224135760342422259061068512044369, // field size, exposed as Fe.field_order
+    .field_bits = 256,
+    .saturated_bits = 255,
+    .encoded_length = encoded_length, // bytes per encoded scalar (32, declared above)
+});
 
 /// Reject a scalar whose encoding is not canonical.
-pub fn rejectNonCanonical(s: CompressedScalar) NonCanonicalError!void {
-    return Fe.rejectNonCanonical(s);
+pub fn rejectNonCanonical(s: CompressedScalar, endian: builtin.Endian) NonCanonicalError!void {
+    return Fe.rejectNonCanonical(s, endian);
 }
 
 /// Reduce a 48-byte scalar to the field size.
-pub fn reduce48(s: [48]u8) CompressedScalar {
-    return Scalar.fromBytes48(s).toBytes();
+pub fn reduce48(s: [48]u8, endian: builtin.Endian) CompressedScalar {
+    return Scalar.fromBytes48(s, endian).toBytes(endian);
 }
 
 /// Reduce a 64-byte scalar to the field size.
-pub fn reduce64(s: [64]u8) CompressedScalar {
-    return ScalarDouble.fromBytes64(s).toBytes();
+pub fn reduce64(s: [64]u8, endian: builtin.Endian) CompressedScalar {
+    return ScalarDouble.fromBytes64(s, endian).toBytes(endian);
 }
 
 /// Return a*b (mod L)
@@ -183,19 +195,19 @@ const ScalarDouble = struct {
         }
         var t = ScalarDouble{ .x1 = undefined, .x2 = Fe.zero, .x3 = Fe.zero };
         {
-            var b = [_]u8{0} ** 32;
+            var b = [_]u8{0} ** encoded_length;
             const len = math.min(s.len, 24);
             mem.copy(u8, b[0..len], s[0..len]);
             t.x1 = Fe.fromBytes(b, .Little) catch unreachable;
         }
         if (s_.len >= 24) {
-            var b = [_]u8{0} ** 32;
+            var b = [_]u8{0} ** encoded_length;
             const len = math.min(s.len - 24, 24);
             mem.copy(u8, b[0..len], s[24..][0..len]);
             t.x2 = Fe.fromBytes(b, .Little) catch unreachable;
         }
         if (s_.len >= 48) {
-            var b = [_]u8{0} ** 32;
+            var b = [_]u8{0} ** encoded_length;
             const len = s.len - 48;
             mem.copy(u8, b[0..len], s[48..][0..len]);
             t.x3 = Fe.fromBytes(b, .Little) catch unreachable;
lib/std/crypto/pcurves/common.zig
@@ -0,0 +1,284 @@
+const std = @import("std");
+const builtin = std.builtin;
+const crypto = std.crypto;
+const debug = std.debug;
+const mem = std.mem;
+const meta = std.meta;
+
+const NonCanonicalError = crypto.errors.NonCanonicalError;
+const NotSquareError = crypto.errors.NotSquareError;
+
+/// Parameters to create a finite field type.
+pub const FieldParams = struct {
+    /// Namespace providing the limb type (`Limbs`) and the low-level
+    /// arithmetic routines that `Field` calls (`add`, `mul`, `toMontgomery`, ...).
+    fiat: type,
+    /// Field size.
+    field_order: comptime_int,
+    /// Number of bits to represent the set of all elements.
+    field_bits: comptime_int,
+    /// Number of bits that can be saturated without overflowing.
+    saturated_bits: comptime_int,
+    /// Number of bytes required to encode an element.
+    encoded_length: comptime_int,
+};
+
+/// A field element, internally stored in Montgomery domain.
+pub fn Field(comptime params: FieldParams) type {
+    const fiat = params.fiat;
+    const Limbs = fiat.Limbs;
+
+    return struct {
+        const Fe = @This();
+
+        limbs: Limbs,
+
+        /// Field size.
+        pub const field_order = params.field_order;
+
+        /// Number of bits to represent the set of all elements.
+        pub const field_bits = params.field_bits;
+
+        /// Number of bits that can be saturated without overflowing.
+        pub const saturated_bits = params.saturated_bits;
+
+        /// Number of bytes required to encode an element.
+        pub const encoded_length = params.encoded_length;
+
+        /// Zero.
+        pub const zero: Fe = Fe{ .limbs = mem.zeroes(Limbs) };
+
+        /// One, as written by `fiat.setOne` and evaluated at compile time.
+        pub const one = comptime blk: {
+            var limbs: Limbs = undefined;
+            fiat.setOne(&limbs);
+            break :blk Fe{ .limbs = limbs };
+        };
+
+        /// Reject non-canonical encodings of an element.
+        /// Returns `error.NonCanonical` unless the encoded value is strictly
+        /// lower than the field order. `endian` is the byte order of `s_`.
+        pub fn rejectNonCanonical(s_: [encoded_length]u8, endian: builtin.Endian) NonCanonicalError!void {
+            const EncodedInt = std.meta.Int(.unsigned, encoded_length * 8);
+            // Little-endian encoding of the field order, built at compile time.
+            const order_le = comptime blk: {
+                var buf: [encoded_length]u8 = undefined;
+                mem.writeIntLittle(EncodedInt, &buf, field_order);
+                break :blk buf;
+            };
+            // The comparison is always done on little-endian byte strings.
+            const le = switch (endian) {
+                .Little => s_,
+                .Big => orderSwap(s_),
+            };
+            switch (crypto.utils.timingSafeCompare(u8, &le, &order_le, .Little)) {
+                .lt => {},
+                else => return error.NonCanonical,
+            }
+        }
+
+        /// Return a copy of an encoded element with its bytes in reverse order.
+        pub fn orderSwap(s: [encoded_length]u8) [encoded_length]u8 {
+            var reversed: [encoded_length]u8 = undefined;
+            var i: usize = 0;
+            while (i < encoded_length) : (i += 1) {
+                reversed[encoded_length - 1 - i] = s[i];
+            }
+            return reversed;
+        }
+
+        /// Unpack a field element from its `encoded_length`-byte encoding.
+        /// Returns `error.NonCanonical` if the encoding is rejected by
+        /// `rejectNonCanonical`. The result is converted with `fiat.toMontgomery`.
+        pub fn fromBytes(s_: [encoded_length]u8, endian: builtin.Endian) NonCanonicalError!Fe {
+            // Normalize to little-endian before validating and decoding.
+            const le = switch (endian) {
+                .Little => s_,
+                .Big => orderSwap(s_),
+            };
+            try rejectNonCanonical(le, .Little);
+            var plain: Limbs = undefined;
+            fiat.fromBytes(&plain, le);
+            var fe: Fe = undefined;
+            fiat.toMontgomery(&fe.limbs, plain);
+            return fe;
+        }
+
+        /// Pack a field element into `encoded_length` bytes, in the requested byte order.
+        pub fn toBytes(fe: Fe, endian: builtin.Endian) [encoded_length]u8 {
+            var plain: Limbs = undefined;
+            fiat.fromMontgomery(&plain, fe.limbs);
+            var le: [encoded_length]u8 = undefined;
+            fiat.toBytes(&le, plain);
+            return switch (endian) {
+                .Little => le,
+                .Big => orderSwap(le),
+            };
+        }
+
+        /// Element as an integer.
+        pub const IntRepr = meta.Int(.unsigned, params.field_bits);
+
+        /// Create a field element from a compile-time known integer.
+        /// Returns `error.NonCanonical` if `fromBytes` rejects the encoded value.
+        pub fn fromInt(comptime x: IntRepr) NonCanonicalError!Fe {
+            var encoded: [encoded_length]u8 = undefined;
+            mem.writeIntLittle(IntRepr, &encoded, x);
+            return Fe.fromBytes(encoded, .Little);
+        }
+
+        /// Return the field element as an integer, read from its little-endian encoding.
+        pub fn toInt(fe: Fe) IntRepr {
+            const encoded = toBytes(fe, .Little);
+            return mem.readIntLittle(IntRepr, &encoded);
+        }
+
+        /// Return true if the field element is zero.
+        pub fn isZero(fe: Fe) bool {
+            const Word = @TypeOf(fe.limbs[0]);
+            var nz: Word = undefined;
+            fiat.nonzero(&nz, fe.limbs);
+            return nz == 0;
+        }
+
+        /// Return true if both field elements are equivalent, i.e. `a - b` is zero.
+        pub fn equivalent(a: Fe, b: Fe) bool {
+            const diff = sub(a, b);
+            return isZero(diff);
+        }
+
+        /// Return true if the element is odd (lowest bit of its little-endian encoding is set).
+        pub fn isOdd(fe: Fe) bool {
+            const encoded = toBytes(fe, .Little);
+            return (encoded[0] & 1) == 1;
+        }
+
+        /// Conditionally replace a field element with `a` if `c` is 1.
+        /// The selection is delegated to `fiat.selectznz`, which writes the result into `fe`.
+        pub fn cMov(fe: *Fe, a: Fe, c: u1) void {
+            fiat.selectznz(&fe.limbs, c, fe.limbs, a.limbs);
+        }
+
+        /// Return the sum `a + b` of two field elements.
+        pub fn add(a: Fe, b: Fe) Fe {
+            var sum: Limbs = undefined;
+            fiat.add(&sum, a.limbs, b.limbs);
+            return Fe{ .limbs = sum };
+        }
+
+        /// Return the difference `a - b` of two field elements.
+        pub fn sub(a: Fe, b: Fe) Fe {
+            var difference: Limbs = undefined;
+            fiat.sub(&difference, a.limbs, b.limbs);
+            return Fe{ .limbs = difference };
+        }
+
+        /// Double a field element (computed as `a + a`).
+        pub fn dbl(a: Fe) Fe {
+            return add(a, a);
+        }
+
+        /// Return the product `a * b` of two field elements.
+        pub fn mul(a: Fe, b: Fe) Fe {
+            var product: Limbs = undefined;
+            fiat.mul(&product, a.limbs, b.limbs);
+            return Fe{ .limbs = product };
+        }
+
+        /// Return the square of a field element.
+        pub fn sq(a: Fe) Fe {
+            var squared: Limbs = undefined;
+            fiat.square(&squared, a.limbs);
+            return Fe{ .limbs = squared };
+        }
+
+        /// Square a field element `n` times in a row.
+        fn sqn(a: Fe, comptime n: comptime_int) Fe {
+            var acc = a;
+            var remaining: usize = n;
+            while (remaining > 0) : (remaining -= 1) {
+                acc = sq(acc);
+            }
+            return acc;
+        }
+
+        /// Compute a^n by square-and-multiply, scanning the bits of `n` from the lowest up.
+        /// `T` is the integer type used to hold the exponent.
+        pub fn pow(a: Fe, comptime T: type, comptime n: T) Fe {
+            var result = one;
+            var base = a;
+            var exponent: T = n;
+            while (true) {
+                const bit = @truncate(u1, exponent);
+                if (bit != 0) {
+                    result = mul(result, base);
+                }
+                exponent >>= 1;
+                // Stop before squaring again once no bits remain.
+                if (exponent == 0) break;
+                base = sq(base);
+            }
+            return result;
+        }
+
+        /// Return the negation of a field element.
+        pub fn neg(a: Fe) Fe {
+            var negated: Limbs = undefined;
+            fiat.opp(&negated, a.limbs);
+            return Fe{ .limbs = negated };
+        }
+
+        /// Return the inverse of a field element, or 0 if a=0.
+        /// Runs a fixed number of `fiat.divstep` iterations that depends only on
+        /// `field_bits`, then multiplies the result by `fiat.divstepPrecomp`.
+        // Field inversion from https://eprint.iacr.org/2021/549.pdf
+        pub fn invert(a: Fe) Fe {
+            // Iteration count is a compile-time function of the field size.
+            const iterations = (49 * field_bits + 57) / 17;
+            const Word = @TypeOf(a.limbs[0]);
+            // `f` and `g` carry one more word than a regular limb array.
+            const XLimbs = [a.limbs.len + 1]Word;
+
+            var d: Word = 1;
+            var f: XLimbs = undefined;
+            fiat.msat(&f);
+
+            // `g` starts as `a` taken out of the Montgomery domain, with the extra top word cleared.
+            var g: XLimbs = undefined;
+            fiat.fromMontgomery(g[0..a.limbs.len], a.limbs);
+            g[g.len - 1] = 0;
+
+            var r: Limbs = undefined;
+            fiat.setOne(&r);
+            var v = mem.zeroes(Limbs);
+
+            var precomp: Limbs = undefined;
+            fiat.divstepPrecomp(&precomp);
+
+            // Scratch outputs, so that each divstep reads one set of variables and writes another.
+            var out1: Word = undefined;
+            var out2: XLimbs = undefined;
+            var out3: XLimbs = undefined;
+            var out4: Limbs = undefined;
+            var out5: Limbs = undefined;
+
+            // Two divsteps per pass: (d,f,g,v,r) -> out*, then out* -> (d,f,g,v,r).
+            var i: usize = 0;
+            while (i < iterations - iterations % 2) : (i += 2) {
+                fiat.divstep(&out1, &out2, &out3, &out4, &out5, d, f, g, v, r);
+                fiat.divstep(&d, &f, &g, &v, &r, out1, out2, out3, out4, out5);
+            }
+            // Odd iteration count: run the last step and copy back only `v` and `f`,
+            // the two values read below.
+            if (iterations % 2 != 0) {
+                fiat.divstep(&out1, &out2, &out3, &out4, &out5, d, f, g, v, r);
+                mem.copy(Word, &v, &out4);
+                mem.copy(Word, &f, &out2);
+            }
+            // Pick `v` or its negation based on the top bit of the most significant word of `f`.
+            var v_opp: Limbs = undefined;
+            fiat.opp(&v_opp, v);
+            fiat.selectznz(&v, @truncate(u1, f[f.len - 1] >> (meta.bitCount(Word) - 1)), v, v_opp);
+            var fe: Fe = undefined;
+            fiat.mul(&fe.limbs, v, precomp);
+            return fe;
+        }
+
+        /// Return true if the field element is a square.
+        /// The generic branch raises `x2` to `(field_order - 1) / 2` (the Legendre
+        /// symbol) and compares the result with one.
+        pub fn isSquare(x2: Fe) bool {
+            // Hard-coded chain of squarings and multiplications for this specific field order.
+            // NOTE(review): presumably it evaluates the same exponent as the generic branch; not verified here.
+            if (field_order == 115792089210356248762697446949407573530086143415290314195533631308867097853951) {
+                const t110 = x2.mul(x2.sq()).sq();
+                const t111 = x2.mul(t110);
+                const t111111 = t111.mul(x2.mul(t110).sqn(3));
+                const x15 = t111111.sqn(6).mul(t111111).sqn(3).mul(t111);
+                const x16 = x15.sq().mul(x2);
+                const x53 = x16.sqn(16).mul(x16).sqn(15);
+                const x47 = x15.mul(x53);
+                const ls = x47.mul(((x53.sqn(17).mul(x2)).sqn(143).mul(x47)).sqn(47)).sq().mul(x2);
+                return ls.equivalent(Fe.one);
+            } else {
+                const ls = x2.pow(std.meta.Int(.unsigned, field_bits), (field_order - 1) / 2); // Legendre symbol
+                return ls.equivalent(Fe.one);
+            }
+        }
+
+        // Square root candidate: x = x2^((field_order + 1) / 4), for a field order
+        // congruent to 3 (mod 4), which is asserted at compile time.
+        // The result is not checked here; `sqrt` verifies it by squaring.
+        fn uncheckedSqrt(x2: Fe) Fe {
+            comptime debug.assert(field_order % 4 == 3);
+            // Hard-coded chain of squarings and multiplications for this specific field order.
+            // NOTE(review): presumably equal to the generic exponentiation below; not verified here.
+            if (field_order == 115792089210356248762697446949407573530086143415290314195533631308867097853951) {
+                const t11 = x2.mul(x2.sq());
+                const t1111 = t11.mul(t11.sqn(2));
+                const t11111111 = t1111.mul(t1111.sqn(4));
+                const x16 = t11111111.sqn(8).mul(t11111111);
+                return x16.sqn(16).mul(x16).sqn(32).mul(x2).sqn(96).mul(x2).sqn(94);
+            } else {
+                return x2.pow(std.meta.Int(.unsigned, field_bits), (field_order + 1) / 4);
+            }
+        }
+
+        /// Compute the square root of `x2`, returning `error.NotSquare` if `x2` was not a square.
+        /// The candidate root is accepted only if squaring it gives back `x2`.
+        pub fn sqrt(x2: Fe) NotSquareError!Fe {
+            const candidate = uncheckedSqrt(x2);
+            if (!equivalent(sq(candidate), x2)) {
+                return error.NotSquare;
+            }
+            return candidate;
+        }
+    };
+}