Commit a0b35249a2

Frank Denis <124872+jedisct1@users.noreply.github.com>
2023-07-18 00:40:31
Replace hand-written endian-specific loads with std.mem.readInt*() (#16431)
And when we have the choice, favor little-endian because it's 2023. This gives a slight performance improvement: md5: 552 -> 555 MiB/s, sha1: 768 -> 786 MiB/s, sha512: 211 -> 217 MiB/s.
1 parent a86f589
Changed files (4)
lib/std/crypto/aes/soft.zig
@@ -122,14 +122,14 @@ pub const Block = struct {
 
         // Last round uses s-box directly and XORs to produce output.
         var x: [4]u8 = undefined;
-        x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s3 >> 24)), @as(u8, @truncate(s2 >> 16)), @as(u8, @truncate(s1 >> 8)), @as(u8, @truncate(s0)));
-        var t0 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
-        x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s0 >> 24)), @as(u8, @truncate(s3 >> 16)), @as(u8, @truncate(s2 >> 8)), @as(u8, @truncate(s1)));
-        var t1 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
-        x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s1 >> 24)), @as(u8, @truncate(s0 >> 16)), @as(u8, @truncate(s3 >> 8)), @as(u8, @truncate(s2)));
-        var t2 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
-        x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s2 >> 24)), @as(u8, @truncate(s1 >> 16)), @as(u8, @truncate(s0 >> 8)), @as(u8, @truncate(s3)));
-        var t3 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
+        x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s0)), @as(u8, @truncate(s1 >> 8)), @as(u8, @truncate(s2 >> 16)), @as(u8, @truncate(s3 >> 24)));
+        var t0 = mem.readIntLittle(u32, &x);
+        x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s1)), @as(u8, @truncate(s2 >> 8)), @as(u8, @truncate(s3 >> 16)), @as(u8, @truncate(s0 >> 24)));
+        var t1 = mem.readIntLittle(u32, &x);
+        x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s2)), @as(u8, @truncate(s3 >> 8)), @as(u8, @truncate(s0 >> 16)), @as(u8, @truncate(s1 >> 24)));
+        var t2 = mem.readIntLittle(u32, &x);
+        x = sbox_lookup(&sbox_encrypt, @as(u8, @truncate(s3)), @as(u8, @truncate(s0 >> 8)), @as(u8, @truncate(s1 >> 16)), @as(u8, @truncate(s2 >> 24)));
+        var t3 = mem.readIntLittle(u32, &x);
 
         t0 ^= round_key.repr[0];
         t1 ^= round_key.repr[1];
@@ -218,14 +218,14 @@ pub const Block = struct {
 
         // Last round uses s-box directly and XORs to produce output.
         var x: [4]u8 = undefined;
-        x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s1 >> 24)), @as(u8, @truncate(s2 >> 16)), @as(u8, @truncate(s3 >> 8)), @as(u8, @truncate(s0)));
-        var t0 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
-        x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s2 >> 24)), @as(u8, @truncate(s3 >> 16)), @as(u8, @truncate(s0 >> 8)), @as(u8, @truncate(s1)));
-        var t1 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
-        x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s3 >> 24)), @as(u8, @truncate(s0 >> 16)), @as(u8, @truncate(s1 >> 8)), @as(u8, @truncate(s2)));
-        var t2 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
-        x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s0 >> 24)), @as(u8, @truncate(s1 >> 16)), @as(u8, @truncate(s2 >> 8)), @as(u8, @truncate(s3)));
-        var t3 = @as(u32, x[0]) << 24 | @as(u32, x[1]) << 16 | @as(u32, x[2]) << 8 | @as(u32, x[3]);
+        x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s0)), @as(u8, @truncate(s3 >> 8)), @as(u8, @truncate(s2 >> 16)), @as(u8, @truncate(s1 >> 24)));
+        var t0 = mem.readIntLittle(u32, &x);
+        x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s1)), @as(u8, @truncate(s0 >> 8)), @as(u8, @truncate(s3 >> 16)), @as(u8, @truncate(s2 >> 24)));
+        var t1 = mem.readIntLittle(u32, &x);
+        x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s2)), @as(u8, @truncate(s1 >> 8)), @as(u8, @truncate(s0 >> 16)), @as(u8, @truncate(s3 >> 24)));
+        var t2 = mem.readIntLittle(u32, &x);
+        x = sbox_lookup(&sbox_decrypt, @as(u8, @truncate(s3)), @as(u8, @truncate(s2 >> 8)), @as(u8, @truncate(s1 >> 16)), @as(u8, @truncate(s0 >> 24)));
+        var t3 = mem.readIntLittle(u32, &x);
 
         t0 ^= round_key.repr[0];
         t1 ^= round_key.repr[1];
@@ -349,7 +349,7 @@ fn KeySchedule(comptime Aes: type) type {
                 // Apply sbox_encrypt to each byte in w.
                 fn func(w: u32) u32 {
                     const x = sbox_lookup(&sbox_key_schedule, @as(u8, @truncate(w)), @as(u8, @truncate(w >> 8)), @as(u8, @truncate(w >> 16)), @as(u8, @truncate(w >> 24)));
-                    return @as(u32, x[3]) << 24 | @as(u32, x[2]) << 16 | @as(u32, x[1]) << 8 | @as(u32, x[0]);
+                    return mem.readIntLittle(u32, &x);
                 }
             }.func;
 
lib/std/crypto/md5.zig
@@ -121,12 +121,7 @@ pub const Md5 = struct {
 
         var i: usize = 0;
         while (i < 16) : (i += 1) {
-            // NOTE: Performing or's separately improves perf by ~10%
-            s[i] = 0;
-            s[i] |= @as(u32, b[i * 4 + 0]);
-            s[i] |= @as(u32, b[i * 4 + 1]) << 8;
-            s[i] |= @as(u32, b[i * 4 + 2]) << 16;
-            s[i] |= @as(u32, b[i * 4 + 3]) << 24;
+            s[i] = mem.readIntLittle(u32, b[i * 4 ..][0..4]);
         }
 
         var v: [4]u32 = [_]u32{
lib/std/crypto/sha1.zig
@@ -151,7 +151,7 @@ pub const Sha1 = struct {
             roundParam(0, 1, 2, 3, 4, 15),
         };
         inline for (round0a) |r| {
-            s[r.i] = (@as(u32, b[r.i * 4 + 0]) << 24) | (@as(u32, b[r.i * 4 + 1]) << 16) | (@as(u32, b[r.i * 4 + 2]) << 8) | (@as(u32, b[r.i * 4 + 3]) << 0);
+            s[r.i] = mem.readIntBig(u32, b[r.i * 4 ..][0..4]);
 
             v[r.e] = v[r.e] +% math.rotl(u32, v[r.a], @as(u32, 5)) +% 0x5A827999 +% s[r.i & 0xf] +% ((v[r.b] & v[r.c]) | (~v[r.b] & v[r.d]));
             v[r.b] = math.rotl(u32, v[r.b], @as(u32, 30));
lib/std/crypto/sha2.zig
@@ -678,15 +678,7 @@ fn Sha2x64(comptime params: Sha2Params64) type {
 
             var i: usize = 0;
             while (i < 16) : (i += 1) {
-                s[i] = 0;
-                s[i] |= @as(u64, b[i * 8 + 0]) << 56;
-                s[i] |= @as(u64, b[i * 8 + 1]) << 48;
-                s[i] |= @as(u64, b[i * 8 + 2]) << 40;
-                s[i] |= @as(u64, b[i * 8 + 3]) << 32;
-                s[i] |= @as(u64, b[i * 8 + 4]) << 24;
-                s[i] |= @as(u64, b[i * 8 + 5]) << 16;
-                s[i] |= @as(u64, b[i * 8 + 6]) << 8;
-                s[i] |= @as(u64, b[i * 8 + 7]) << 0;
+                s[i] = mem.readIntBig(u64, b[i * 8 ..][0..8]);
             }
             while (i < 80) : (i += 1) {
                 s[i] = s[i - 16] +% s[i - 7] +%