Commit 59ac0d1eed

Carl Åstholm <carl@astholm.se>
2023-12-19 22:21:03
Deprecate `suggestVectorSize` in favor of `suggestVectorLength`
The function returns the vector length, not the byte size of the vector or the bit size of individual elements. This distinction is very important and some usages of this function in the stdlib operated under these incorrect assumptions.
1 parent 781c3a9
Changed files (5)
lib/std/crypto/ghash_polyval.zig
@@ -158,11 +158,7 @@ fn Hash(comptime endian: std.builtin.Endian, comptime shift_key: bool) type {
         /// clmulSoft128_64 is faster on platforms with no native 128-bit registers.
         const clmulSoft = switch (builtin.cpu.arch) {
             .wasm32, .wasm64 => clmulSoft128_64,
-            else => impl: {
-                const vector_size = std.simd.suggestVectorSize(u128) orelse 0;
-                if (vector_size < 128) break :impl clmulSoft128_64;
-                break :impl clmulSoft128;
-            },
+            else => if (std.simd.suggestVectorLength(u128) != null) clmulSoft128 else clmulSoft128_64,
         };
 
         // Software carryless multiplication of two 64-bit integers using native 128-bit registers.
lib/std/http/protocol.zig
@@ -84,7 +84,7 @@ pub const HeadersParser = struct {
     /// If the amount returned is less than `bytes.len`, you may assume that the parser is in a content state and the
     /// first byte of content is located at `bytes[result]`.
     pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 {
-        const vector_len: comptime_int = @max(std.simd.suggestVectorSize(u8) orelse 1, 8);
+        const vector_len: comptime_int = @max(std.simd.suggestVectorLength(u8) orelse 1, 8);
         const len: u32 = @intCast(bytes.len);
         var index: u32 = 0;
 
lib/std/mem.zig
@@ -966,7 +966,7 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co
             // The below branch assumes that reading past the end of the buffer is valid, as long
             // as we don't read into a new page. This should be the case for most architectures
             // which use paged memory, however should be confirmed before adding a new arch below.
-            .aarch64, .x86, .x86_64 => if (std.simd.suggestVectorSize(T)) |block_len| {
+            .aarch64, .x86, .x86_64 => if (std.simd.suggestVectorLength(T)) |block_len| {
                 const Block = @Vector(block_len, T);
                 const mask: Block = @splat(sentinel);
 
@@ -1020,7 +1020,7 @@ test "indexOfSentinel vector paths" {
     const allocator = std.testing.allocator;
 
     inline for (Types) |T| {
-        const block_len = std.simd.suggestVectorSize(T) orelse continue;
+        const block_len = std.simd.suggestVectorLength(T) orelse continue;
 
         // Allocate three pages so we guarantee a page-crossing address with a full page after
         const memory = try allocator.alloc(T, 3 * std.mem.page_size / @sizeOf(T));
@@ -1111,11 +1111,11 @@ pub fn indexOfScalarPos(comptime T: type, slice: []const T, start_index: usize,
         !@inComptime() and
         (@typeInfo(T) == .Int or @typeInfo(T) == .Float) and std.math.isPowerOfTwo(@bitSizeOf(T)))
     {
-        if (std.simd.suggestVectorSize(T)) |block_len| {
+        if (std.simd.suggestVectorLength(T)) |block_len| {
             // For Intel Nehalem (2009) and AMD Bulldozer (2012) or later, unaligned loads on aligned data result
             // in the same execution as aligned loads. We ignore older arch's here and don't bother pre-aligning.
             //
-            // Use `std.simd.suggestVectorSize(T)` to get the same alignment as used in this function
+            // Use `std.simd.suggestVectorLength(T)` to get the same alignment as used in this function
             // however this usually isn't necessary unless your arch has a performance penalty due to this.
             //
             // This may differ for other arch's. Arm for example costs a cycle when loading across a cache
lib/std/simd.zig
@@ -6,7 +6,9 @@
 const std = @import("std");
 const builtin = @import("builtin");
 
-pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?comptime_int {
+pub const suggestVectorSizeForCpu = @compileError("deprecated; use 'suggestVectorLengthForCpu'");
+
+pub fn suggestVectorLengthForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?comptime_int {
     // This is guesswork, if you have better suggestions can add it or edit the current here
     // This can run in comptime only, but stage 1 fails at it, stage 2 can understand it
     const element_bit_size = @max(8, std.math.ceilPowerOfTwo(u16, @bitSizeOf(T)) catch unreachable);
@@ -53,24 +55,26 @@ pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?
     return @divExact(vector_bit_size, element_bit_size);
 }
 
-/// Suggests a target-dependant vector size for a given type, or null if scalars are recommended.
+pub const suggestVectorSize = @compileError("deprecated; use 'suggestVectorLength'");
+
+/// Suggests a target-dependent vector length for a given type, or null if scalars are recommended.
 /// Not yet implemented for every CPU architecture.
-pub fn suggestVectorSize(comptime T: type) ?comptime_int {
-    return suggestVectorSizeForCpu(T, builtin.cpu);
+pub fn suggestVectorLength(comptime T: type) ?comptime_int {
+    return suggestVectorLengthForCpu(T, builtin.cpu);
 }
 
-test "suggestVectorSizeForCpu works with signed and unsigned values" {
+test "suggestVectorLengthForCpu works with signed and unsigned values" {
     comptime var cpu = std.Target.Cpu.baseline(std.Target.Cpu.Arch.x86_64);
     comptime cpu.features.addFeature(@intFromEnum(std.Target.x86.Feature.avx512f));
     comptime cpu.features.populateDependencies(&std.Target.x86.all_features);
-    const expected_size: usize = switch (builtin.zig_backend) {
+    const expected_len: usize = switch (builtin.zig_backend) {
         .stage2_x86_64 => 8,
         else => 16,
     };
-    const signed_integer_size = suggestVectorSizeForCpu(i32, cpu).?;
-    const unsigned_integer_size = suggestVectorSizeForCpu(u32, cpu).?;
-    try std.testing.expectEqual(expected_size, unsigned_integer_size);
-    try std.testing.expectEqual(expected_size, signed_integer_size);
+    const signed_integer_len = suggestVectorLengthForCpu(i32, cpu).?;
+    const unsigned_integer_len = suggestVectorLengthForCpu(u32, cpu).?;
+    try std.testing.expectEqual(expected_len, unsigned_integer_len);
+    try std.testing.expectEqual(expected_len, signed_integer_len);
 }
 
 fn vectorLength(comptime VectorType: type) comptime_int {
@@ -232,7 +236,7 @@ test "vector patterns" {
     }
 }
 
-/// Joins two vectors, shifts them leftwards (towards lower indices) and extracts the leftmost elements into a vector the size of a and b.
+/// Joins two vectors, shifts them leftwards (towards lower indices) and extracts the leftmost elements into a vector the length of a and b.
 pub fn mergeShift(a: anytype, b: anytype, comptime shift: VectorCount(@TypeOf(a, b))) @TypeOf(a, b) {
     const len = vectorLength(@TypeOf(a, b));
 
@@ -240,7 +244,7 @@ pub fn mergeShift(a: anytype, b: anytype, comptime shift: VectorCount(@TypeOf(a,
 }
 
 /// Elements are shifted rightwards (towards higher indices). New elements are added to the left, and the rightmost elements are cut off
-/// so that the size of the vector stays the same.
+/// so that the length of the vector stays the same.
 pub fn shiftElementsRight(vec: anytype, comptime amount: VectorCount(@TypeOf(vec)), shift_in: std.meta.Child(@TypeOf(vec))) @TypeOf(vec) {
     // It may be possible to implement shifts and rotates with a runtime-friendly slice of two joined vectors, as the length of the
     // slice would be comptime-known. This would permit vector shifts and rotates by a non-comptime-known amount.
lib/std/unicode.zig
@@ -202,7 +202,7 @@ pub fn utf8CountCodepoints(s: []const u8) !usize {
 pub fn utf8ValidateSlice(input: []const u8) bool {
     var remaining = input;
 
-    const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
+    const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
     const Chunk = @Vector(chunk_len, u8);
 
     // Fast path. Check for and skip ASCII characters at the start of the input.
@@ -758,7 +758,7 @@ pub fn utf16leToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) ![]u8
 
     var remaining = utf16le;
     if (builtin.zig_backend != .stage2_x86_64) {
-        const chunk_len = std.simd.suggestVectorSize(u16) orelse 1;
+        const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
         const Chunk = @Vector(chunk_len, u16);
 
         // Fast path. Check for and encode ASCII characters at the start of the input.
@@ -801,7 +801,7 @@ pub fn utf16leToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0]
 
     var remaining = utf16le;
     if (builtin.zig_backend != .stage2_x86_64) {
-        const chunk_len = std.simd.suggestVectorSize(u16) orelse 1;
+        const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
         const Chunk = @Vector(chunk_len, u16);
 
         // Fast path. Check for and encode ASCII characters at the start of the input.
@@ -842,7 +842,7 @@ pub fn utf16leToUtf8(utf8: []u8, utf16le: []const u16) !usize {
 
     var remaining = utf16le;
     if (builtin.zig_backend != .stage2_x86_64) {
-        const chunk_len = std.simd.suggestVectorSize(u16) orelse 1;
+        const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
         const Chunk = @Vector(chunk_len, u16);
 
         // Fast path. Check for and encode ASCII characters at the start of the input.
@@ -941,7 +941,7 @@ pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u1
     var remaining = utf8;
     // Need support for std.simd.interlace
     if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
-        const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
+        const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
         const Chunk = @Vector(chunk_len, u8);
 
         // Fast path. Check for and encode ASCII characters at the start of the input.
@@ -986,7 +986,7 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
     var remaining = utf8;
     // Need support for std.simd.interlace
     if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
-        const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
+        const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
         const Chunk = @Vector(chunk_len, u8);
 
         // Fast path. Check for and encode ASCII characters at the start of the input.