Commit 59ac0d1eed
Changed files (5)
lib/std/crypto/ghash_polyval.zig
@@ -158,11 +158,7 @@ fn Hash(comptime endian: std.builtin.Endian, comptime shift_key: bool) type {
/// clmulSoft128_64 is faster on platforms with no native 128-bit registers.
const clmulSoft = switch (builtin.cpu.arch) {
.wasm32, .wasm64 => clmulSoft128_64,
- else => impl: {
- const vector_size = std.simd.suggestVectorSize(u128) orelse 0;
- if (vector_size < 128) break :impl clmulSoft128_64;
- break :impl clmulSoft128;
- },
+ else => if (std.simd.suggestVectorLength(u128) != null) clmulSoft128 else clmulSoft128_64,
};
// Software carryless multiplication of two 64-bit integers using native 128-bit registers.
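The hunk keeps the wasm special case and otherwise picks the wide path whenever the target reports any native vector length for u128. A minimal, self-contained sketch of that comptime selection pattern, using hypothetical plain-multiply stand-ins (mulWide/mulNarrow) rather than the real carryless-multiply routines:

const std = @import("std");
const builtin = @import("builtin");

// Hypothetical stand-ins for illustration only; these are ordinary widening
// multiplies, not carryless multiplication.
fn mulWide(a: u64, b: u64) u128 {
    return @as(u128, a) * b;
}
fn mulNarrow(a: u64, b: u64) u128 {
    return @as(u128, a) * b;
}

// Pick an implementation at comptime: if the target suggests any vector
// length for u128, favor the wide path, otherwise fall back.
const mul = switch (builtin.cpu.arch) {
    .wasm32, .wasm64 => mulNarrow,
    else => if (std.simd.suggestVectorLength(u128) != null) mulWide else mulNarrow,
};

test "comptime implementation selection" {
    try std.testing.expectEqual(@as(u128, 6), mul(2, 3));
}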
lib/std/http/protocol.zig
@@ -84,7 +84,7 @@ pub const HeadersParser = struct {
/// If the amount returned is less than `bytes.len`, you may assume that the parser is in a content state and the
/// first byte of content is located at `bytes[result]`.
pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 {
- const vector_len: comptime_int = @max(std.simd.suggestVectorSize(u8) orelse 1, 8);
+ const vector_len: comptime_int = @max(std.simd.suggestVectorLength(u8) orelse 1, 8);
const len: u32 = @intCast(bytes.len);
var index: u32 = 0;
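findHeadersEnd clamps the suggestion to at least 8 so the scalar fallback (orelse 1) still produces a workable block. A rough sketch of deriving a chunk type from that value and scanning whole chunks for a '\r', assuming a toy request buffer; this is not the parser's actual state machine:

const std = @import("std");

test "chunked scan for a header terminator byte" {
    // Same clamp as findHeadersEnd: scalar fallback of 1, but never below 8.
    const vector_len: comptime_int = @max(std.simd.suggestVectorLength(u8) orelse 1, 8);
    const bytes = "GET / HTTP/1.1\r\n\r\n" ++ "x" ** 32;
    var index: usize = 0;
    // Scan whole chunks; the first '\r' is at offset 14, so the loop stops
    // at or before the chunk containing it. A real parser would then switch
    // to a byte-by-byte state machine for the tail.
    while (index + vector_len <= bytes.len) : (index += vector_len) {
        const chunk: @Vector(vector_len, u8) = bytes[index..][0..vector_len].*;
        const has_cr = @reduce(.Or, chunk == @as(@Vector(vector_len, u8), @splat('\r')));
        if (has_cr) break;
    }
    try std.testing.expect(index <= 14);
}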
lib/std/mem.zig
@@ -966,7 +966,7 @@ pub fn indexOfSentinel(comptime T: type, comptime sentinel: T, p: [*:sentinel]co
// The below branch assumes that reading past the end of the buffer is valid, as long
// as we don't read into a new page. This should be the case for most architectures
// which use paged memory, however should be confirmed before adding a new arch below.
- .aarch64, .x86, .x86_64 => if (std.simd.suggestVectorSize(T)) |block_len| {
+ .aarch64, .x86, .x86_64 => if (std.simd.suggestVectorLength(T)) |block_len| {
const Block = @Vector(block_len, T);
const mask: Block = @splat(sentinel);
@@ -1020,7 +1020,7 @@ test "indexOfSentinel vector paths" {
const allocator = std.testing.allocator;
inline for (Types) |T| {
- const block_len = std.simd.suggestVectorSize(T) orelse continue;
+ const block_len = std.simd.suggestVectorLength(T) orelse continue;
// Allocate three pages so we guarantee a page-crossing address with a full page after
const memory = try allocator.alloc(T, 3 * std.mem.page_size / @sizeOf(T));
@@ -1111,11 +1111,11 @@ pub fn indexOfScalarPos(comptime T: type, slice: []const T, start_index: usize,
!@inComptime() and
(@typeInfo(T) == .Int or @typeInfo(T) == .Float) and std.math.isPowerOfTwo(@bitSizeOf(T)))
{
- if (std.simd.suggestVectorSize(T)) |block_len| {
+ if (std.simd.suggestVectorLength(T)) |block_len| {
// For Intel Nehalem (2009) and AMD Bulldozer (2012) or later, unaligned loads on aligned data result
// in the same execution as aligned loads. We ignore older arch's here and don't bother pre-aligning.
//
- // Use `std.simd.suggestVectorSize(T)` to get the same alignment as used in this function
+ // Use `std.simd.suggestVectorLength(T)` to get the same alignment as used in this function
// however this usually isn't necessary unless your arch has a performance penalty due to this.
//
// This may differ for other arch's. Arm for example costs a cycle when loading across a cache
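The indexOfScalarPos hunks only rename the helper and its mention in the alignment comment; the surrounding code splats the needle and compares whole blocks. A hedged stand-alone sketch of that block-compare step (omitting the real function's alignment handling and tail loop):

const std = @import("std");

test "vectorized search for a scalar within one block" {
    const block_len = std.simd.suggestVectorLength(u8) orelse return error.SkipZigTest;
    const Block = @Vector(block_len, u8);
    var buf: [256]u8 = undefined;
    @memset(&buf, 'a');
    buf[5] = 'z';
    // Compare one block against a splat of the needle and take the first match.
    const block: Block = buf[0..block_len].*;
    const matches = block == @as(Block, @splat('z'));
    const first = std.simd.firstTrue(matches);
    if (block_len > 5) {
        try std.testing.expect(first != null and first.? == 5);
    }
}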
lib/std/simd.zig
@@ -6,7 +6,9 @@
const std = @import("std");
const builtin = @import("builtin");
-pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?comptime_int {
+pub const suggestVectorSizeForCpu = @compileError("deprecated; use 'suggestVectorLengthForCpu'");
+
+pub fn suggestVectorLengthForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?comptime_int {
// This is guesswork, if you have better suggestions can add it or edit the current here
// This can run in comptime only, but stage 1 fails at it, stage 2 can understand it
const element_bit_size = @max(8, std.math.ceilPowerOfTwo(u16, @bitSizeOf(T)) catch unreachable);
@@ -53,24 +55,26 @@ pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?
return @divExact(vector_bit_size, element_bit_size);
}
-/// Suggests a target-dependant vector size for a given type, or null if scalars are recommended.
+pub const suggestVectorSize = @compileError("deprecated; use 'suggestVectorLength'");
+
+/// Suggests a target-dependent vector length for a given type, or null if scalars are recommended.
/// Not yet implemented for every CPU architecture.
-pub fn suggestVectorSize(comptime T: type) ?comptime_int {
- return suggestVectorSizeForCpu(T, builtin.cpu);
+pub fn suggestVectorLength(comptime T: type) ?comptime_int {
+ return suggestVectorLengthForCpu(T, builtin.cpu);
}
-test "suggestVectorSizeForCpu works with signed and unsigned values" {
+test "suggestVectorLengthForCpu works with signed and unsigned values" {
comptime var cpu = std.Target.Cpu.baseline(std.Target.Cpu.Arch.x86_64);
comptime cpu.features.addFeature(@intFromEnum(std.Target.x86.Feature.avx512f));
comptime cpu.features.populateDependencies(&std.Target.x86.all_features);
- const expected_size: usize = switch (builtin.zig_backend) {
+ const expected_len: usize = switch (builtin.zig_backend) {
.stage2_x86_64 => 8,
else => 16,
};
- const signed_integer_size = suggestVectorSizeForCpu(i32, cpu).?;
- const unsigned_integer_size = suggestVectorSizeForCpu(u32, cpu).?;
- try std.testing.expectEqual(expected_size, unsigned_integer_size);
- try std.testing.expectEqual(expected_size, signed_integer_size);
+ const signed_integer_len = suggestVectorLengthForCpu(i32, cpu).?;
+ const unsigned_integer_len = suggestVectorLengthForCpu(u32, cpu).?;
+ try std.testing.expectEqual(expected_len, unsigned_integer_len);
+ try std.testing.expectEqual(expected_len, signed_integer_len);
}
fn vectorLength(comptime VectorType: type) comptime_int {
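Because the old names are now @compileError declarations, any call site that was not migrated fails at compile time with the deprecation message. Migration is a one-for-one rename; a minimal before/after sketch:

const std = @import("std");

test "migrated call site" {
    // Old spelling, now rejected at compile time with the deprecation message:
    //     const len = std.simd.suggestVectorSize(u8) orelse 1;
    // New spelling:
    const len = std.simd.suggestVectorLength(u8) orelse 1;
    try std.testing.expect(len >= 1);
}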
@@ -232,7 +236,7 @@ test "vector patterns" {
}
}
-/// Joins two vectors, shifts them leftwards (towards lower indices) and extracts the leftmost elements into a vector the size of a and b.
+/// Joins two vectors, shifts them leftwards (towards lower indices) and extracts the leftmost elements into a vector the length of a and b.
pub fn mergeShift(a: anytype, b: anytype, comptime shift: VectorCount(@TypeOf(a, b))) @TypeOf(a, b) {
const len = vectorLength(@TypeOf(a, b));
@@ -240,7 +244,7 @@ pub fn mergeShift(a: anytype, b: anytype, comptime shift: VectorCount(@TypeOf(a,
}
/// Elements are shifted rightwards (towards higher indices). New elements are added to the left, and the rightmost elements are cut off
-/// so that the size of the vector stays the same.
+/// so that the length of the vector stays the same.
pub fn shiftElementsRight(vec: anytype, comptime amount: VectorCount(@TypeOf(vec)), shift_in: std.meta.Child(@TypeOf(vec))) @TypeOf(vec) {
// It may be possible to implement shifts and rotates with a runtime-friendly slice of two joined vectors, as the length of the
// slice would be comptime-known. This would permit vector shifts and rotates by a non-comptime-known amount.
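These hunks only swap "size" for "length" in the doc comments; behavior is unchanged. A small example of what the documented semantics imply for 4-element vectors (values chosen arbitrarily for illustration):

const std = @import("std");

test "mergeShift and shiftElementsRight semantics" {
    const a: @Vector(4, u8) = .{ 1, 2, 3, 4 };
    const b: @Vector(4, u8) = .{ 5, 6, 7, 8 };
    // Join a and b, shift left by one, keep the leftmost four elements.
    const m = std.simd.mergeShift(a, b, 1);
    try std.testing.expect(@reduce(.And, m == @Vector(4, u8){ 2, 3, 4, 5 }));
    // Shift towards higher indices; the new element 0 enters on the left.
    const s = std.simd.shiftElementsRight(a, 1, 0);
    try std.testing.expect(@reduce(.And, s == @Vector(4, u8){ 0, 1, 2, 3 }));
}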
lib/std/unicode.zig
@@ -202,7 +202,7 @@ pub fn utf8CountCodepoints(s: []const u8) !usize {
pub fn utf8ValidateSlice(input: []const u8) bool {
var remaining = input;
- const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
+ const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
const Chunk = @Vector(chunk_len, u8);
// Fast path. Check for and skip ASCII characters at the start of the input.
@@ -758,7 +758,7 @@ pub fn utf16leToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) ![]u8
var remaining = utf16le;
if (builtin.zig_backend != .stage2_x86_64) {
- const chunk_len = std.simd.suggestVectorSize(u16) orelse 1;
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
const Chunk = @Vector(chunk_len, u16);
// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -801,7 +801,7 @@ pub fn utf16leToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0]
var remaining = utf16le;
if (builtin.zig_backend != .stage2_x86_64) {
- const chunk_len = std.simd.suggestVectorSize(u16) orelse 1;
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
const Chunk = @Vector(chunk_len, u16);
// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -842,7 +842,7 @@ pub fn utf16leToUtf8(utf8: []u8, utf16le: []const u16) !usize {
var remaining = utf16le;
if (builtin.zig_backend != .stage2_x86_64) {
- const chunk_len = std.simd.suggestVectorSize(u16) orelse 1;
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
const Chunk = @Vector(chunk_len, u16);
// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -941,7 +941,7 @@ pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u1
var remaining = utf8;
// Need support for std.simd.interlace
if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
- const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
+ const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
const Chunk = @Vector(chunk_len, u8);
// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -986,7 +986,7 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
var remaining = utf8;
// Need support for std.simd.interlace
if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
- const chunk_len = std.simd.suggestVectorSize(u8) orelse 1;
+ const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
const Chunk = @Vector(chunk_len, u8);
// Fast path. Check for and encode ASCII characters at the start of the input.
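Each unicode.zig hunk renames the same call inside an ASCII fast path that consumes input in vector-sized chunks before falling back to per-code-point work. A hedged sketch of that chunk check, assuming an arbitrary all-ASCII input; it is not the library's actual transcoding loop:

const std = @import("std");

test "ASCII fast-path chunk check" {
    const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
    const Chunk = @Vector(chunk_len, u8);
    const input = "plain ascii text, long enough to fill at least one chunk........";
    var i: usize = 0;
    // Consume whole chunks while every byte has the high bit clear (pure ASCII).
    while (i + chunk_len <= input.len) : (i += chunk_len) {
        const chunk: Chunk = input[i..][0..chunk_len].*;
        if (@reduce(.Max, chunk) >= 0x80) break;
    }
    // All bytes are ASCII, so the loop stops only when a full chunk no longer fits;
    // the remaining tail (or any non-ASCII chunk) would go through the slow path.
    try std.testing.expectEqual(@as(usize, input.len - input.len % chunk_len), i);
}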