Commit f278038923
Changed files (3)
lib
compiler_rt
lib/compiler_rt/memcpy.zig
@@ -5,24 +5,141 @@ const builtin = @import("builtin");
comptime {
if (builtin.object_format != .c) {
@export(&memcpy, .{ .name = "memcpy", .linkage = common.linkage, .visibility = common.visibility });
+ @export(&memcpy, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
}
}
-pub fn memcpy(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
- @setRuntimeSafety(false);
-
- if (len != 0) {
- var d = dest.?;
- var s = src.?;
- var n = len;
- while (true) {
- d[0] = s[0];
- n -= 1;
- if (n == 0) break;
- d += 1;
- s += 1;
+// a port of https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
+pub fn memcpy(maybe_dest: ?[*]u8, maybe_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
+ if (len == 0) {
+ @branchHint(.unlikely);
+ return maybe_dest;
+ }
+
+ const dest = maybe_dest.?;
+ const src = maybe_src.?;
+
+ if (len < 8) {
+ @branchHint(.unlikely);
+ if (len == 1) {
+ @branchHint(.unlikely);
+ dest[0] = src[0];
+ } else if (len >= 4) {
+ @branchHint(.unlikely);
+ blockCopy(dest, src, 4, len);
+ } else {
+ blockCopy(dest, src, 2, len);
}
+ return dest;
}
+ if (len > 32) {
+ @branchHint(.unlikely);
+ if (len > 256) {
+ @branchHint(.unlikely);
+ copyMove(dest, src, len);
+ return dest;
+ }
+ copyLong(dest, src, len);
+ return dest;
+ }
+
+ if (len > 16) {
+ @branchHint(.unlikely);
+ blockCopy(dest, src, 16, len);
+ return dest;
+ }
+
+ blockCopy(dest, src, 8, len);
+
return dest;
}
+
+inline fn blockCopy(dest: [*]u8, src: [*]const u8, block_size: comptime_int, len: usize) void {
+ const first = @as(*align(1) const @Vector(block_size, u8), src[0..block_size]).*;
+ const second = @as(*align(1) const @Vector(block_size, u8), src[len - block_size ..][0..block_size]).*;
+ dest[0..block_size].* = first;
+ dest[len - block_size ..][0..block_size].* = second;
+}
+
+inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
+ var array: [8]@Vector(32, u8) = undefined;
+
+ inline for (.{ 64, 128, 192, 256 }, 0..) |N, i| {
+ array[i * 2] = src[(N / 2) - 32 ..][0..32].*;
+ array[(i * 2) + 1] = src[len - N / 2 ..][0..32].*;
+
+ if (len <= N) {
+ @branchHint(.unlikely);
+ for (0..i + 1) |j| {
+ dest[j * 32 ..][0..32].* = array[j * 2];
+ dest[len - ((j * 32) + 32) ..][0..32].* = array[(j * 2) + 1];
+ }
+ return;
+ }
+ }
+}
+
+inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
+ if (@intFromPtr(src) >= @intFromPtr(dest)) {
+ @branchHint(.unlikely);
+ copyForward(dest, src, len);
+ } else if (@intFromPtr(src) + len > @intFromPtr(dest)) {
+ @branchHint(.unlikely);
+ overlapBwd(dest, src, len);
+ } else {
+ copyForward(dest, src, len);
+ }
+}
+
+inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
+ const tail: @Vector(32, u8) = src[len - 32 ..][0..32].*;
+
+ const N: usize = len & ~@as(usize, 127);
+ var i: usize = 0;
+
+ while (i < N) : (i += 128) {
+ dest[i..][0..32].* = src[i..][0..32].*;
+ dest[i + 32 ..][0..32].* = src[i + 32 ..][0..32].*;
+ dest[i + 64 ..][0..32].* = src[i + 64 ..][0..32].*;
+ dest[i + 96 ..][0..32].* = src[i + 96 ..][0..32].*;
+ }
+
+ if (len - i <= 32) {
+ @branchHint(.unlikely);
+ dest[len - 32 ..][0..32].* = tail;
+ } else {
+ copyLong(dest[i..], src[i..], len - i);
+ }
+}
+
+inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
+ var array: [5]@Vector(32, u8) = undefined;
+ array[0] = src[len - 32 ..][0..32].*;
+ inline for (1..5) |i| array[i] = src[(i - 1) << 5 ..][0..32].*;
+
+ const end: usize = (@intFromPtr(dest) + len - 32) & 31;
+ const range = len - end;
+ var s = src + range;
+ var d = dest + range;
+
+ while (@intFromPtr(s) > @intFromPtr(src + 128)) {
+ // zig fmt: off
+ const first = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 32)).*;
+ const second = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 64)).*;
+ const third = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 96)).*;
+ const fourth = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 128)).*;
+
+ @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 32))).* = first;
+ @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 64))).* = second;
+ @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 96))).* = third;
+ @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 128))).* = fourth;
+ // zig fmt: on
+
+ s -= 128;
+ d -= 128;
+ }
+
+ inline for (array[1..], 0..) |vec, i| dest[i * 32 ..][0..32].* = vec;
+ dest[len - 32 ..][0..32].* = array[0];
+}
lib/compiler_rt/memmove.zig
@@ -1,25 +0,0 @@
-const std = @import("std");
-const common = @import("./common.zig");
-
-comptime {
- @export(&memmove, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
-}
-
-pub fn memmove(dest: ?[*]u8, src: ?[*]const u8, n: usize) callconv(.C) ?[*]u8 {
- @setRuntimeSafety(false);
-
- if (@intFromPtr(dest) < @intFromPtr(src)) {
- var index: usize = 0;
- while (index != n) : (index += 1) {
- dest.?[index] = src.?[index];
- }
- } else {
- var index = n;
- while (index != 0) {
- index -= 1;
- dest.?[index] = src.?[index];
- }
- }
-
- return dest;
-}
lib/compiler_rt.zig
@@ -233,7 +233,6 @@ comptime {
_ = @import("compiler_rt/memcpy.zig");
_ = @import("compiler_rt/memset.zig");
- _ = @import("compiler_rt/memmove.zig");
_ = @import("compiler_rt/memcmp.zig");
_ = @import("compiler_rt/bcmp.zig");
_ = @import("compiler_rt/ssp.zig");