master
1const std = @import("std");
2const assert = std.debug.assert;
3const common = @import("./common.zig");
4const builtin = @import("builtin");
5
// Exports exactly one of the two implementations under the symbol name
// `memcpy`. Nothing is exported when the object format is `.c`.
comptime {
    if (builtin.object_format != .c) {
        // The byte-wise implementation is selected for `ReleaseSmall` builds
        // and for the `stage2_aarch64` backend; every other configuration
        // gets `memcpyFast`.
        const use_small_impl = builtin.mode == .ReleaseSmall or
            builtin.zig_backend == .stage2_aarch64;

        const options: std.builtin.ExportOptions = .{
            .name = "memcpy",
            .linkage = common.linkage,
            .visibility = common.visibility,
        };

        if (use_small_impl) {
            @export(&memcpySmall, options);
        } else {
            @export(&memcpyFast, options);
        }
    }
}
20
/// Unit used for the wide loads and stores in this file, as provided by
/// `common.PreferredLoadStoreElement`.
const Element = common.PreferredLoadStoreElement;

comptime {
    // `copyFixedLength` is instantiated with `@sizeOf(Element)` and only
    // accepts power-of-two lengths, so reject any other element size here.
    const element_size = @sizeOf(Element);
    assert(std.math.isPowerOfTwo(element_size));
}
26
/// Minimal `memcpy`: copies `len` bytes from `src` to `dest` one byte at a
/// time and returns `dest`.
///
/// The optional pointers are unwrapped inside the loop body, so neither is
/// unwrapped when `len` is zero.
fn memcpySmall(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
    @setRuntimeSafety(false);

    var index: usize = 0;
    while (index < len) : (index += 1) {
        dest.?[index] = src.?[index];
    }

    return dest;
}
36
/// `memcpy` implementation built from overlapping fixed-size copies.
///
/// `copySmallLength` is tried first; it returns `true` when it has performed
/// the whole copy itself. Otherwise the copy is done by `copyForwards`.
/// Both pointers are unwrapped unconditionally, including for `len == 0`.
/// Returns `dest`.
fn memcpyFast(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
    @setRuntimeSafety(false);

    const small_limit = 2 * @sizeOf(Element);
    const to = dest.?;
    const from = src.?;

    if (!copySmallLength(small_limit, to, from, len)) {
        copyForwards(to, from, len);
    }

    return dest;
}
48
/// Attempts to copy `len` bytes using only the short-length strategies.
///
/// Returns `true` when the copy has been performed, `false` when `len` is too
/// large for these strategies and the caller still has to do the copy.
inline fn copySmallLength(
    comptime small_limit: comptime_int,
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) bool {
    if (len >= 16) {
        // `copy16ToSmallLimit` iterates over tiers `2..tier_count + 1`, and
        // tier 2 only covers lengths below 16, which never reach this branch.
        // The call is therefore only emitted when a higher tier exists.
        const tier_count = comptime (std.math.log2(small_limit) + 1) / 2;
        if (comptime tier_count > 2) {
            return copy16ToSmallLimit(small_limit, dest, src, len);
        }
        return false;
    }

    copyLessThan16(dest, src, len);
    return true;
}
66
/// Copies `len` bytes from `src` to `dest` for lengths handled without a loop.
///
/// Lengths of 4 and above are delegated to `copyRange4(4, ...)`. Lengths 1..3
/// are covered by three single-byte copies that may hit the same index more
/// than once. A length of zero copies nothing.
inline fn copyLessThan16(
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(false);

    if (len >= 4) {
        copyRange4(4, dest, src, len);
        return;
    }
    if (len == 0) return;

    // Indices 0, len / 2 and len - 1 together touch every byte of a 1-, 2- or
    // 3-byte range.
    const middle = len / 2;
    const final = len - 1;
    dest[0] = src[0];
    dest[middle] = src[middle];
    dest[final] = src[final];
}
82
/// Copies `len` bytes with a single `copyRange4` call when `len` falls below
/// one of the comptime-unrolled tier bounds.
///
/// Tier `p` has the upper bound `1 << (2 * p)` (16, 64, 256, ...) and uses a
/// chunk size of a quarter of that bound. Tiers are tested in ascending order
/// and the first bound that exceeds `len` wins.
///
/// Returns `true` if a tier matched and the copy was done, `false` otherwise.
inline fn copy16ToSmallLimit(
    comptime small_limit: comptime_int,
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) bool {
    @setRuntimeSafety(false);

    const tier_end = comptime (std.math.log2(small_limit) + 1) / 2 + 1;
    inline for (2..tier_end) |exponent| {
        const upper_bound = 1 << (2 * exponent);
        const chunk_len = upper_bound / 4;
        if (len < upper_bound) {
            copyRange4(chunk_len, dest, src, len);
            return true;
        }
    }
    return false;
}
99
/// Copies `len` bytes from `src` to `dest` in three steps:
///
/// 1. one unaligned copy of `@sizeOf(Element)` bytes at the start,
/// 2. an `Element`-wise block copy starting at the first offset where `src`
///    is aligned to `@alignOf(Element)`,
/// 3. one unaligned copy of `@sizeOf(Element)` bytes ending exactly at `len`.
///
/// The steps may overlap; overlapping bytes are simply written more than once
/// with the same value.
///
/// `len` must be at least `@sizeOf(Element)`.
inline fn copyForwards(
    noalias dest: [*]u8,
    noalias src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(false);
    // Both fixed-size copies touch `@sizeOf(Element)` bytes, and the tail
    // offset below is computed as `len - @sizeOf(Element)`; a shorter `len`
    // would underflow and copy outside of the requested range.
    assert(len >= @sizeOf(Element));

    copyFixedLength(dest, src, @sizeOf(Element));

    // Distance from `src` to the next address that is a multiple of
    // `@alignOf(Element)`. The value lies in `1..=@alignOf(Element)`: an
    // already aligned `src` skips one full alignment unit, which the head
    // copy above has already written.
    const alignment_offset = @alignOf(Element) - @intFromPtr(src) % @alignOf(Element);
    const n = len - alignment_offset;
    const d = dest + alignment_offset;
    const s = src + alignment_offset;

    // `s` is aligned for `Element`; `d` is not necessarily, and is passed as
    // an `align(1)` pointer.
    copyBlocksAlignedSource(@ptrCast(d), @ptrCast(@alignCast(s)), n);

    // copy last `@sizeOf(Element)` bytes unconditionally, since block copy
    // methods only copy a multiple of `@sizeOf(Element)` bytes.
    const offset = len - @sizeOf(Element);
    copyFixedLength(dest + offset, src + offset, @sizeOf(Element));
}
120
/// Forwards to `copyBlocks` with pointer types that spell out the alignment
/// of each side: `src` has the natural alignment of `Element`, while `dest`
/// is only byte-aligned (`align(1)`).
///
/// `max_bytes` is passed through unchanged.
inline fn copyBlocksAlignedSource(
    noalias dest: [*]align(1) Element,
    noalias src: [*]const Element,
    max_bytes: usize,
) void {
    copyBlocks(dest, src, max_bytes);
}
128
/// Copies whole elements of type `T` from `src` to `dest`, where `T` is the
/// child type shared by both pointers.
///
/// The number of elements copied is `max_bytes / @sizeOf(T)` rounded down, so
/// the number of bytes copied is the largest multiple of `@sizeOf(T)` not
/// exceeding `max_bytes`. Any remainder is left for the caller.
inline fn copyBlocks(
    noalias dest: anytype,
    noalias src: anytype,
    max_bytes: usize,
) void {
    @setRuntimeSafety(false);

    const Block = @typeInfo(@TypeOf(dest)).pointer.child;
    comptime assert(Block == @typeInfo(@TypeOf(src)).pointer.child);

    const block_count = max_bytes / @sizeOf(Block);

    for (0..block_count) |index| {
        dest[index] = src[index];
    }
}
148
/// Copies exactly `len` bytes from `src` to `dest`, where `len` is a
/// comptime-known power of two.
///
/// The copy is unrolled at comptime into loads and stores through `align(1)`
/// pointers. The access type is:
/// - `Element`, repeated `len / @sizeOf(Element)` times, when `len` is at
///   least `@sizeOf(Element)`;
/// - a `len`-byte vector of `u8` when `len` is smaller than that but larger
///   than `@sizeOf(usize)`;
/// - an unsigned integer of `len * 8` bits otherwise.
inline fn copyFixedLength(
    noalias dest: [*]u8,
    noalias src: [*]const u8,
    comptime len: comptime_int,
) void {
    @setRuntimeSafety(false);
    comptime assert(std.math.isPowerOfTwo(len));

    const Chunk = if (len < @sizeOf(Element))
        (if (len <= @sizeOf(usize)) @Int(.unsigned, len * 8) else @Vector(len, u8))
    else
        Element;

    const chunk_count = @divExact(len, @sizeOf(Chunk));

    const to: [*]align(1) Chunk = @ptrCast(dest);
    const from: [*]align(1) const Chunk = @ptrCast(src);

    inline for (0..chunk_count) |index| {
        to[index] = from[index];
    }
}
173
/// copy `len` bytes from `src` to `dest`; `len` must be in the range
/// `[copy_len, 4 * copy_len)`.
///
/// The copy is done with four fixed-size copies of `copy_len` bytes each,
/// placed so that together they cover `[0, len)`. The copies may overlap, in
/// which case the overlapping bytes are written more than once.
inline fn copyRange4(
    comptime copy_len: comptime_int,
    noalias dest: [*]u8,
    noalias src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(false);
    comptime assert(std.math.isPowerOfTwo(copy_len));
    // Below `copy_len` the subtraction for `last` underflows; at
    // `4 * copy_len` the mask below yields zero and the middle
    // `2 * copy_len` bytes would be left uncopied.
    assert(len >= copy_len);
    assert(len < 4 * copy_len);

    // `a` is `2 * copy_len` when `len >= 2 * copy_len` and zero otherwise, so
    // `b` is either `copy_len` or zero.
    const a = len & (copy_len * 2);
    const b = a / 2;

    // Offsets of the final chunk and of the one before it; both are measured
    // back from the end so that the range always finishes exactly at `len`.
    const last = len - copy_len;
    const pen = last - b;

    // For `len < 2 * copy_len` this degenerates to the first and the last
    // chunk, each copied twice. Otherwise the four chunks are
    // `[0, c)`, `[c, 2c)`, `[len - 2c, len - c)` and `[len - c, len)`.
    copyFixedLength(dest, src, copy_len);
    copyFixedLength(dest + b, src + b, copy_len);
    copyFixedLength(dest + pen, src + pen, copy_len);
    copyFixedLength(dest + last, src + last, copy_len);
}
196
/// Exercises `memcpyImpl` for every length in `0..=max_len` combined with
/// every source and destination offset in `0..@alignOf(Element)`.
///
/// Each case checks that the copied bytes equal the source bytes, that the
/// returned pointer is the destination pointer, and that no byte of the
/// destination buffer outside of the requested range was modified. On the
/// first failure the offending length and offsets are printed and the error
/// is returned.
fn testMemcpyImpl(comptime memcpyImpl: anytype) !void {
    const max_len = 1024;
    // Fill value for the destination. The source pattern is `i % 97`, so it
    // never contains this byte and any stray write becomes visible.
    const guard: u8 = 0xff;

    var buffer: [max_len + @alignOf(Element) - 1]u8 align(@alignOf(Element)) = undefined;
    for (&buffer, 0..) |*b, i| {
        b.* = @intCast(i % 97);
    }
    var dest: [max_len + @alignOf(Element) - 1]u8 align(@alignOf(Element)) = undefined;

    // Both buffers hold `max_len` bytes even at the largest offset, so the
    // upper bound is inclusive.
    for (0..max_len + 1) |copy_len| {
        for (0..@alignOf(Element)) |s_offset| {
            for (0..@alignOf(Element)) |d_offset| {
                errdefer std.debug.print("error encountered for length={d}, s_offset={d}, d_offset={d}\n", .{
                    copy_len, s_offset, d_offset,
                });

                @memset(&dest, guard);
                const s = buffer[s_offset..][0..copy_len];
                const d = dest[d_offset..][0..copy_len];
                const returned = memcpyImpl(@ptrCast(d.ptr), @ptrCast(s.ptr), s.len);

                try std.testing.expectEqualSlices(u8, s, d);
                // `memcpy` returns its destination argument.
                try std.testing.expect(returned.? == d.ptr);
                // Nothing before or after the destination range may change.
                try std.testing.expect(std.mem.allEqual(u8, dest[0..d_offset], guard));
                try std.testing.expect(std.mem.allEqual(u8, dest[d_offset + copy_len ..], guard));
            }
        }
    }
}
// Runs the shared test driver against `memcpySmall`. The test is reported as
// skipped when compiling with the `stage2_aarch64` backend.
test memcpySmall {
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
    try testMemcpyImpl(memcpySmall);
}
// Runs the shared test driver against `memcpyFast`. The test is reported as
// skipped when compiling with the `stage2_aarch64` backend.
test memcpyFast {
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
    try testMemcpyImpl(memcpyFast);
}