const std = @import("std");
const common = @import("./common.zig");
const builtin = @import("builtin");
const assert = std.debug.assert;

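/// The preferred element type for bulk loads and stores on the target
/// (for example a machine word or SIMD vector), as selected by common.zig.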
const Element = common.PreferredLoadStoreElement;

comptime {
    if (builtin.object_format != .c) {
        const export_options: std.builtin.ExportOptions = .{
            .name = "memmove",
            .linkage = common.linkage,
            .visibility = common.visibility,
        };

        if (builtin.mode == .ReleaseSmall or builtin.zig_backend == .stage2_aarch64)
            @export(&memmoveSmall, export_options)
        else
            @export(&memmoveFast, export_options);
    }
}

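/// Size-optimized variant: a byte-at-a-time loop that copies forwards when
/// `dest` is below `src` and backwards otherwise, so overlapping regions are
/// handled correctly.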
fn memmoveSmall(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
    const dest = opt_dest.?;
    const src = opt_src.?;

    if (@intFromPtr(dest) < @intFromPtr(src)) {
        for (0..len) |i| {
            dest[i] = src[i];
        }
    } else {
        for (0..len) |i| {
            dest[len - 1 - i] = src[len - 1 - i];
        }
    }

    return dest;
}

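/// Speed-optimized variant: small lengths are dispatched to branchy copies
/// built from overlap-safe fixed-size blocks; larger lengths use Element-wide
/// block copies in whichever direction is safe for the overlap.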
fn memmoveFast(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
    @setRuntimeSafety(common.test_safety);
    const small_limit = 2 * @sizeOf(Element);

    const dest = opt_dest.?;
    const src = opt_src.?;

    if (copySmallLength(small_limit, dest, src, len)) return dest;

    // When `src` is below `dest`, an overlapping forward copy would clobber
    // source bytes before they are read, so copy backwards instead.
    if (@intFromPtr(src) < @intFromPtr(dest)) {
        copyBackwards(dest, src, len);
    } else {
        copyForwards(dest, src, len);
    }

    return dest;
}

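/// Copies `len` bytes if `len` is below `small_limit`, returning whether the
/// copy was performed. Every path loads all of its source blocks before the
/// first store, so these copies are correct for overlapping buffers.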
inline fn copySmallLength(
    comptime small_limit: comptime_int,
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) bool {
    if (len < 16) {
        copyLessThan16(dest, src, len);
        return true;
    }

    // Skip the 16..small_limit dispatch entirely when it could not handle
    // any length beyond what copyLessThan16 already covers.
    if (comptime 2 < (std.math.log2(small_limit) + 1) / 2) {
        if (copy16ToSmallLimit(small_limit, dest, src, len)) return true;
    }

    return false;
}

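/// Copies `len` bytes, `len < 16`, without a loop: up to three byte stores
/// for `len < 4`, otherwise four possibly-overlapping 4-byte blocks.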
inline fn copyLessThan16(
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(common.test_safety);
    if (len < 4) {
        if (len == 0) return;
        // Load the first, middle, and last byte before storing anything so
        // that overlapping `src`/`dest` regions are still copied correctly.
        const b = len / 2;
        const d0 = src[0];
        const db = src[b];
        const de = src[len - 1];
        dest[0] = d0;
        dest[b] = db;
        dest[len - 1] = de;
        return;
    }
    copyRange4(4, dest, src, len);
}

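/// Copies `len` bytes, `16 <= len < small_limit`, by dispatching on `len` to
/// the smallest copyRange4 size class that covers it; returns whether the
/// copy was performed.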
inline fn copy16ToSmallLimit(
    comptime small_limit: comptime_int,
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) bool {
    @setRuntimeSafety(common.test_safety);
    inline for (2..(std.math.log2(small_limit) + 1) / 2 + 1) |p| {
        const limit = 1 << (2 * p);
        if (len < limit) {
            copyRange4(limit / 4, dest, src, len);
            return true;
        }
    }
    return false;
}

/// Copies `len` bytes from `src` to `dest`; `len` must be in the range
/// `[copy_len, 4 * copy_len)`. All four blocks are loaded before the first
/// store, so the copy is correct even for overlapping buffers.
inline fn copyRange4(
    comptime copy_len: comptime_int,
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(common.test_safety);
    comptime assert(std.math.isPowerOfTwo(copy_len));
    assert(len >= copy_len);
    assert(len < 4 * copy_len);

    // `b` is `copy_len` when two middle blocks are needed to cover the range
    // and 0 otherwise, in which case the middle stores coincide with the
    // first and last ones. For example, with copy_len = 4 and len = 10 the
    // blocks land at offsets 0, 4, 2, and 6, covering bytes [0, 10).
    const a = len & (copy_len * 2);
    const b = a / 2;

    const last = len - copy_len;
    const pen = last - b;

    const d0 = src[0..copy_len].*;
    const d1 = src[b..][0..copy_len].*;
    const d2 = src[pen..][0..copy_len].*;
    const d3 = src[last..][0..copy_len].*;

    // the slice dest[0..len] is needed to work around an -ODebug miscompilation
    dest[0..len][0..copy_len].* = d0;
    dest[b..][0..copy_len].* = d1;
    dest[pen..][0..copy_len].* = d2;
    dest[last..][0..copy_len].* = d3;
}

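/// Copies `len` bytes front to back using Element-sized blocks; requires
/// `len >= 2 * @sizeOf(Element)`. Only safe for overlapping buffers when
/// `dest` is at or below `src`.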
inline fn copyForwards(
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(common.test_safety);
    assert(len >= 2 * @sizeOf(Element));

    // Save the unaligned head and tail of the range up front; they are
    // stored after the aligned block copy to cover the leftover bytes.
    const head = src[0..@sizeOf(Element)].*;
    const tail = src[len - @sizeOf(Element) ..][0..@sizeOf(Element)].*;
    const alignment_offset = @alignOf(Element) - @intFromPtr(src) % @alignOf(Element);
    const n = len - alignment_offset;
    const d = dest + alignment_offset;
    const s = src + alignment_offset;

    copyBlocksAlignedSource(@ptrCast(d), @ptrCast(@alignCast(s)), n);

    // Store the tail unconditionally, since the block copy only moves a
    // multiple of `@sizeOf(Element)` bytes; the head store covers the bytes
    // skipped for alignment.
    dest[len - @sizeOf(Element) ..][0..@sizeOf(Element)].* = tail;
    dest[0..@sizeOf(Element)].* = head;
}

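/// Pins down concrete pointer types for copyBlocks: an Element-aligned
/// source and a potentially unaligned destination.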
inline fn copyBlocksAlignedSource(
    dest: [*]align(1) Element,
    src: [*]const Element,
    max_bytes: usize,
) void {
    copyBlocks(dest, src, max_bytes);
}

/// Copies the largest multiple of `@sizeOf(T)` bytes from `src` to `dest`
/// that is at most `max_bytes`, where `T` is the child type of both `src`
/// and `dest`; `max_bytes` must be at least `@sizeOf(T)`.
inline fn copyBlocks(
    dest: anytype,
    src: anytype,
    max_bytes: usize,
) void {
    @setRuntimeSafety(common.test_safety);

    const T = @typeInfo(@TypeOf(dest)).pointer.child;
    comptime assert(T == @typeInfo(@TypeOf(src)).pointer.child);

    const loop_count = max_bytes / @sizeOf(T);

    for (dest[0..loop_count], src[0..loop_count]) |*d, s| {
        d.* = s;
    }
}

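/// Copies `len` bytes back to front using Element-sized blocks; assumes
/// `len >= 2 * @sizeOf(Element)`. Only safe for overlapping buffers when
/// `dest` is above `src`.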
inline fn copyBackwards(
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(common.test_safety);
    // Save the head and tail of the range up front; they are stored last to
    // cover the bytes the aligned block loop does not reach.
    const end_bytes = src[len - @sizeOf(Element) ..][0..@sizeOf(Element)].*;
    const start_bytes = src[0..@sizeOf(Element)].*;

    // Round the end of `dest` down to an Element boundary, then copy
    // Element-by-Element, walking backwards from that boundary.
    const d_addr: usize = std.mem.alignBackward(usize, @intFromPtr(dest) + len, @alignOf(Element));
    const d: [*]Element = @ptrFromInt(d_addr);
    const n = d_addr - @intFromPtr(dest);
    const s: [*]align(1) const Element = @ptrCast(src + n);

    const loop_count = n / @sizeOf(Element);
    var i: usize = 1;
    while (i <= loop_count) : (i += 1) {
        (d - i)[0] = (s - i)[0];
    }

    dest[0..@sizeOf(Element)].* = start_bytes;
    dest[len - @sizeOf(Element) ..][0..@sizeOf(Element)].* = end_bytes;
}

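// Exercises memmoveFast over every length below 1024 and every combination
// of source/destination misalignment within one Element, in both directions
// of overlap.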
test memmoveFast {
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;

    const max_len = 1024;
    var buffer: [max_len + @alignOf(Element) - 1]u8 = undefined;
    for (&buffer, 0..) |*b, i| {
        b.* = @intCast(i % 97);
    }

    var move_buffer: [max_len + @alignOf(Element) - 1]u8 align(@alignOf(Element)) = undefined;

    for (0..max_len) |copy_len| {
        for (0..@alignOf(Element)) |s_offset| {
            for (0..@alignOf(Element)) |d_offset| {
                for (&move_buffer, buffer) |*d, s| {
                    d.* = s;
                }
                const dest = move_buffer[d_offset..][0..copy_len];
                const src = move_buffer[s_offset..][0..copy_len];
                _ = memmoveFast(dest.ptr, src.ptr, copy_len);
                std.testing.expectEqualSlices(u8, buffer[s_offset..][0..copy_len], dest) catch |e| {
                    std.debug.print(
                        "error occurred with source offset {d} and destination offset {d}\n",
                        .{
                            s_offset,
                            d_offset,
                        },
                    );
                    return e;
                };
            }
        }
    }
}