//! memmove: exports a size-optimized or fast variant, chosen at comptime.
  1const std = @import("std");
  2const common = @import("./common.zig");
  3const builtin = @import("builtin");
  4const assert = std.debug.assert;
  5const memcpy = @import("memcpy.zig");
  6
  7const Element = common.PreferredLoadStoreElement;
  8
  9comptime {
 10    if (builtin.object_format != .c) {
 11        const export_options: std.builtin.ExportOptions = .{
 12            .name = "memmove",
 13            .linkage = common.linkage,
 14            .visibility = common.visibility,
 15        };
 16
 17        if (builtin.mode == .ReleaseSmall or builtin.zig_backend == .stage2_aarch64)
 18            @export(&memmoveSmall, export_options)
 19        else
 20            @export(&memmoveFast, export_options);
 21    }
 22}
 23
 24fn memmoveSmall(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
 25    const dest = opt_dest.?;
 26    const src = opt_src.?;
 27
 28    if (@intFromPtr(dest) < @intFromPtr(src)) {
 29        for (0..len) |i| {
 30            dest[i] = src[i];
 31        }
 32    } else {
 33        for (0..len) |i| {
 34            dest[len - 1 - i] = src[len - 1 - i];
 35        }
 36    }
 37
 38    return dest;
 39}
 40
 41fn memmoveFast(dest: ?[*]u8, src: ?[*]u8, len: usize) callconv(.c) ?[*]u8 {
 42    @setRuntimeSafety(common.test_safety);
 43    const small_limit = @max(2 * @sizeOf(Element), @sizeOf(Element));
 44
 45    if (copySmallLength(small_limit, dest.?, src.?, len)) return dest;
 46
 47    const dest_address = @intFromPtr(dest);
 48    const src_address = @intFromPtr(src);
 49
 50    if (src_address < dest_address) {
 51        copyBackwards(dest.?, src.?, len);
 52    } else {
 53        copyForwards(dest.?, src.?, len);
 54    }
 55
 56    return dest;
 57}
 58
/// Attempt to service a copy of fewer than `small_limit` bytes via the
/// fixed-size small-copy paths. Returns true if the copy was fully
/// performed, false if the caller must fall through to the block loops.
inline fn copySmallLength(
    comptime small_limit: comptime_int,
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) bool {
    if (len < 16) {
        copyLessThan16(dest, src, len);
        return true;
    }

    // Only instantiate the mid-range dispatcher when `small_limit` is large
    // enough for it to generate at least one useful branch; the bound here
    // mirrors the comptime loop range inside `copy16ToSmallLimit`.
    if (comptime 2 < (std.math.log2(small_limit) + 1) / 2) {
        if (copy16ToSmallLimit(small_limit, dest, src, len)) return true;
    }

    return false;
}
 76
 77inline fn copyLessThan16(
 78    dest: [*]u8,
 79    src: [*]const u8,
 80    len: usize,
 81) void {
 82    @setRuntimeSafety(common.test_safety);
 83    if (len < 4) {
 84        if (len == 0) return;
 85        const b = len / 2;
 86        const d0 = src[0];
 87        const db = src[b];
 88        const de = src[len - 1];
 89        dest[0] = d0;
 90        dest[b] = db;
 91        dest[len - 1] = de;
 92        return;
 93    }
 94    copyRange4(4, dest, src, len);
 95}
 96
/// Handle `len >= 16` up to roughly `small_limit` by dispatching to a
/// `copyRange4` instantiation sized for `len`. Returns true if the copy
/// was performed, false if `len` exceeds every generated limit.
inline fn copy16ToSmallLimit(
    comptime small_limit: comptime_int,
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) bool {
    @setRuntimeSafety(common.test_safety);
    // Unrolled at comptime: tries limits 16, 64, 256, ... (powers of 4)
    // derived from `small_limit`, so each branch calls a dedicated
    // fixed-copy-size `copyRange4`.
    inline for (2..(std.math.log2(small_limit) + 1) / 2 + 1) |p| {
        const limit = 1 << (2 * p);
        if (len < limit) {
            copyRange4(limit / 4, dest, src, len);
            return true;
        }
    }
    return false;
}
113
/// copy `len` bytes from `src` to `dest`; `len` must be in the range
/// `[copy_len, 4 * copy_len)`.
///
/// Uses four fixed-size `copy_len`-byte copies — two anchored at the start,
/// two anchored at the end — whose ranges overlap as needed to cover all
/// `len` bytes with no branching on `len`.
inline fn copyRange4(
    comptime copy_len: comptime_int,
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(common.test_safety);
    comptime assert(std.math.isPowerOfTwo(copy_len));
    assert(len >= copy_len);
    assert(len < 4 * copy_len);

    // `a` isolates the `2 * copy_len` bit of `len`: it is `2 * copy_len`
    // when `len` has that bit set and 0 otherwise; `b` is half of that.
    const a = len & (copy_len * 2);
    const b = a / 2;

    // Offsets of the final and penultimate copies, anchored at the end.
    const last = len - copy_len;
    const pen = last - b;

    // Perform all loads before any stores so the routine stays correct
    // when `src` and `dest` overlap.
    const d0 = src[0..copy_len].*;
    const d1 = src[b..][0..copy_len].*;
    const d2 = src[pen..][0..copy_len].*;
    const d3 = src[last..][0..copy_len].*;

    // the slice dest[0..len] is needed to workaround -ODebug miscompilation
    dest[0..len][0..copy_len].* = d0;
    dest[b..][0..copy_len].* = d1;
    dest[pen..][0..copy_len].* = d2;
    dest[last..][0..copy_len].* = d3;
}
144
/// Copy `len` bytes front-to-back; the call site in `memmoveFast` uses this
/// when `dest` does not lie above `src`, making the forward direction safe
/// for overlap. Requires `len >= 2 * @sizeOf(Element)`.
inline fn copyForwards(
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(common.test_safety);
    assert(len >= 2 * @sizeOf(Element));

    // Load the first and last Element-sized chunks before any stores: the
    // block loop below only covers whole aligned Elements, and these two
    // stores at the end patch up the unaligned edges.
    const head = src[0..@sizeOf(Element)].*;
    const tail = src[len - @sizeOf(Element) ..][0..@sizeOf(Element)].*;
    // Distance to the next Element-aligned source address. Note this is a
    // full `@alignOf(Element)` (not 0) when `src` is already aligned; the
    // `head` store still covers the skipped bytes.
    const alignment_offset = @alignOf(Element) - @intFromPtr(src) % @alignOf(Element);
    const n = len - alignment_offset;
    const d = dest + alignment_offset;
    const s = src + alignment_offset;

    copyBlocksAlignedSource(@ptrCast(d), @ptrCast(@alignCast(s)), n);

    // copy last `copy_size` bytes unconditionally, since block copy
    // methods only copy a multiple of `copy_size` bytes.
    dest[len - @sizeOf(Element) ..][0..@sizeOf(Element)].* = tail;
    dest[0..@sizeOf(Element)].* = head;
}
167
/// Thin wrapper around `copyBlocks` that pins the parameter types to an
/// unaligned destination and a naturally-aligned source.
/// NOTE(review): presumably exists so the generic `copyBlocks` is
/// instantiated with an aligned-source loop the backend can vectorize —
/// confirm against the optimizer's output.
inline fn copyBlocksAlignedSource(
    dest: [*]align(1) Element,
    src: [*]const Element,
    max_bytes: usize,
) void {
    copyBlocks(dest, src, max_bytes);
}
175
/// Copies the largest multiple of `@sizeOf(T)` bytes from `src` to `dest`
/// that does not exceed `max_bytes`, where `T` is the child type of `src`
/// and `dest`; `max_bytes` must be at least `@sizeOf(T)`.
inline fn copyBlocks(
    dest: anytype,
    src: anytype,
    max_bytes: usize,
) void {
    @setRuntimeSafety(common.test_safety);

    // Both pointers must share the same element type.
    const T = @typeInfo(@TypeOf(dest)).pointer.child;
    comptime assert(T == @typeInfo(@TypeOf(src)).pointer.child);

    // Whole blocks only; any remainder is the caller's responsibility.
    const block_count = max_bytes / @sizeOf(T);

    var idx: usize = 0;
    while (idx < block_count) : (idx += 1) {
        dest[idx] = src[idx];
    }
}
195
/// Copy `len` bytes back-to-front; the call site in `memmoveFast` uses this
/// when `src` lies below `dest`, so the regions may overlap and a forward
/// copy would clobber unread source bytes. The caller guarantees
/// `len >= 2 * @sizeOf(Element)` via the small-length dispatch.
/// NOTE(review): unlike `copyForwards`, this omits
/// `@setRuntimeSafety(common.test_safety)` — confirm whether intentional.
inline fn copyBackwards(
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) void {
    // Load both edge chunks before any stores; the element loop below only
    // covers whole aligned Elements, and the edges patch the remainder.
    const end_bytes = src[len - @sizeOf(Element) ..][0..@sizeOf(Element)].*;
    const start_bytes = src[0..@sizeOf(Element)].*;

    // `d_addr` is the last Element-aligned destination address within
    // `dest[0..len]`; `n` is how many bytes of the copy precede it.
    const d_addr: usize = std.mem.alignBackward(usize, @intFromPtr(dest) + len, @alignOf(Element));
    const d: [*]Element = @ptrFromInt(d_addr);
    const n = d_addr - @intFromPtr(dest);
    const s: [*]align(1) const Element = @ptrCast(src + n);

    // Copy whole Elements from high to low addresses so overlapping bytes
    // are read before they are overwritten.
    const loop_count = n / @sizeOf(Element);
    var i: usize = 1;
    while (i < loop_count + 1) : (i += 1) {
        (d - i)[0] = (s - i)[0];
    }

    // Patch the unaligned edges last.
    dest[0..@sizeOf(Element)].* = start_bytes;
    dest[len - @sizeOf(Element) ..][0..@sizeOf(Element)].* = end_bytes;
}
218
/// Exhaustively exercises `memmoveFast` over every length up to 1024 and
/// every source/destination alignment offset within one Element, including
/// overlapping moves in both directions.
/// Fix: corrects the misspelled diagnostic message ("occured" → "occurred").
test memmoveFast {
    // The self-hosted aarch64 backend exports memmoveSmall instead.
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;

    const max_len = 1024;
    // Reference pattern (period 97 avoids accidental alignment-periodic
    // matches); the extra bytes allow every alignment offset to be tested.
    var buffer: [max_len + @alignOf(Element) - 1]u8 = undefined;
    for (&buffer, 0..) |*b, i| {
        b.* = @intCast(i % 97);
    }

    var move_buffer: [max_len + @alignOf(Element) - 1]u8 align(@alignOf(Element)) = undefined;

    for (0..max_len) |copy_len| {
        for (0..@alignOf(Element)) |s_offset| {
            for (0..@alignOf(Element)) |d_offset| {
                // Re-seed the working buffer before each move.
                for (&move_buffer, buffer) |*d, s| {
                    d.* = s;
                }
                const dest = move_buffer[d_offset..][0..copy_len];
                const src = move_buffer[s_offset..][0..copy_len];
                _ = memmoveFast(dest.ptr, src.ptr, copy_len);
                std.testing.expectEqualSlices(u8, buffer[s_offset..][0..copy_len], dest) catch |e| {
                    std.debug.print(
                        "error occurred with source offset {d} and destination offset {d}\n",
                        .{
                            s_offset,
                            d_offset,
                        },
                    );
                    return e;
                };
            }
        }
    }
}
252}