master
  1const std = @import("std");
  2const assert = std.debug.assert;
  3const common = @import("./common.zig");
  4const builtin = @import("builtin");
  5
  6comptime {
  7    if (builtin.object_format != .c) {
  8        const export_options: std.builtin.ExportOptions = .{
  9            .name = "memcpy",
 10            .linkage = common.linkage,
 11            .visibility = common.visibility,
 12        };
 13
 14        if (builtin.mode == .ReleaseSmall or builtin.zig_backend == .stage2_aarch64)
 15            @export(&memcpySmall, export_options)
 16        else
 17            @export(&memcpyFast, export_options);
 18    }
 19}
 20
 21const Element = common.PreferredLoadStoreElement;
 22
 23comptime {
 24    assert(std.math.isPowerOfTwo(@sizeOf(Element)));
 25}
 26
 27fn memcpySmall(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
 28    @setRuntimeSafety(false);
 29
 30    for (0..len) |i| {
 31        dest.?[i] = src.?[i];
 32    }
 33
 34    return dest;
 35}
 36
 37fn memcpyFast(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
 38    @setRuntimeSafety(false);
 39
 40    const small_limit = 2 * @sizeOf(Element);
 41
 42    if (copySmallLength(small_limit, dest.?, src.?, len)) return dest;
 43
 44    copyForwards(dest.?, src.?, len);
 45
 46    return dest;
 47}
 48
 49inline fn copySmallLength(
 50    comptime small_limit: comptime_int,
 51    dest: [*]u8,
 52    src: [*]const u8,
 53    len: usize,
 54) bool {
 55    if (len < 16) {
 56        copyLessThan16(dest, src, len);
 57        return true;
 58    }
 59
 60    if (comptime 2 < (std.math.log2(small_limit) + 1) / 2) {
 61        if (copy16ToSmallLimit(small_limit, dest, src, len)) return true;
 62    }
 63
 64    return false;
 65}
 66
 67inline fn copyLessThan16(
 68    dest: [*]u8,
 69    src: [*]const u8,
 70    len: usize,
 71) void {
 72    @setRuntimeSafety(false);
 73    if (len < 4) {
 74        if (len == 0) return;
 75        dest[0] = src[0];
 76        dest[len / 2] = src[len / 2];
 77        dest[len - 1] = src[len - 1];
 78        return;
 79    }
 80    copyRange4(4, dest, src, len);
 81}
 82
 83inline fn copy16ToSmallLimit(
 84    comptime small_limit: comptime_int,
 85    dest: [*]u8,
 86    src: [*]const u8,
 87    len: usize,
 88) bool {
 89    @setRuntimeSafety(false);
 90    inline for (2..(std.math.log2(small_limit) + 1) / 2 + 1) |p| {
 91        const limit = 1 << (2 * p);
 92        if (len < limit) {
 93            copyRange4(limit / 4, dest, src, len);
 94            return true;
 95        }
 96    }
 97    return false;
 98}
 99
/// Copies `len` bytes from `src` to `dest` in three steps: an unaligned head
/// of `@sizeOf(Element)` bytes, a run of whole `Element` blocks read from an
/// aligned source address, and an unaligned tail of `@sizeOf(Element)` bytes.
/// The steps may overlap each other.
///
/// `len` must be at least `@sizeOf(Element)` and at least `@alignOf(Element)`;
/// the subtractions below would wrap otherwise.
inline fn copyForwards(
    noalias dest: [*]u8,
    noalias src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(false);

    const element_size = @sizeOf(Element);
    const element_align = @alignOf(Element);

    // Head: covers every byte in front of the first aligned source address.
    copyFixedLength(dest, src, element_size);

    // Distance to the next aligned source address. This is in the range
    // `1...element_align`, so an already aligned `src` skips a full
    // `element_align` bytes.
    const skip = element_align - @intFromPtr(src) % element_align;

    copyBlocksAlignedSource(
        @ptrCast(dest + skip),
        @ptrCast(@alignCast(src + skip)),
        len - skip,
    );

    // Tail: the block copy only moves whole elements, so the final
    // `element_size` bytes are always copied separately.
    const tail = len - element_size;
    copyFixedLength(dest + tail, src + tail, element_size);
}
120
/// Forwards to `copyBlocks` with pointer types that make `src` naturally
/// aligned for `Element` while `dest` is only byte-aligned (`align(1)`).
/// Copies `max_bytes / @sizeOf(Element)` whole elements.
inline fn copyBlocksAlignedSource(
    noalias dest: [*]align(1) Element,
    noalias src: [*]const Element,
    max_bytes: usize,
) void {
    copyBlocks(dest, src, max_bytes);
}
128
/// Copies `max_bytes / @sizeOf(T)` whole elements from `src` to `dest`, where
/// `T` is the child type shared by both pointers. In bytes, that is the
/// largest multiple of `@sizeOf(T)` that does not exceed `max_bytes`; any
/// remainder is left untouched.
inline fn copyBlocks(
    noalias dest: anytype,
    noalias src: anytype,
    max_bytes: usize,
) void {
    @setRuntimeSafety(false);

    const Block = @typeInfo(@TypeOf(dest)).pointer.child;
    comptime assert(@typeInfo(@TypeOf(src)).pointer.child == Block);

    const block_count = max_bytes / @sizeOf(Block);

    var index: usize = 0;
    while (index < block_count) : (index += 1) {
        dest[index] = src[index];
    }
}
148
/// Copies exactly `len` bytes from `src` to `dest` using unaligned accesses
/// that are fully unrolled at compile time. `len` must be a power of two.
///
/// The access type is `Element` when `len` is at least `@sizeOf(Element)`, a
/// byte vector of `len` lanes when `len` is larger than `usize` but smaller
/// than `Element`, and an unsigned integer of `len * 8` bits otherwise.
inline fn copyFixedLength(
    noalias dest: [*]u8,
    noalias src: [*]const u8,
    comptime len: comptime_int,
) void {
    @setRuntimeSafety(false);
    comptime assert(std.math.isPowerOfTwo(len));

    const Chunk = if (len >= @sizeOf(Element))
        Element
    else if (len > @sizeOf(usize))
        @Vector(len, u8)
    else
        @Int(.unsigned, len * 8);

    const chunk_count = @divExact(len, @sizeOf(Chunk));

    // `align(1)`: neither pointer is assumed to be aligned for `Chunk`.
    const dest_chunks: [*]align(1) Chunk = @ptrCast(dest);
    const src_chunks: [*]align(1) const Chunk = @ptrCast(src);

    comptime var index = 0;
    inline while (index < chunk_count) : (index += 1) {
        dest_chunks[index] = src_chunks[index];
    }
}
173
/// Copies `len` bytes from `src` to `dest` with four fixed-size copies of
/// `copy_len` bytes each, which may overlap. `len` must be in the range
/// `[copy_len, 4 * copy_len)` and `copy_len` must be a power of two.
///
/// Two copies are anchored at the start of the range and two at its end.
/// When `len < 2 * copy_len`, each pair collapses onto the same offset.
inline fn copyRange4(
    comptime copy_len: comptime_int,
    noalias dest: [*]u8,
    noalias src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(false);
    comptime assert(std.math.isPowerOfTwo(copy_len));

    // `copy_len` when the `2 * copy_len` bit of `len` is set, otherwise 0.
    const second = (len & (copy_len * 2)) / 2;
    // Offset of the final `copy_len` bytes, and of the copy just before it.
    const fourth = len - copy_len;
    const third = fourth - second;

    copyFixedLength(dest, src, copy_len);
    copyFixedLength(dest + second, src + second, copy_len);
    copyFixedLength(dest + third, src + third, copy_len);
    copyFixedLength(dest + fourth, src + fourth, copy_len);
}
196
/// Exercises `memcpyImpl` for every length in `0..1024` combined with every
/// source and destination offset in `0..@alignOf(Element)`, and checks that
/// the destination range equals the source range after each call.
///
/// On a mismatch, the failing length and offsets are printed before the
/// error is returned.
fn testMemcpyImpl(comptime memcpyImpl: anytype) !void {
    const max_len = 1024;
    const alignment = @alignOf(Element);
    const storage_len = max_len + alignment - 1;

    // Source pattern: byte `i` holds `i % 97`.
    var buffer: [storage_len]u8 align(alignment) = undefined;
    for (0..buffer.len) |index| {
        buffer[index] = @intCast(index % 97);
    }
    var dest: [storage_len]u8 align(alignment) = undefined;

    for (0..max_len) |copy_len| {
        for (0..alignment) |s_offset| {
            for (0..alignment) |d_offset| {
                errdefer std.debug.print("error encountered for length={d}, s_offset={d}, d_offset={d}\n", .{
                    copy_len, s_offset, d_offset,
                });

                // Reset the destination so stale data cannot mask a failure.
                @memset(&dest, 0xff);
                const expected = buffer[s_offset..][0..copy_len];
                const actual = dest[d_offset..][0..copy_len];
                _ = memcpyImpl(@ptrCast(actual.ptr), @ptrCast(expected.ptr), expected.len);
                try std.testing.expectEqualSlices(u8, expected, actual);
            }
        }
    }
}
// Runs the shared length/offset sweep against `memcpySmall`; skipped on the
// `stage2_aarch64` backend.
test memcpySmall {
    switch (builtin.zig_backend) {
        .stage2_aarch64 => return error.SkipZigTest,
        else => try testMemcpyImpl(memcpySmall),
    }
}
// Runs the shared length/offset sweep against `memcpyFast`; skipped on the
// `stage2_aarch64` backend.
test memcpyFast {
    switch (builtin.zig_backend) {
        .stage2_aarch64 => return error.SkipZigTest,
        else => try testMemcpyImpl(memcpyFast),
    }
}