Commit aae5560712
Changed files (10)
lib
src
lib/libc/musl/src/string/aarch64/memset.S
@@ -1,115 +0,0 @@
-/*
- * memset - fill memory with a constant byte
- *
- * Copyright (c) 2012-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
- *
- */
-
-#define dstin x0 /* destination pointer; never written, so x0 is unchanged at every ret */
-#define val x1 /* fill value; overwritten with the byte repeated 8 times on the 0..15 path */
-#define valw w1 /* 32-bit view of val */
-#define count x2 /* byte count on entry; reused as the loop counter on the long path */
-#define dst x3 /* working pointer for the long path */
-#define dstend x4 /* dstin + count, one past the last byte to set */
-#define zva_val x5 /* scratch for the value read from dczid_el0 */
-
-.global memset
-.type memset,%function
-memset: /* in: x0 = destination, w1 = fill value (low byte used), x2 = byte count */
-
- dup v0.16B, valw /* replicate the low byte of valw into all 16 byte lanes of v0 */
- add dstend, dstin, count
-
- cmp count, 96
- b.hi .Lset_long /* count > 96 */
- cmp count, 16
- b.hs .Lset_medium /* 16 <= count <= 96 */
- mov val, v0.D[0] /* val = low 64 bits of v0, i.e. the fill byte repeated 8 times */
-
- /* Set 0..15 bytes. */
- tbz count, 3, 1f /* bit 3 clear means count < 8 here */
- str val, [dstin] /* count is 8..15: 8 bytes from the start ... */
- str val, [dstend, -8] /* ... and 8 bytes from the end; the two stores overlap */
- ret
- nop /* NOTE(review): presumably padding for code alignment - confirm */
-1: tbz count, 2, 2f /* bit 2 clear means count < 4 here */
- str valw, [dstin] /* count is 4..7: 4 bytes from the start ... */
- str valw, [dstend, -4] /* ... and 4 bytes from the end */
- ret
-2: cbz count, 3f /* count == 0: nothing to store */
- strb valw, [dstin] /* count is 1..3: first byte */
- tbz count, 1, 3f /* count == 1: done */
- strh valw, [dstend, -2] /* count is 2 or 3: last two bytes */
-3: ret
-
- /* Set 16..96 bytes. */
-.Lset_medium:
- str q0, [dstin] /* first 16 bytes */
- tbnz count, 6, .Lset96 /* bit 6 set means count >= 64 here */
- str q0, [dstend, -16] /* last 16 bytes */
- tbz count, 5, 1f /* count < 32: the two stores above already cover everything */
- str q0, [dstin, 16] /* 32 <= count < 64: second 16 bytes from the start ... */
- str q0, [dstend, -32] /* ... and second 16 bytes from the end */
-1: ret
-
- .p2align 4
- /* Set 64..96 bytes. Write 64 bytes from the start and
- 32 bytes from the end. */
-.Lset96:
- str q0, [dstin, 16] /* bytes 0..15 were already stored in .Lset_medium */
- stp q0, q0, [dstin, 32]
- stp q0, q0, [dstend, -32]
- ret
-
- .p2align 4
-.Lset_long:
- and valw, valw, 255 /* keep only the fill byte so it can be compared with zero below */
- bic dst, dstin, 15 /* dst = dstin rounded down to a 16-byte boundary */
- str q0, [dstin] /* first 16 bytes, stored at the original (possibly unaligned) address */
- cmp count, 160
- ccmp valw, 0, 0, hs /* if count >= 160 compare the fill byte with 0, else force "ne" */
- b.ne .Lno_zva /* taken when count < 160 or the fill byte is non-zero */
-
-#ifndef SKIP_ZVA_CHECK
- mrs zva_val, dczid_el0
- and zva_val, zva_val, 31 /* only the low five bits take part in the compare */
- cmp zva_val, 4 /* ZVA size is 64 bytes. */
- b.ne .Lno_zva
-#endif
- str q0, [dst, 16] /* together with the next store this fills dst+16 .. dst+63 */
- stp q0, q0, [dst, 32]
- bic dst, dst, 63 /* round dst down to a 64-byte boundary */
- sub count, dstend, dst /* Count is now 64 too large. */
- sub count, count, 128 /* Adjust count and bias for loop. */
-
- .p2align 4
-.Lzva_loop:
- add dst, dst, 64 /* advance to the next 64-byte block */
- dc zva, dst /* zero the block containing dst (fill byte is known to be 0 here) */
- subs count, count, 64
- b.hi .Lzva_loop
- stp q0, q0, [dstend, -64] /* last 64 bytes, addressed from dstend; may overlap bytes already set */
- stp q0, q0, [dstend, -32]
- ret
-
-.Lno_zva:
- sub count, dstend, dst /* Count is 16 too large. */
- sub dst, dst, 16 /* Dst is biased by -32. */
- sub count, count, 64 + 16 /* Adjust count and bias for loop. */
-.Lno_zva_loop:
- stp q0, q0, [dst, 32] /* 64 bytes per iteration: first 32 bytes ... */
- stp q0, q0, [dst, 64]! /* ... next 32 bytes; pre-index writeback advances dst by 64 */
- subs count, count, 64
- b.hi .Lno_zva_loop
- stp q0, q0, [dstend, -64] /* last 64 bytes, addressed from dstend; may overlap bytes already set */
- stp q0, q0, [dstend, -32]
- ret
-
-.size memset,.-memset
-
lib/libc/musl/src/string/arm/__aeabi_memset.s
@@ -1,31 +0,0 @@
-.syntax unified
-
-.global __aeabi_memclr8
-.global __aeabi_memclr4
-.global __aeabi_memclr
-.global __aeabi_memset8
-.global __aeabi_memset4
-.global __aeabi_memset
-
-.type __aeabi_memclr8,%function
-.type __aeabi_memclr4,%function
-.type __aeabi_memclr,%function
-.type __aeabi_memset8,%function
-.type __aeabi_memset4,%function
-.type __aeabi_memset,%function
-
-__aeabi_memclr8: /* all three memclr labels share one entry: r0 = destination, r1 = byte count */
-__aeabi_memclr4:
-__aeabi_memclr:
- movs r2, #0 /* fill byte = 0, then fall through into the memset body */
-__aeabi_memset8: /* all three memset labels share one entry: r0 = destination, r1 = byte count, r2 = fill value */
-__aeabi_memset4:
-__aeabi_memset:
- cmp r1, #0
- beq 2f /* count == 0: nothing to store */
- adds r1, r0, r1 /* r1 = end address (destination + count) */
-1: strb r2, [r0] /* store the low byte of r2 */
- adds r0, r0, #1
- cmp r1, r0
- bne 1b /* byte-at-a-time loop until r0 reaches the end address */
-2: bx lr /* r0 has been advanced to the end address (unchanged if count was 0) */
lib/libc/musl/src/string/i386/memset.s
@@ -1,76 +0,0 @@
-.global memset
-.type memset,@function
-memset: /* stack arguments: 4(%esp) = dest, 8(%esp) = fill value, 12(%esp) = n */
- mov 12(%esp),%ecx /* ecx = n */
- cmp $62,%ecx
- ja 2f /* n > 62: use the rep stosl path */
-
- mov 8(%esp),%dl /* dl = fill byte */
- mov 4(%esp),%eax /* eax = dest; still holds dest at the ret below */
- test %ecx,%ecx
- jz 1f /* n == 0: nothing to store */
-
- mov %dl,%dh /* dx = fill byte twice */
-
- mov %dl,(%eax) /* first byte */
- mov %dl,-1(%eax,%ecx) /* last byte */
- cmp $2,%ecx
- jbe 1f /* n <= 2 is fully covered */
-
- mov %dx,1(%eax) /* bytes 1..2 */
- mov %dx,(-1-2)(%eax,%ecx) /* the two bytes before the last one */
- cmp $6,%ecx
- jbe 1f /* n <= 6 is fully covered */
-
- shl $16,%edx /* move the doubled byte into the upper half ... */
- mov 8(%esp),%dl /* ... and reload the lower two bytes: */
- mov 8(%esp),%dh /* edx = fill byte in all four bytes */
-
- mov %edx,(1+2)(%eax) /* bytes 3..6 */
- mov %edx,(-1-2-4)(%eax,%ecx) /* mirrored 4 bytes at the end */
- cmp $14,%ecx
- jbe 1f /* n <= 14 is fully covered */
-
- mov %edx,(1+2+4)(%eax) /* bytes 7..14 */
- mov %edx,(1+2+4+4)(%eax)
- mov %edx,(-1-2-4-8)(%eax,%ecx) /* mirrored 8 bytes at the end */
- mov %edx,(-1-2-4-4)(%eax,%ecx)
- cmp $30,%ecx
- jbe 1f /* n <= 30 is fully covered */
-
- mov %edx,(1+2+4+8)(%eax) /* bytes 15..30 */
- mov %edx,(1+2+4+8+4)(%eax)
- mov %edx,(1+2+4+8+8)(%eax)
- mov %edx,(1+2+4+8+12)(%eax)
- mov %edx,(-1-2-4-8-16)(%eax,%ecx) /* mirrored 16 bytes at the end; covers n up to 62 */
- mov %edx,(-1-2-4-8-12)(%eax,%ecx)
- mov %edx,(-1-2-4-8-8)(%eax,%ecx)
- mov %edx,(-1-2-4-8-4)(%eax,%ecx)
-
-1: ret
-
-2: movzbl 8(%esp),%eax /* eax = fill byte, zero-extended */
- mov %edi,12(%esp) /* save edi in the stack slot that held n (n is already in ecx) */
- imul $0x1010101,%eax /* replicate the byte into all four bytes of eax */
- mov 4(%esp),%edi /* edi = dest, the stosl write pointer */
- test $15,%edi /* is dest 16-byte aligned? the mov below does not change flags */
- mov %eax,-4(%edi,%ecx) /* last 4 bytes; covers the tail dropped by the shr below */
- jnz 2f /* not aligned: fix up the head first */
-
-1: shr $2, %ecx /* ecx = number of whole 4-byte words */
- rep
- stosl
- mov 4(%esp),%eax /* eax = dest */
- mov 12(%esp),%edi /* restore the edi value saved above */
- ret
-
-2: xor %edx,%edx
- sub %edi,%edx
- and $15,%edx /* edx = (-dest) & 15 = bytes up to the next 16-byte boundary */
- mov %eax,(%edi) /* first 16 bytes, stored at the unaligned address */
- mov %eax,4(%edi)
- mov %eax,8(%edi)
- mov %eax,12(%edi)
- sub %edx,%ecx /* drop the head bytes from the count ... */
- add %edx,%edi /* ... and start stosl at the aligned address */
- jmp 1b
lib/libc/musl/src/string/x86_64/memset.s
@@ -1,72 +0,0 @@
-.global memset
-.type memset,@function
-memset: /* in: rdi = dest, sil = fill byte, rdx = n */
- movzbq %sil,%rax /* rax = fill byte, zero-extended */
- mov $0x101010101010101,%r8
- imul %r8,%rax /* rax = fill byte replicated into all eight bytes */
-
- cmp $126,%rdx
- ja 2f /* n > 126: use the rep stosq path */
-
- test %edx,%edx /* n <= 126 here, so the 32-bit view is sufficient */
- jz 1f /* n == 0: nothing to store */
-
- mov %sil,(%rdi) /* first byte */
- mov %sil,-1(%rdi,%rdx) /* last byte */
- cmp $2,%edx
- jbe 1f /* n <= 2 is fully covered */
-
- mov %ax,1(%rdi) /* bytes 1..2 */
- mov %ax,(-1-2)(%rdi,%rdx) /* the two bytes before the last one */
- cmp $6,%edx
- jbe 1f /* n <= 6 is fully covered */
-
- mov %eax,(1+2)(%rdi) /* bytes 3..6 */
- mov %eax,(-1-2-4)(%rdi,%rdx) /* mirrored 4 bytes at the end */
- cmp $14,%edx
- jbe 1f /* n <= 14 is fully covered */
-
- mov %rax,(1+2+4)(%rdi) /* bytes 7..14 */
- mov %rax,(-1-2-4-8)(%rdi,%rdx) /* mirrored 8 bytes at the end */
- cmp $30,%edx
- jbe 1f /* n <= 30 is fully covered */
-
- mov %rax,(1+2+4+8)(%rdi) /* bytes 15..30 */
- mov %rax,(1+2+4+8+8)(%rdi)
- mov %rax,(-1-2-4-8-16)(%rdi,%rdx) /* mirrored 16 bytes at the end */
- mov %rax,(-1-2-4-8-8)(%rdi,%rdx)
- cmp $62,%edx
- jbe 1f /* n <= 62 is fully covered */
-
- mov %rax,(1+2+4+8+16)(%rdi) /* bytes 31..62 */
- mov %rax,(1+2+4+8+16+8)(%rdi)
- mov %rax,(1+2+4+8+16+16)(%rdi)
- mov %rax,(1+2+4+8+16+24)(%rdi)
- mov %rax,(-1-2-4-8-16-32)(%rdi,%rdx) /* mirrored 32 bytes at the end; covers n up to 126 */
- mov %rax,(-1-2-4-8-16-24)(%rdi,%rdx)
- mov %rax,(-1-2-4-8-16-16)(%rdi,%rdx)
- mov %rax,(-1-2-4-8-16-8)(%rdi,%rdx)
-
-1: mov %rdi,%rax /* rax = dest */
- ret
-
-2: test $15,%edi /* is dest 16-byte aligned? the movs below do not change flags */
- mov %rdi,%r8 /* keep dest; rep stosq advances rdi */
- mov %rax,-8(%rdi,%rdx) /* last 8 bytes; covers the tail dropped by the shr below */
- mov %rdx,%rcx /* rcx = n */
- jnz 2f /* not aligned: fix up the head first */
-
-1: shr $3,%rcx /* rcx = number of whole 8-byte words */
- rep
- stosq
- mov %r8,%rax /* rax = dest */
- ret
-
-2: xor %edx,%edx
- sub %edi,%edx
- and $15,%edx /* edx = (-dest) & 15 = bytes up to the next 16-byte boundary */
- mov %rax,(%rdi) /* first 16 bytes, stored at the unaligned address */
- mov %rax,8(%rdi)
- sub %rdx,%rcx /* drop the head bytes from the count ... */
- add %rdx,%rdi /* ... and start stosq at the aligned address */
- jmp 1b
lib/libc/musl/src/string/memcmp.c
@@ -1,8 +0,0 @@
-#include <string.h>
-
-int memcmp(const void *vl, const void *vr, size_t n) /* Compare the first n bytes of vl and vr as unsigned chars. */
-{
- const unsigned char *l=vl, *r=vr;
- for (; n && *l == *r; n--, l++, r++); /* empty body: advance while bytes remain and are equal */
- return n ? *l-*r : 0; /* difference of the first mismatching bytes, or 0 if all n matched */
-}
lib/libc/musl/src/string/memset.c
@@ -1,90 +0,0 @@
-#include <string.h>
-#include <stdint.h>
-
-void *memset(void *dest, int c, size_t n) /* Fill n bytes at dest with (unsigned char)c; returns dest. */
-{
- unsigned char *s = dest;
- size_t k;
-
- /* Fill head and tail with minimal branching. Each
- * conditional ensures that all the subsequently used
- * offsets are well-defined and in the dest region. */
-
- if (!n) return dest;
- s[0] = c;
- s[n-1] = c;
- if (n <= 2) return dest;
- s[1] = c;
- s[2] = c;
- s[n-2] = c;
- s[n-3] = c;
- if (n <= 6) return dest;
- s[3] = c;
- s[n-4] = c;
- if (n <= 8) return dest;
-
- /* Advance pointer to align it at a 4-byte boundary,
- * and truncate n to a multiple of 4. The previous code
- * already took care of any head/tail that get cut off
- * by the alignment. */
-
- k = -(uintptr_t)s & 3; /* bytes needed to reach the next 4-byte boundary (0..3) */
- s += k;
- n -= k;
- n &= -4; /* round n down to a multiple of 4 */
-
-#ifdef __GNUC__
- typedef uint32_t __attribute__((__may_alias__)) u32;
- typedef uint64_t __attribute__((__may_alias__)) u64;
-
- u32 c32 = ((u32)-1)/255 * (unsigned char)c; /* 0x01010101 * byte: the byte in all four positions */
-
- /* In preparation to copy 32 bytes at a time, aligned on
- * an 8-byte boundary, fill head/tail up to 28 bytes each.
- * As in the initial byte-based head/tail fill, each
- * conditional below ensures that the subsequent offsets
- * are valid (e.g. !(n<=24) implies n>=28). */
-
- *(u32 *)(s+0) = c32;
- *(u32 *)(s+n-4) = c32;
- if (n <= 8) return dest;
- *(u32 *)(s+4) = c32;
- *(u32 *)(s+8) = c32;
- *(u32 *)(s+n-12) = c32;
- *(u32 *)(s+n-8) = c32;
- if (n <= 24) return dest;
- *(u32 *)(s+12) = c32;
- *(u32 *)(s+16) = c32;
- *(u32 *)(s+20) = c32;
- *(u32 *)(s+24) = c32;
- *(u32 *)(s+n-28) = c32;
- *(u32 *)(s+n-24) = c32;
- *(u32 *)(s+n-20) = c32;
- *(u32 *)(s+n-16) = c32;
-
- /* Align to a multiple of 8 so we can fill 64 bits at a time,
- * and avoid writing the same bytes twice as much as is
- * practical without introducing additional branching. */
-
- k = 24 + ((uintptr_t)s & 4); /* 24 or 28: skip the head already filled, landing on an 8-byte boundary */
- s += k;
- n -= k;
-
- /* If this loop is reached, 28 tail bytes have already been
- * filled, so any remainder when n drops below 32 can be
- * safely ignored. */
-
- u64 c64 = c32 | ((u64)c32 << 32); /* the byte in all eight positions */
- for (; n >= 32; n-=32, s+=32) {
- *(u64 *)(s+0) = c64;
- *(u64 *)(s+8) = c64;
- *(u64 *)(s+16) = c64;
- *(u64 *)(s+24) = c64;
- }
-#else
- /* Pure C fallback with no aliasing violations. */
- for (; n; n--, s++) *s = c;
-#endif
-
- return dest;
-}
lib/libc/wasi/libc-top-half/musl/src/string/memcmp.c
@@ -1,43 +0,0 @@
-#include <string.h>
-
-#ifdef __wasm_simd128__
-#include <wasm_simd128.h>
-#endif
-
-int memcmp(const void *vl, const void *vr, size_t n) // Compare the first n bytes of vl and vr as unsigned chars.
-{
-#if defined(__wasm_simd128__) && defined(__wasilibc_simd_string)
- if (n >= sizeof(v128_t)) { // vector path only when at least one full vector can be loaded
- // memcmp is allowed to read up to n bytes from each object.
- // Find the first different character in the objects.
- // Unaligned loads handle the case where the objects
- // have mismatching alignments.
- const v128_t *v1 = (v128_t *)vl;
- const v128_t *v2 = (v128_t *)vr;
- while (n) {
- const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(v1), wasm_v128_load(v2));
- // Bitmask is slow on AArch64, all_true is much faster.
- if (!wasm_i8x16_all_true(cmp)) {
- // Find the offset of the first zero bit (little-endian).
- size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
- const unsigned char *u1 = (unsigned char *)v1 + ctz;
- const unsigned char *u2 = (unsigned char *)v2 + ctz;
- // This may help the compiler if the function is inlined.
- __builtin_assume(*u1 - *u2 != 0);
- return *u1 - *u2; // difference of the first mismatching bytes
- }
- // This makes n a multiple of sizeof(v128_t)
- // for every iteration except the first.
- size_t align = (n - 1) % sizeof(v128_t) + 1; // in 1..sizeof(v128_t); a full vector once n is a multiple
- v1 = (v128_t *)((char *)v1 + align);
- v2 = (v128_t *)((char *)v2 + align);
- n -= align;
- }
- return 0; // every compared vector matched
- }
-#endif
-
- const unsigned char *l=vl, *r=vr; // scalar path: short inputs, or builds without the SIMD macros
- for (; n && *l == *r; n--, l++, r++); // empty body: advance while bytes remain and are equal
- return n ? *l-*r : 0; // difference of the first mismatching bytes, or 0 if all n matched
-}
lib/libc/wasi/libc-top-half/musl/src/string/memset.c
@@ -1,94 +0,0 @@
-#include <string.h>
-#include <stdint.h>
-
-void *memset(void *dest, int c, size_t n) /* Fill n bytes at dest with (unsigned char)c; returns dest. */
-{
-#if defined(__wasm_bulk_memory__)
- if (n > BULK_MEMORY_THRESHOLD) /* threshold is defined outside this file */
- return __builtin_memset(dest, c, n); /* NOTE(review): presumably lowered to a bulk-memory fill - confirm */
-#endif
- unsigned char *s = dest;
- size_t k;
-
- /* Fill head and tail with minimal branching. Each
- * conditional ensures that all the subsequently used
- * offsets are well-defined and in the dest region. */
-
- if (!n) return dest;
- s[0] = c;
- s[n-1] = c;
- if (n <= 2) return dest;
- s[1] = c;
- s[2] = c;
- s[n-2] = c;
- s[n-3] = c;
- if (n <= 6) return dest;
- s[3] = c;
- s[n-4] = c;
- if (n <= 8) return dest;
-
- /* Advance pointer to align it at a 4-byte boundary,
- * and truncate n to a multiple of 4. The previous code
- * already took care of any head/tail that get cut off
- * by the alignment. */
-
- k = -(uintptr_t)s & 3; /* bytes needed to reach the next 4-byte boundary (0..3) */
- s += k;
- n -= k;
- n &= -4; /* round n down to a multiple of 4 */
-
-#ifdef __GNUC__
- typedef uint32_t __attribute__((__may_alias__)) u32;
- typedef uint64_t __attribute__((__may_alias__)) u64;
-
- u32 c32 = ((u32)-1)/255 * (unsigned char)c; /* 0x01010101 * byte: the byte in all four positions */
-
- /* In preparation to copy 32 bytes at a time, aligned on
- * an 8-byte boundary, fill head/tail up to 28 bytes each.
- * As in the initial byte-based head/tail fill, each
- * conditional below ensures that the subsequent offsets
- * are valid (e.g. !(n<=24) implies n>=28). */
-
- *(u32 *)(s+0) = c32;
- *(u32 *)(s+n-4) = c32;
- if (n <= 8) return dest;
- *(u32 *)(s+4) = c32;
- *(u32 *)(s+8) = c32;
- *(u32 *)(s+n-12) = c32;
- *(u32 *)(s+n-8) = c32;
- if (n <= 24) return dest;
- *(u32 *)(s+12) = c32;
- *(u32 *)(s+16) = c32;
- *(u32 *)(s+20) = c32;
- *(u32 *)(s+24) = c32;
- *(u32 *)(s+n-28) = c32;
- *(u32 *)(s+n-24) = c32;
- *(u32 *)(s+n-20) = c32;
- *(u32 *)(s+n-16) = c32;
-
- /* Align to a multiple of 8 so we can fill 64 bits at a time,
- * and avoid writing the same bytes twice as much as is
- * practical without introducing additional branching. */
-
- k = 24 + ((uintptr_t)s & 4); /* 24 or 28: skip the head already filled, landing on an 8-byte boundary */
- s += k;
- n -= k;
-
- /* If this loop is reached, 28 tail bytes have already been
- * filled, so any remainder when n drops below 32 can be
- * safely ignored. */
-
- u64 c64 = c32 | ((u64)c32 << 32); /* the byte in all eight positions */
- for (; n >= 32; n-=32, s+=32) {
- *(u64 *)(s+0) = c64;
- *(u64 *)(s+8) = c64;
- *(u64 *)(s+16) = c64;
- *(u64 *)(s+24) = c64;
- }
-#else
- /* Pure C fallback with no aliasing violations. */
- for (; n; n--, s++) *s = c;
-#endif
-
- return dest;
-}
src/libs/musl.zig
@@ -1786,20 +1786,15 @@ const src_files = [_][]const u8{
"musl/src/stdlib/strtol.c",
"musl/src/stdlib/wcstod.c",
"musl/src/stdlib/wcstol.c",
- "musl/src/string/aarch64/memset.S",
- "musl/src/string/arm/__aeabi_memset.s",
"musl/src/string/bcmp.c",
"musl/src/string/bcopy.c",
"musl/src/string/explicit_bzero.c",
- "musl/src/string/i386/memset.s",
"musl/src/string/index.c",
"musl/src/string/memccpy.c",
"musl/src/string/memchr.c",
- "musl/src/string/memcmp.c",
"musl/src/string/memmem.c",
"musl/src/string/mempcpy.c",
"musl/src/string/memrchr.c",
- "musl/src/string/memset.c",
"musl/src/string/rindex.c",
"musl/src/string/stpcpy.c",
"musl/src/string/stpncpy.c",
@@ -1855,7 +1850,6 @@ const src_files = [_][]const u8{
"musl/src/string/wmemcpy.c",
"musl/src/string/wmemmove.c",
"musl/src/string/wmemset.c",
- "musl/src/string/x86_64/memset.s",
"musl/src/temp/mkdtemp.c",
"musl/src/temp/mkostemp.c",
"musl/src/temp/mkostemps.c",
src/libs/wasi_libc.zig
@@ -1221,9 +1221,7 @@ const libc_top_half_src_files = [_][]const u8{
"wasi/libc-top-half/musl/src/stdlib/wcstod.c",
"wasi/libc-top-half/musl/src/stdlib/wcstol.c",
"wasi/libc-top-half/musl/src/string/memchr.c",
- "wasi/libc-top-half/musl/src/string/memcmp.c",
"wasi/libc-top-half/musl/src/string/memrchr.c",
- "wasi/libc-top-half/musl/src/string/memset.c",
"wasi/libc-top-half/musl/src/string/strchrnul.c",
"wasi/libc-top-half/musl/src/thread/pthread_attr_get.c",
"wasi/libc-top-half/musl/src/thread/pthread_attr_setguardsize.c",