summaryrefslogtreecommitdiff
path: root/zap/src/memcpy.c
diff options
context:
space:
mode:
Diffstat (limited to 'zap/src/memcpy.c')
-rw-r--r--zap/src/memcpy.c143
1 files changed, 0 insertions, 143 deletions
diff --git a/zap/src/memcpy.c b/zap/src/memcpy.c
deleted file mode 100644
index ae923c3..0000000
--- a/zap/src/memcpy.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <zap/priv.h>
-
-#include <zap/mem.h>
-
-#include <stddef.h>
-#include <stdint.h>
-
-#if zap_priv_fastimpl
-__asm__ ( /* Hand-written zap_memcpy, exported as a global symbol; copies num bytes from in to out. */
- ".globl zap_memcpy\n"
-
- "zap_memcpy:\n"
- /*
-  Arguments, in order: void const * in (source), size_t num (number of bytes), void * out (destination).
-  Both variants copy from the lowest address upwards in progressively narrower steps (32 bytes, 16 bytes,
-  one machine word, one byte, as the feature macros allow), dropping to the next loop once fewer bytes remain.
- */
-#if defined(sus_arch_amd64)
- /* rdi: Address of the next input byte to read (in). */
- /* rsi: Number of bytes still to be copied (num). */
- /* rdx: Address of the next output byte to write (out). */
- /* rcx: Scratch for the 8-byte word loop; its low byte (cl) is the scratch of the byte loop. */
- /* xmm0: Scratch for the 16-byte loop. */
- /* ymm0: Scratch for the 32-byte loop (only assembled when sus_archfeat_avx is defined). */
-#if defined(sus_archfeat_avx)
- ".big256cpy:\n" /* 32-byte loop. */
- "cmpq $0x20,%rsi\n" /* Fewer than 32 bytes left? */
- "jl .big128cpy\n" /* NOTE(review): jl is a signed compare, so a count with its top bit set skips straight past the wide loops. */
- "vmovups (%rdi),%ymm0\n" /* Unaligned 32-byte load... */
- "vmovups %ymm0,(%rdx)\n" /* ...and store. */
- "addq $0x20,%rdi\n"
- "addq $0x20,%rdx\n"
- "subq $0x20,%rsi\n"
- "jmp .big256cpy\n"
-#endif
- ".big128cpy:\n" /* 16-byte loop. NOTE(review): no vzeroupper is issued after the ymm0 loop above, neither here nor before ret - confirm this is intended. */
- "cmpq $0x10,%rsi\n" /* Fewer than 16 bytes left? */
- "jl .wrdcpy\n"
- "movdqu (%rdi),%xmm0\n" /* Unaligned 16-byte load... */
- "movdqu %xmm0,(%rdx)\n" /* ...and store. */
- "addq $0x10,%rdi\n"
- "addq $0x10,%rdx\n"
- "subq $0x10,%rsi\n"
- "jmp .big128cpy\n"
- ".wrdcpy:\n" /* 8-byte word loop. */
- "cmpq $0x8,%rsi\n" /* Fewer than 8 bytes left? */
- "jl .bytecpy\n"
- "movq (%rdi),%rcx\n"
- "movq %rcx,(%rdx)\n"
- "addq $0x8,%rdi\n"
- "addq $0x8,%rdx\n"
- "subq $0x8,%rsi\n"
- "jmp .wrdcpy\n"
- ".bytecpy:\n" /* Byte loop: handles whatever tail the wider loops left over. */
- "testq %rsi,%rsi\n" /* Nothing left to copy? */
- "jz .done\n"
- "movb (%rdi),%cl\n"
- "movb %cl,(%rdx)\n"
- "incq %rdi\n"
- "incq %rdx\n"
- "decq %rsi\n"
- "jmp .bytecpy\n"
- ".done:\n"
- "ret\n"
-#elif defined(sus_arch_ia32)
- /* eax: Address of the next input byte to read (in), loaded from the first stack slot above the return address. */
- "movl 0x4(%esp),%eax\n"
- /* ecx: Number of bytes still to be copied (num), loaded from the second stack slot. */
- "movl 0x8(%esp),%ecx\n"
- /* edx: Address of the next output byte to write (out), loaded from the third stack slot. */
- "movl 0xC(%esp),%edx\n"
- /* ebx: Scratch for the 4-byte word loop; its low byte (bl) is the scratch of the byte loop. */
- "pushl %ebx\n" /* ebx is saved here and restored at .done; the arguments were read before this push, so their offsets are relative to the entry esp. */
- /* xmm0: Scratch for the 16-byte loop (only assembled when sus_archfeat_sse is defined). */
- /* ymm0: Scratch for the 32-byte loop (only assembled when sus_archfeat_avx is defined). */
-#if defined(sus_archfeat_avx)
- ".big256cpy:\n" /* 32-byte loop. */
- "cmpl $0x20,%ecx\n" /* Fewer than 32 bytes left? The fall-back target depends on whether the 16-byte loop is assembled. */
-#if defined(sus_archfeat_sse)
- "jl .big128cpy\n" /* NOTE(review): signed compare on the byte count, as in every jl below. */
-#else
- "jl .wrdcpy\n"
-#endif
- "vmovdqu (%eax),%ymm0\n" /* Unaligned 32-byte load... */
- "vmovdqu %ymm0,(%edx)\n" /* ...and store. */
- "addl $0x20,%eax\n"
- "addl $0x20,%edx\n"
- "subl $0x20,%ecx\n"
- "jmp .big256cpy\n"
-#endif
-#if defined(sus_archfeat_sse)
- ".big128cpy:\n" /* 16-byte loop. NOTE(review): no vzeroupper follows the ymm0 loop above - confirm this is intended. */
- "cmpl $0x10,%ecx\n" /* Fewer than 16 bytes left? */
- "jl .wrdcpy\n"
-#if defined(sus_archfeat_sse2)
- "movdqu (%eax),%xmm0\n" /* With SSE2: unaligned 16-byte integer load... */
- "movdqu %xmm0,(%edx)\n" /* ...and store. */
-#else
- "movups (%eax),%xmm0\n" /* Without SSE2: the same 16 bytes are moved with the SSE packed-single form. */
- "movups %xmm0,(%edx)\n"
-#endif
- "addl $0x10,%eax\n"
- "addl $0x10,%edx\n"
- "subl $0x10,%ecx\n"
- "jmp .big128cpy\n"
-#endif
- ".wrdcpy:\n" /* 4-byte word loop. */
- "cmpl $0x4,%ecx\n" /* Fewer than 4 bytes left? */
- "jl .bytecpy\n"
- "movl (%eax),%ebx\n"
- "movl %ebx,(%edx)\n"
- "addl $0x4,%eax\n"
- "addl $0x4,%edx\n"
- "subl $0x4,%ecx\n"
- "jmp .wrdcpy\n"
- ".bytecpy:\n" /* Byte loop: handles whatever tail the wider loops left over. */
- "testl %ecx,%ecx\n" /* Nothing left to copy? */
- "jz .done\n"
- "movb (%eax),%bl\n"
- "movb %bl,(%edx)\n"
- "incl %eax\n"
- "incl %edx\n"
- "decl %ecx\n"
- "jmp .bytecpy\n"
- ".done:\n"
- "popl %ebx\n" /* Restore the ebx value saved on entry. */
- "ret\n"
-#endif
-);
-#else
-/* Portable fallback: copies _num bytes from _in to _out one byte at a time, lowest address first. */
-void zap_memcpy(void const * const _in,size_t const _num,void * const _out) {
- unsigned char const * const src = _in;
- unsigned char * const dst = _out;
- for (size_t pos = 0x0u;pos != _num;++pos) {dst[pos] = src[pos];}
-}
-#endif