diff options
Diffstat (limited to 'zap/src/memcpy.c')
-rw-r--r-- | zap/src/memcpy.c | 143 |
1 files changed, 0 insertions, 143 deletions
diff --git a/zap/src/memcpy.c b/zap/src/memcpy.c deleted file mode 100644 index ae923c3..0000000 --- a/zap/src/memcpy.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - Copyright 2022 Gabriel Jensen. - This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. - If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. -*/ - -#include <zap/priv.h> - -#include <zap/mem.h> - -#include <stddef.h> -#include <stdint.h> - -#if zap_priv_fastimpl -__asm__ ( - ".globl zap_memcpy\n" - - "zap_memcpy:\n" - /* - void const * in - size_t num - void * out - */ -#if defined(sus_arch_amd64) - /* rdi: Address of the current input element. */ - /* rsi: Number of remaining elements. */ - /* rdx: Address of the current output element. */ - /* rcx: Current element. */ - /* xmm0: Current element. */ - /* ymm0: Current element. */ -#if defined(sus_archfeat_avx) - ".big256cpy:\n" - "cmpq $0x20,%rsi\n" - "jl .big128cpy\n" - "vmovups (%rdi),%ymm0\n" - "vmovups %ymm0,(%rdx)\n" - "addq $0x20,%rdi\n" - "addq $0x20,%rdx\n" - "subq $0x20,%rsi\n" - "jmp .big256cpy\n" -#endif - ".big128cpy:\n" - "cmpq $0x10,%rsi\n" - "jl .wrdcpy\n" - "movdqu (%rdi),%xmm0\n" - "movdqu %xmm0,(%rdx)\n" - "addq $0x10,%rdi\n" - "addq $0x10,%rdx\n" - "subq $0x10,%rsi\n" - "jmp .big128cpy\n" - ".wrdcpy:\n" - "cmpq $0x8,%rsi\n" - "jl .bytecpy\n" - "movq (%rdi),%rcx\n" - "movq %rcx,(%rdx)\n" - "addq $0x8,%rdi\n" - "addq $0x8,%rdx\n" - "subq $0x8,%rsi\n" - "jmp .wrdcpy\n" - ".bytecpy:\n" - "testq %rsi,%rsi\n" - "jz .done\n" - "movb (%rdi),%cl\n" - "movb %cl,(%rdx)\n" - "incq %rdi\n" - "incq %rdx\n" - "decq %rsi\n" - "jmp .bytecpy\n" - ".done:\n" - "ret\n" -#elif defined(sus_arch_ia32) - /* eax: Address of the current input element. */ - "movl 0x4(%esp),%eax\n" - /* ecx: Number of remaining elements. */ - "movl 0x8(%esp),%ecx\n" - /* edx: Address of the current output element. */ - "movl 0xC(%esp),%edx\n" - /* ebx: Current element. */ - "pushl %ebx\n" /* ebx must be restored. */ - /* xmm0: Current element. */ - /* ymm0: Current element. */ -#if defined(sus_archfeat_avx) - ".big256cpy:\n" - "cmpl $0x20,%ecx\n" -#if defined(sus_archfeat_sse) - "jl .big128cpy\n" -#else - "jl .wrdcpy\n" -#endif - "vmovdqu (%eax),%ymm0\n" - "vmovdqu %ymm0,(%edx)\n" - "addl $0x20,%eax\n" - "addl $0x20,%edx\n" - "subl $0x20,%ecx\n" - "jmp .big256cpy\n" -#endif -#if defined(sus_archfeat_sse) - ".big128cpy:\n" - "cmpl $0x10,%ecx\n" - "jl .wrdcpy\n" -#if defined(sus_archfeat_sse2) - "movdqu (%eax),%xmm0\n" - "movdqu %xmm0,(%edx)\n" -#else - "movups (%eax),%xmm0\n" - "movups %xmm0,(%edx)\n" -#endif - "addl $0x10,%eax\n" - "addl $0x10,%edx\n" - "subl $0x10,%ecx\n" - "jmp .big128cpy\n" -#endif - ".wrdcpy:\n" - "cmpl $0x4,%ecx\n" - "jl .bytecpy\n" - "movl (%eax),%ebx\n" - "movl %ebx,(%edx)\n" - "addl $0x4,%eax\n" - "addl $0x4,%edx\n" - "subl $0x4,%ecx\n" - "jmp .wrdcpy\n" - ".bytecpy:\n" - "testl %ecx,%ecx\n" - "jz .done\n" - "movb (%eax),%bl\n" - "movb %bl,(%edx)\n" - "incl %eax\n" - "incl %edx\n" - "decl %ecx\n" - "jmp .bytecpy\n" - ".done:\n" - "popl %ebx\n" - "ret\n" -#endif -); -#else -void zap_memcpy(void const * const _in,size_t const _num,void * const _out) { - unsigned char const * in = _in; - unsigned char * out = _out; - unsigned char const * const afterbuf = in + _num; - for (;in != afterbuf;++in,++out) {*out = *in;} -} -#endif |