summaryrefslogtreecommitdiff
path: root/rgo/src/memcpy.c
diff options
context:
space:
mode:
Diffstat (limited to 'rgo/src/memcpy.c')
-rw-r--r--rgo/src/memcpy.c141
1 files changed, 0 insertions, 141 deletions
diff --git a/rgo/src/memcpy.c b/rgo/src/memcpy.c
deleted file mode 100644
index 33ca41c..0000000
--- a/rgo/src/memcpy.c
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <rgo-priv.h>
-
-#include <stddef.h>
-#include <stdint.h>
-
-#if defined(rgo_priv_fastimpl)
-__asm__ (
- ".global rgo_memcpy\n"
-
- "rgo_memcpy:\n"
- /*
- void const * in
- size_t num
- void * out
- */
-#if defined(sus_arch_amd64)
- /* rdi: Address of the current input element. */
- /* rsi: Number of remaining elements. */
- /* rdx: Address of the current output element. */
- /* rcx: Current element. */
- /* xmm0: Current element. */
- /* ymm0: Current element. */
-#if defined(sus_archfeat_avx)
- ".big256cpy:\n"
- "cmpq $0x20,%rsi\n"
- "jl .big128cpy\n"
- "vmovups (%rdi),%ymm0\n"
- "vmovups %ymm0,(%rdx)\n"
- "addq $0x20,%rdi\n"
- "addq $0x20,%rdx\n"
- "subq $0x20,%rsi\n"
- "jmp .big256cpy\n"
-#endif
- ".big128cpy:\n"
- "cmpq $0x10,%rsi\n"
- "jl .wrdcpy\n"
- "movdqu (%rdi),%xmm0\n"
- "movdqu %xmm0,(%rdx)\n"
- "addq $0x10,%rdi\n"
- "addq $0x10,%rdx\n"
- "subq $0x10,%rsi\n"
- "jmp .big128cpy\n"
- ".wrdcpy:\n"
- "cmpq $0x8,%rsi\n"
- "jl .bytecpy\n"
- "movq (%rdi),%rcx\n"
- "movq %rcx,(%rdx)\n"
- "addq $0x8,%rdi\n"
- "addq $0x8,%rdx\n"
- "subq $0x8,%rsi\n"
- "jmp .wrdcpy\n"
- ".bytecpy:\n"
- "testq %rsi,%rsi\n"
- "jz .done\n"
- "movb (%rdi),%cl\n"
- "movb %cl,(%rdx)\n"
- "incq %rdi\n"
- "incq %rdx\n"
- "decq %rsi\n"
- "jmp .bytecpy\n"
- ".done:\n"
- "ret\n"
-#elif defined(sus_arch_ia32)
- /* eax: Address of the current input element. */
- "movl 0x4(%esp),%eax\n"
- /* ecx: Number of remaining elements. */
- "movl 0x8(%esp),%ecx\n"
- /* edx: Address of the current output element. */
- "movl 0xC(%esp),%edx\n"
- /* ebx: Current element. */
- "pushl %ebx\n" /* ebx must be restored. */
- /* xmm0: Current element. */
- /* ymm0: Current element. */
-#if defined(sus_archfeat_avx)
- ".big256cpy:\n"
- "cmpl $0x20,%ecx\n"
-#if defined(sus_archfeat_sse)
- "jl .big128cpy\n"
-#else
- "jl .wrdcpy\n"
-#endif
- "vmovdqu (%eax),%ymm0\n"
- "vmovdqu %ymm0,(%edx)\n"
- "addl $0x20,%eax\n"
- "addl $0x20,%edx\n"
- "subl $0x20,%ecx\n"
- "jmp .big256cpy\n"
-#endif
-#if defined(sus_archfeat_sse)
- ".big128cpy:\n"
- "cmpl $0x10,%ecx\n"
- "jl .wrdcpy\n"
-#if defined(sus_archfeat_sse2)
- "movdqu (%eax),%xmm0\n"
- "movdqu %xmm0,(%edx)\n"
-#else
- "movups (%eax),%xmm0\n"
- "movups %xmm0,(%edx)\n"
-#endif
- "addl $0x10,%eax\n"
- "addl $0x10,%edx\n"
- "subl $0x10,%ecx\n"
- "jmp .big128cpy\n"
-#endif
- ".wrdcpy:\n"
- "cmpl $0x4,%ecx\n"
- "jl .bytecpy\n"
- "movl (%eax),%ebx\n"
- "movl %ebx,(%edx)\n"
- "addl $0x4,%eax\n"
- "addl $0x4,%edx\n"
- "subl $0x4,%ecx\n"
- "jmp .wrdcpy\n"
- ".bytecpy:\n"
- "testl %ecx,%ecx\n"
- "jz .done\n"
- "movb (%eax),%bl\n"
- "movb %bl,(%edx)\n"
- "incl %eax\n"
- "incl %edx\n"
- "decl %ecx\n"
- "jmp .bytecpy\n"
- ".done:\n"
- "popl %ebx\n"
- "ret\n"
-#endif
-);
-#else
-void rgo_memcpy(void const * const _in,size_t const _num,void * const _out) {
- uint_least8_t const * in = (uint_least8_t const *)_in;
- uint_least8_t * out = (uint_least8_t *)_out;
- uint_least8_t const * const afterbuf = in + _num;
- for (;in != afterbuf;++in,++out) {*out = *in;}
-}
-#endif