summaryrefslogtreecommitdiff
path: root/rgo/src/memcpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'rgo/src/memcpy.S')
-rw-r--r--rgo/src/memcpy.S127
1 files changed, 0 insertions, 127 deletions
diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S
deleted file mode 100644
index fefa614..0000000
--- a/rgo/src/memcpy.S
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <rgo.h>
-
-.global rgo_memcpy
-
-rgo_memcpy:
- /*
-  rgo_memcpy(void const * in, size_t num, void * out)
-
-  Copies num bytes from in to out, lowest address first, using the widest
-  unaligned move the build enables (32-byte AVX, 16-byte SSE, machine word),
-  then finishes byte by byte. num == 0 copies nothing.
-
-  The copy always runs front to back and no overlap check is made.
-  No register is deliberately loaded with a return value (presumably the C
-  prototype returns void; confirm against rgo.h).
-
-  num is a size_t, so every length test below branches on the unsigned
-  condition (jb). A count with the top bit set still takes the wide paths.
-
-  i386:   arguments are read from the stack at 4/8/12(%esp).
-          Clobbers eax, ecx, edx, xmm0/ymm0 and the flags; ebx is saved and restored.
-  x86_64: arguments are read from rdi, rsi, rdx.
-          Clobbers rdi, rsi, rdx, rcx, xmm0/ymm0 and the flags.
- */
-#if defined(__i386__)
- /* The arguments are fetched before ebx is pushed, so the offsets are relative to the return address. */
- /* eax: Address of the current input element. */
- movl 0x4(%esp),%eax
- /* ecx: Number of remaining bytes. */
- movl 0x8(%esp),%ecx
- /* edx: Address of the current output element. */
- movl 0xC(%esp),%edx
- /* ebx (bl): Scratch for the word and byte copies. */
- pushl %ebx /* ebx is callee-saved and must be restored. */
- /* xmm0, ymm0: Scratch for the 16-byte and 32-byte copies. */
-#if defined(__AVX__)
-.Lbig256cpy:
- cmpl $0x20,%ecx
- jb .Lbig256end
- vmovdqu (%eax),%ymm0
- vmovdqu %ymm0,(%edx)
- addl $0x20,%eax
- addl $0x20,%edx
- subl $0x20,%ecx
- jmp .Lbig256cpy
-.Lbig256end:
- /* Clear the upper half of the ymm registers before any legacy SSE move and before returning. */
- vzeroupper
-#endif
-#if defined(__SSE__)
-.Lbig128cpy:
- cmpl $0x10,%ecx
- jb .Lwrdcpy
-#if defined(__SSE2__)
- movdqu (%eax),%xmm0
- movdqu %xmm0,(%edx)
-#else
- movups (%eax),%xmm0
- movups %xmm0,(%edx)
-#endif
- addl $0x10,%eax
- addl $0x10,%edx
- subl $0x10,%ecx
- jmp .Lbig128cpy
-#endif
-.Lwrdcpy:
- cmpl $0x4,%ecx
- jb .Lbytecpy
- movl (%eax),%ebx
- movl %ebx,(%edx)
- addl $0x4,%eax
- addl $0x4,%edx
- subl $0x4,%ecx
- jmp .Lwrdcpy
-.Lbytecpy:
- testl %ecx,%ecx
- jz .Ldone
- movb (%eax),%bl
- movb %bl,(%edx)
- incl %eax
- incl %edx
- decl %ecx
- jmp .Lbytecpy
-.Ldone:
- popl %ebx
- ret
-#elif defined(__x86_64__)
- /* rdi: Address of the current input element. */
- /* rsi: Number of remaining bytes. */
- /* rdx: Address of the current output element. */
- /* rcx (cl): Scratch for the word and byte copies. */
- /* xmm0, ymm0: Scratch for the 16-byte and 32-byte copies. */
-#if defined(__AVX__)
-.Lbig256cpy:
- cmpq $0x20,%rsi
- jb .Lbig256end
- vmovdqu (%rdi),%ymm0
- vmovdqu %ymm0,(%rdx)
- addq $0x20,%rdi
- addq $0x20,%rdx
- subq $0x20,%rsi
- jmp .Lbig256cpy
-.Lbig256end:
- /* Clear the upper half of the ymm registers before the legacy SSE moves below and before returning. */
- vzeroupper
-#endif
-.Lbig128cpy:
- cmpq $0x10,%rsi
- jb .Lwrdcpy
- movdqu (%rdi),%xmm0
- movdqu %xmm0,(%rdx)
- addq $0x10,%rdi
- addq $0x10,%rdx
- subq $0x10,%rsi
- jmp .Lbig128cpy
-.Lwrdcpy:
- cmpq $0x8,%rsi
- jb .Lbytecpy
- movq (%rdi),%rcx
- movq %rcx,(%rdx)
- addq $0x8,%rdi
- addq $0x8,%rdx
- subq $0x8,%rsi
- jmp .Lwrdcpy
-.Lbytecpy:
- testq %rsi,%rsi
- jz .Ldone
- movb (%rdi),%cl
- movb %cl,(%rdx)
- incq %rdi
- incq %rdx
- decq %rsi
- jmp .Lbytecpy
-.Ldone:
- ret
-#endif