diff options
Diffstat (limited to 'rgo/src/memcpy.S')
-rw-r--r-- | rgo/src/memcpy.S | 127 |
1 files changed, 0 insertions, 127 deletions
/*
	Copyright 2022 Gabriel Jensen.
	This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
	If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

#include <rgo.h>

/*
	rgo_memcpy(void const * in, size_t num, void * out)

	Copies num bytes from in to out, walking both buffers from the lowest
	address upwards. The widest chunk the build target allows is used first
	(32 bytes with AVX, 16 bytes with SSE), then machine words, then single
	bytes for the tail. All loads and stores are the unaligned forms, so no
	alignment is required of either pointer. Nothing here handles overlapping
	buffers.

	Syntax: GNU as, AT&T operand order, run through the C preprocessor.

	i386 (arguments on the stack):
		In:      0x4(%esp) = in, 0x8(%esp) = num, 0xC(%esp) = out
		Clobbers: eax, ecx, edx, flags, xmm0/ymm0 (when SSE/AVX is enabled)
		ebx is used as scratch and is saved/restored.
		eax is the input cursor, so it holds in + num on exit.

	x86-64 (arguments in registers):
		In:      rdi = in, rsi = num, rdx = out
		Clobbers: rcx, rdx, rsi, rdi, flags, xmm0/ymm0
		rax is never written.

	All length comparisons are unsigned (jb): num is a size_t, and a signed
	comparison would treat counts with the top bit set as negative and send
	them straight to the byte loop.

	Labels use the .L prefix so that they remain assembler-local on ELF
	targets instead of becoming ordinary symbols of the object file.
*/

.global rgo_memcpy

rgo_memcpy:
#if defined(__i386__)
	/* eax: Address of the current input element. */
	movl	0x4(%esp),%eax
	/* ecx: Number of remaining bytes. */
	movl	0x8(%esp),%ecx
	/* edx: Address of the current output element. */
	movl	0xC(%esp),%edx
	/* ebx: Current element (word and byte loops). */
	pushl	%ebx			/* ebx is callee-saved; the arguments were read before this push moved esp. */
#if defined(__AVX__)
	/* 32 bytes at a time through ymm0. */
.Lcpy256:
	cmpl	$0x20,%ecx
	jb	.Lcpy256end
	vmovdqu	(%eax),%ymm0
	vmovdqu	%ymm0,(%edx)
	addl	$0x20,%eax
	addl	$0x20,%edx
	subl	$0x20,%ecx
	jmp	.Lcpy256
.Lcpy256end:
	/* Clear the upper ymm halves before any legacy-SSE code (ours or the caller's) runs. */
	vzeroupper
#endif
#if defined(__SSE__)
	/* 16 bytes at a time through xmm0. */
.Lcpy128:
	cmpl	$0x10,%ecx
	jb	.Lcpywrd
#if defined(__SSE2__)
	movdqu	(%eax),%xmm0
	movdqu	%xmm0,(%edx)
#else
	/* SSE1 only: no integer 128-bit moves, use the float form. */
	movups	(%eax),%xmm0
	movups	%xmm0,(%edx)
#endif
	addl	$0x10,%eax
	addl	$0x10,%edx
	subl	$0x10,%ecx
	jmp	.Lcpy128
#endif
	/* 4 bytes at a time through ebx. */
.Lcpywrd:
	cmpl	$0x4,%ecx
	jb	.Lcpybyte
	movl	(%eax),%ebx
	movl	%ebx,(%edx)
	addl	$0x4,%eax
	addl	$0x4,%edx
	subl	$0x4,%ecx
	jmp	.Lcpywrd
	/* Remaining 0-3 bytes through bl. */
.Lcpybyte:
	testl	%ecx,%ecx
	jz	.Lcpydone
	movb	(%eax),%bl
	movb	%bl,(%edx)
	incl	%eax
	incl	%edx
	decl	%ecx
	jmp	.Lcpybyte
.Lcpydone:
	popl	%ebx
	ret
#elif defined(__x86_64__)
	/* rdi: Address of the current input element. */
	/* rsi: Number of remaining bytes. */
	/* rdx: Address of the current output element. */
	/* rcx: Current element (word and byte loops). */
#if defined(__AVX__)
	/* 32 bytes at a time through ymm0. */
.Lcpy256:
	cmpq	$0x20,%rsi
	jb	.Lcpy256end
	vmovups	(%rdi),%ymm0
	vmovups	%ymm0,(%rdx)
	addq	$0x20,%rdi
	addq	$0x20,%rdx
	subq	$0x20,%rsi
	jmp	.Lcpy256
.Lcpy256end:
	/* Clear the upper ymm halves before the legacy-SSE loop below and before returning. */
	vzeroupper
#endif
	/* 16 bytes at a time through xmm0. */
.Lcpy128:
	cmpq	$0x10,%rsi
	jb	.Lcpywrd
	movdqu	(%rdi),%xmm0
	movdqu	%xmm0,(%rdx)
	addq	$0x10,%rdi
	addq	$0x10,%rdx
	subq	$0x10,%rsi
	jmp	.Lcpy128
	/* 8 bytes at a time through rcx. */
.Lcpywrd:
	cmpq	$0x8,%rsi
	jb	.Lcpybyte
	movq	(%rdi),%rcx
	movq	%rcx,(%rdx)
	addq	$0x8,%rdi
	addq	$0x8,%rdx
	subq	$0x8,%rsi
	jmp	.Lcpywrd
	/* Remaining 0-7 bytes through cl. */
.Lcpybyte:
	testq	%rsi,%rsi
	jz	.Lcpydone
	movb	(%rdi),%cl
	movb	%cl,(%rdx)
	incq	%rdi
	incq	%rdx
	decq	%rsi
	jmp	.Lcpybyte
.Lcpydone:
	ret
#endif