diff options
Diffstat (limited to 'zap/src/mem/memcp.S')
-rw-r--r-- | zap/src/mem/memcp.S | 107 |
1 files changed, 0 insertions, 107 deletions
/*
	Reconstructed from the deletion diff of zap/src/mem/memcp.S (blob ead0718).
*/

/*
	Copyright 2022 Gabriel Jensen.
	This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
	If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

#include <zap/priv.h>

/*
	zap_memcp: copy num bytes from in to out.

	Syntax: GNU as, AT&T operand order (source first, destination last).

	C-side parameters, in argument order:
		void const * in
		zap_sz       num
		void *       out

	Register roles (amd64):
		rdi   address of the next input byte
		rsi   number of bytes still to be copied
		rdx   address of the next output byte
		rcx   scratch for the 8-byte and 1-byte moves
		xmm0  scratch for the 16-byte moves
		ymm0  scratch for the 32-byte moves (AVX builds only)

	Bytes are copied from low to high addresses, using the widest chunk
	that still fits: 32 bytes (AVX builds only), then 16, then 8, then 1.
	All wide accesses use unaligned moves, so neither pointer needs any
	particular alignment.

	Nothing is written to rax.
	Clobbers: rdi, rsi, rdx, rcx, xmm0 (ymm0 in AVX builds), flags.

	The remaining-count checks branch with jb, i.e. the count is compared
	as an unsigned quantity.
	NOTE(review): this assumes zap_sz is an unsigned type; confirm against
	the declaration pulled in through zap/priv.h.
*/

	.globl	zap_memcp

zap_memcp:

#if defined(__amd64__)

#if defined(__AVX__)

	/* 32-byte chunks: AVX provides 256-bit moves. */
.Lcp32:
	cmpq	$0x20,%rsi
	jb	.Lcp32_done		/* Fewer than 32 bytes remain. */

	vmovups	(%rdi),%ymm0		/* Load the chunk ...            */
	vmovups	%ymm0,(%rdx)		/* ... and store it back out.    */

	addq	$0x20,%rdi
	addq	$0x20,%rdx
	subq	$0x20,%rsi
	jmp	.Lcp32

.Lcp32_done:
	/*
		Clear the upper halves of the ymm registers before the legacy
		SSE moves below and before returning to the caller, so that no
		AVX/SSE state-transition penalty is incurred.
	*/
	vzeroupper

#endif

	/* 16-byte chunks: every amd64 processor has SSE2. */
.Lcp16:
	cmpq	$0x10,%rsi
	jb	.Lcp8			/* Fewer than 16 bytes remain. */

	movdqu	(%rdi),%xmm0
	movdqu	%xmm0,(%rdx)

	addq	$0x10,%rdi
	addq	$0x10,%rdx
	subq	$0x10,%rsi
	jmp	.Lcp16

	/* 8-byte chunks through rcx. */
.Lcp8:
	cmpq	$0x8,%rsi
	jb	.Lcp1			/* Fewer than 8 bytes remain. */

	movq	(%rdi),%rcx
	movq	%rcx,(%rdx)

	addq	$0x8,%rdi
	addq	$0x8,%rdx
	subq	$0x8,%rsi
	jmp	.Lcp8

	/* Trailing bytes, one at a time through cl. */
.Lcp1:
	testq	%rsi,%rsi
	jz	.Ldone			/* Nothing left to copy. */

	movb	(%rdi),%cl
	movb	%cl,(%rdx)

	incq	%rdi
	incq	%rdx
	decq	%rsi
	jmp	.Lcp1

.Ldone:
	ret

#endif