diff options
Diffstat (limited to 'zap/source/amd64/mem/memcp.S')
-rw-r--r-- | zap/source/amd64/mem/memcp.S | 94 |
1 files changed, 94 insertions, 0 deletions
# Copyright 2022 Gabriel Jensen.
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.

# Syntax: AT&T (GNU as), passed through the C preprocessor.

.globl zap_memcp

# zap_memcp: copy a run of bytes from an input buffer to an output buffer.
#
# Registers on entry (the first three integer argument registers of the
# System V AMD64 calling convention):
#     rdi: Address of the first input byte.
#     rsi: Number of bytes to copy (treated as unsigned).
#     rdx: Address of the first output byte.
#
# Registers used as scratch:
#     rcx:  Holds the chunk being moved in the 8-byte and 1-byte loops.
#     xmm0: Holds the chunk being moved in the 16-byte loop.
#     ymm0: Holds the chunk being moved in the 32-byte loop (AVX builds only).
#
# On exit rdi and rdx point one past the last byte copied and rsi is zero.
# rax is never written. The flags are clobbered.
#
# Bytes are copied in ascending address order using unaligned moves, so no
# alignment is required. Overlapping buffers are not handled specially.
zap_memcp:

#if defined(__AVX__)
    # AVX supports 256-bit moves.

    # Copy 32 bytes per iteration:
.Lbig20cp:

    # Leave this loop once fewer than 32 bytes remain.
    # The count is unsigned, so use jb and not jl.
    cmpq    $0x20,%rsi
    jb      .Lbig20done

    # Copy:
    vmovups (%rdi),%ymm0            # Move into a register.
    vmovups %ymm0,(%rdx)            # And then back into memory.

    # Continue:
    addq    $0x20,%rdi
    addq    $0x20,%rdx
    subq    $0x20,%rsi
    jmp     .Lbig20cp

.Lbig20done:

    # Clear the upper halves of the vector registers before running the
    # legacy SSE moves below and before returning to the caller. This avoids
    # the penalty for mixing 256-bit AVX state with non-VEX SSE code.
    vzeroupper

#endif

    # AMD64 requires SSE(2).

    # Copy 16 bytes per iteration:
.Lbig10cp:

    # Move on once fewer than 16 bytes remain:
    cmpq    $0x10,%rsi
    jb      .Lwrdcp

    # Copy:
    movdqu  (%rdi),%xmm0
    movdqu  %xmm0,(%rdx)

    # Continue:
    addq    $0x10,%rdi
    addq    $0x10,%rdx
    subq    $0x10,%rsi
    jmp     .Lbig10cp

    # Copy one word (8 bytes) per iteration:
.Lwrdcp:

    # Move on once fewer than 8 bytes remain:
    cmpq    $0x8,%rsi
    jb      .Lbytecp

    # Copy:
    movq    (%rdi),%rcx
    movq    %rcx,(%rdx)

    # Continue:
    addq    $0x8,%rdi
    addq    $0x8,%rdx
    subq    $0x8,%rsi
    jmp     .Lwrdcp

    # Copy one byte per iteration:
.Lbytecp:

    # Stop when no bytes remain:
    testq   %rsi,%rsi
    jz      .Ldone

    # Copy:
    movb    (%rdi),%cl
    movb    %cl,(%rdx)

    # Continue:
    incq    %rdi
    incq    %rdx
    decq    %rsi
    jmp     .Lbytecp

    # Finish:
.Ldone:

    ret
\ No newline at end of file |