# Copyright 2022 Gabriel Jensen.

# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.

# zap_memcp: copy rsi bytes from the buffer at rdi to the buffer at rdx.
#
# Syntax: GNU as, AT&T operand order. The file is passed through the C
# preprocessor (see the conditional AVX section below).
#
# In:
#   rdi: Address of the current input element.
#   rsi: Number of remaining elements (bytes), treated as unsigned.
#   rdx: Address of the current output element.
# Scratch:
#   rcx:  Current element (8-byte and 1-byte copies).
#   xmm0: Current element (16-byte copies).
#   ymm0: Current element (32-byte copies, AVX builds only).
# Out:
#   Nothing is written to rax.
# Clobbers:
#   rcx, rdx, rsi, rdi, xmm0, flags. AVX builds also execute vzeroupper,
#   which zeroes the upper halves of all ymm registers.
# Notes:
#   Bytes are copied front to back in steps of 32 (AVX only), 16, 8 and 1,
#   always with unaligned moves, so neither buffer needs any alignment.
#   NOTE(review): overlapping buffers are presumably unsupported, as with
#   memcpy - confirm against the callers.

	.text
	.globl	zap_memcp
zap_memcp:

#if defined(__AVX__)
	# AVX supports 256-bit moves.

	# Copy 32 bytes at a time while at least 32 bytes remain:
.Lcopy32:
	cmpq	$0x20,%rsi
	jb	.Lavxdone		# Unsigned compare: rsi is a byte count.

	vmovups	(%rdi),%ymm0		# Move into a register.
	vmovups	%ymm0,(%rdx)		# And then back into memory.

	addq	$0x20,%rdi
	addq	$0x20,%rdx
	subq	$0x20,%rsi
	jmp	.Lcopy32

.Lavxdone:
	# Clear the upper ymm halves so that the legacy SSE moves below (and
	# the caller) do not pay the AVX to SSE transition penalty.
	vzeroupper
#endif

	# AMD64 requires SSE(2).

	# Copy 16 bytes at a time while at least 16 bytes remain:
.Lcopy16:
	cmpq	$0x10,%rsi
	jb	.Lcopy8

	movdqu	(%rdi),%xmm0
	movdqu	%xmm0,(%rdx)

	addq	$0x10,%rdi
	addq	$0x10,%rdx
	subq	$0x10,%rsi
	jmp	.Lcopy16

	# Copy one word (8 bytes) at a time while at least 8 bytes remain:
.Lcopy8:
	cmpq	$0x8,%rsi
	jb	.Lcopy1

	movq	(%rdi),%rcx
	movq	%rcx,(%rdx)

	addq	$0x8,%rdi
	addq	$0x8,%rdx
	subq	$0x8,%rsi
	jmp	.Lcopy8

	# Copy the remaining bytes (fewer than 8) one at a time:
.Lcopy1:
	testq	%rsi,%rsi
	jz	.Ldone

	movb	(%rdi),%cl
	movb	%cl,(%rdx)

	incq	%rdi
	incq	%rdx
	decq	%rsi
	jmp	.Lcopy1

	# Finish:
.Ldone:
	ret