# Copyright 2022 Gabriel Jensen.
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.

# ----------------------------------------------------------------------
# zap_memcp - copy a run of bytes from one buffer to another.
#
# Syntax:   GNU as, AT&T operand order, run through the C preprocessor.
# Input:    %rdi = address of the first source byte
#           %rsi = number of bytes to copy (treated as unsigned)
#           %rdx = address of the first destination byte
# Output:   none; %rax is never written by this routine.
# Clobbers: %rdi, %rsi, %rdx, %rcx, %xmm0 (%ymm0 when built with AVX), flags.
# Stack:    unused (leaf routine).
#
# Bytes are copied in ascending address order using the widest chunk that
# still fits in the remaining count: 32 bytes (AVX builds only), then 16,
# then 8, then single bytes. All accesses are unaligned-safe.
#
# NOTE(review): the register assignment looks like a System V AMD64 call
# of the shape zap_memcp(in, num, out) - confirm against the C prototype.
# ----------------------------------------------------------------------

.globl zap_memcp

zap_memcp:
	# Address of the current input element:
#define iaddr %rdi

	# Number of remaining bytes:
#define rem   %rsi

	# Address of the current output element:
#define oaddr %rdx

	# Current element:
#define val1  %cl
#define val8  %rcx
#define val01 %xmm0
#define val02 %ymm0

#if defined(__AVX__)
	# AVX supports 256-bit moves.

	# Copy 32 bytes at a time:
.Lzap_memcp_big02cp:
	# The count is unsigned, so the unsigned "below" condition is used.
	cmpq	$0x20,rem			# if (rem < 0x20)
	jb	.Lzap_memcp_big02done		# 	goto big02done

	vmovups	(iaddr),val02			# val02 = *iaddr
	vmovups	val02,(oaddr)			# *oaddr = val02

	addq	$0x20,iaddr			# iaddr += 0x20
	addq	$0x20,oaddr			# oaddr += 0x20
	subq	$0x20,rem			# rem   -= 0x20
	jmp	.Lzap_memcp_big02cp		# goto big02cp

.Lzap_memcp_big02done:
	# The code below (and possibly the caller) uses legacy-encoded SSE.
	# Clear the upper halves of the vector registers first so that the
	# processor does not pay an AVX-to-SSE transition penalty.
	vzeroupper
#endif

	# AMD64 requires SSE(2), so there is no need to test for it.

	# Copy 16 bytes at a time:
.Lzap_memcp_big01cp:
	cmpq	$0x10,rem			# if (rem < 0x10)
	jb	.Lzap_memcp_wrdcp		# 	goto wrdcp

	movdqu	(iaddr),val01			# val01 = *iaddr
	movdqu	val01,(oaddr)			# *oaddr = val01

	addq	$0x10,iaddr			# iaddr += 0x10
	addq	$0x10,oaddr			# oaddr += 0x10
	subq	$0x10,rem			# rem   -= 0x10
	jmp	.Lzap_memcp_big01cp		# goto big01cp

	# Copy one word (8 bytes) at a time:
.Lzap_memcp_wrdcp:
	cmpq	$0x8,rem			# if (rem < 0x8)
	jb	.Lzap_memcp_bytecp		# 	goto bytecp

	movq	(iaddr),val8			# val8 = *iaddr
	movq	val8,(oaddr)			# *oaddr = val8

	addq	$0x8,iaddr			# iaddr += 0x8
	addq	$0x8,oaddr			# oaddr += 0x8
	subq	$0x8,rem			# rem   -= 0x8
	jmp	.Lzap_memcp_wrdcp		# goto wrdcp

	# Copy one byte at a time until nothing remains:
.Lzap_memcp_bytecp:
	testq	rem,rem				# if (rem == 0x0)
	jz	.Lzap_memcp_done		# 	goto done

	movb	(iaddr),val1			# val1 = *iaddr
	movb	val1,(oaddr)			# *oaddr = val1

	incq	iaddr				# ++iaddr
	incq	oaddr				# ++oaddr
	decq	rem				# --rem
	jmp	.Lzap_memcp_bytecp		# goto bytecp

	# Return:
.Lzap_memcp_done:
	ret					# return