# Copyright 2022-2023 Gabriel Jensen.
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>.
.intel_syntax noprefix
.globl zap_cp
# zap_cp: copy num bytes from src to dest in ascending address order.
#
# Register roles (these match the System V AMD64 argument order; the C
# prototype is not visible in this file - TODO confirm against the header):
#   rdi = dest   advanced by num on return
#   rsi = src    advanced by num on return
#   rdx = num    unsigned count of bytes remaining; zero on return
# Clobbers: rcx, ymm0/xmm0, flags.  rax is never written.
#
# The copy proceeds in chunks of 0x20, 0x10, 0x8 and finally 0x1 bytes.
# No check for overlapping buffers is made.  The 0x20-byte path uses AVX
# and is entered only when num >= 0x20; all loads and stores are unaligned.
zap_cp:
        # zap_i8 val1;
        # zap_i04 val8;
        # unsigned int128_t val01;
        # unsigned int256_t val02;

        cmp     rdx,0x20
        jb      .Lbig01cp               # if (num < 0x20u) goto big01cp;

.Lbig02cp:                              # big02cp:; // We assume AVX.
        vmovups ymm0,[rsi]              # val02 = *src;
        vmovups [rdi],ymm0              # *dest = val02;
        add     rsi,0x20                # src += 0x20u;
        add     rdi,0x20                # dest += 0x20u;
        sub     rdx,0x20                # num -= 0x20u;
        cmp     rdx,0x20
        jae     .Lbig02cp               # if (num >= 0x20u) goto big02cp;
        vzeroupper                      # upper ymm halves are dirty: clear them before
                                        # legacy SSE code (below or in the caller) runs

.Lbig01cp:                              # big01cp:; // Here num < 0x20u, so at most one pass.
        cmp     rdx,0x10
        jb      .Lwrdcp                 # if (num < 0x10u) goto wrdcp;
        movdqu  xmm0,[rsi]              # val01 = *src;
        movdqu  [rdi],xmm0              # *dest = val01;
        add     rsi,0x10                # src += 0x10u;
        add     rdi,0x10                # dest += 0x10u;
        sub     rdx,0x10                # num -= 0x10u;

.Lwrdcp:                                # wrdcp:; // Here num < 0x10u, so at most one pass.
        cmp     rdx,0x8
        jb      .Lbytecp                # if (num < 0x8u) goto bytecp;
        mov     rcx,[rsi]               # val8 = *src;
        mov     [rdi],rcx               # *dest = val8;
        add     rsi,0x8                 # src += 0x8u;
        add     rdi,0x8                 # dest += 0x8u;
        sub     rdx,0x8                 # num -= 0x8u;

.Lbytecp:                               # bytecp:; // Here num < 0x8u.
        test    rdx,rdx
        jz      .Ldone                  # if (num == 0x0u) goto done;

.Lbyteloop:                             # byteloop:; // Invariant: num > 0x0u.
        movzx   ecx,byte ptr [rsi]      # val1 = *src;
        mov     [rdi],cl                # *dest = val1; // store to dest, never to src
        inc     rsi                     # ++src;
        inc     rdi                     # ++dest;
        dec     rdx                     # --num;
        jnz     .Lbyteloop              # if (num != 0x0u) goto byteloop;

.Ldone:                                 # done:;
        ret                             # return;