# Copyright 2022-2023 Gabriel Jensen.

# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0/>.

.intel_syntax noprefix

.globl zap_cp

#-----------------------------------------------------------------------
# zap_cp -- copy rem bytes from src to dest, front to back.
#
# Register usage matches the System V AMD64 calling convention:
#   In:    rdi = dest
#          rsi = src
#          rdx = rem (number of bytes to copy, treated as unsigned)
#   Out:   rax = dest + rem (one past the last byte written)
#          rdx = src  + rem (one past the last byte read)
#          i.e. return (zap_cpret) {.dest = dest,.src = src};
#   Clobb: rcx, rdi, rsi, ymm0, flags
#
# The copy proceeds in chunks of 0x20, 0x10, 0x8 and finally 0x1 bytes.
# All accesses are unaligned-safe.  The 0x20-byte path uses AVX; it is
# only entered when rem >= 0x20, and vzeroupper is executed on leaving it
# so that the legacy-SSE instruction that follows (and the caller) do not
# run with dirty upper YMM state.
#
# Pseudo-C temporaries used in the comments below:
#   zap_i8              val1;
#   zap_i04             val8;
#   unsigned int128_t   val01;
#   unsigned int256_t   val02;
#-----------------------------------------------------------------------
zap_cp:
	# We assume AVX for copies of 0x20 bytes or more.
	cmp	rdx,0x20
	jb	short .Lbig01cp		# if (rem < 0x20u) goto big01cp;

.Lbig02cp:				# big02cp:;
	vmovdqu	ymm0,[rsi]		# val02 = *(unsigned int256_t *)src;
	vmovdqu	[rdi],ymm0		# *(unsigned int256_t *)dest = val02;
	add	rdi,0x20		# dest += 0x20u;
	add	rsi,0x20		# src  += 0x20u;
	sub	rdx,0x20		# rem  -= 0x20u;
	cmp	rdx,0x20
	jae	short .Lbig02cp		# if (rem >= 0x20u) goto big02cp;

	vzeroupper			# clear upper YMM halves before any non-VEX SSE code runs

.Lbig01cp:				# big01cp:;
	cmp	rdx,0x10
	jb	short .Lwrdcp		# if (rem < 0x10u) goto wrdcp;

	movdqu	xmm0,[rsi]		# val01 = *(unsigned int128_t *)src;
	movdqu	[rdi],xmm0		# *(unsigned int128_t *)dest = val01;
	add	rdi,0x10		# dest += 0x10u;
	add	rsi,0x10		# src  += 0x10u;
	sub	rdx,0x10		# rem  -= 0x10u;
	jmp	short .Lbig01cp		# goto big01cp;

.Lwrdcp:				# wrdcp:;
	cmp	rdx,0x8
	jb	short .Lbytecp		# if (rem < 0x8u) goto bytecp;

	mov	rcx,[rsi]		# val8 = *(zap_i04 *)src;
	mov	[rdi],rcx		# *(zap_i04 *)dest = val8;
	add	rsi,0x8			# src  += 0x8u;
	add	rdi,0x8			# dest += 0x8u;
	sub	rdx,0x8			# rem  -= 0x8u;
	jmp	short .Lwrdcp		# goto wrdcp;

.Lbytecp:				# bytecp:;
	test	rdx,rdx			# if (rem == 0x0)
	jz	short .Ldone		# goto done;

	mov	cl,[rsi]		# val1 = *(zap_i8 *)src;
	mov	[rdi],cl		# *(zap_i8 *)dest = val1;
	inc	rdi			# ++dest;
	inc	rsi			# ++src;
	dec	rdx			# --rem;
	jmp	short .Lbytecp		# goto bytecp;

.Ldone:					# done:;
	mov	rax,rdi			# first return register  = advanced dest
	mov	rdx,rsi			# second return register = advanced src
	ret				# return (zap_cpret) {.dest = dest,.src = src};