1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
# Copyright 2022-2023 Gabriel Jensen.
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>.
.intel_syntax noprefix
.globl zap_cp
# zap_cp - copy num bytes from src to dest, walking both buffers upwards.
#
# Registers as read by the code below (this matches the System V AMD64
# order for three integer/pointer arguments):
#   rdi = dest   destination pointer
#   rsi = src    source pointer
#   rdx = num    number of bytes to copy, treated as an unsigned quantity
# Returns:
#   rax = dest + num, i.e. the address one past the last byte written.
# Clobbers:
#   rcx, rdx, rsi, rdi, ymm0/xmm0, flags.
# Notes:
#   * All loads and stores are unaligned-safe (vmovdqu / movdqu / mov).
#   * AVX instructions are executed only when num >= 0x20.
#   * Chunks are copied in ascending address order, widest first:
#     0x20 bytes, then 0x10, then 0x8, then single bytes.
zap_cp:
	# zap_i8 val1;
	# zap_i04 val8;
	# unsigned int128_t val01;
	# unsigned int256_t val02;

	# 256-bit chunks. // We assume AVX.
	cmp	rdx,0x20
	jb	short .Lbig01cp		# if (num < 0x20u) goto big01cp; // unsigned compare: num is a size
.Lbig02cp:				# big02cp:;
	vmovdqu	ymm0,[rsi]		# val02 = *(unsigned int256_t *)src;
	vmovdqu	[rdi],ymm0		# *(unsigned int256_t *)dest = val02;
	add	rdi,0x20		# dest += 0x20u;
	add	rsi,0x20		# src += 0x20u;
	sub	rdx,0x20		# num -= 0x20u;
	cmp	rdx,0x20
	jae	short .Lbig02cp		# if (num >= 0x20u) goto big02cp;
	# ymm0 was written above: clear the upper halves of the vector registers
	# so the legacy-SSE moves below (and the caller) do not pay the AVX/SSE
	# state-transition penalty.
	vzeroupper

	# 128-bit chunks.
.Lbig01cp:				# big01cp:;
	cmp	rdx,0x10
	jb	short .Lwrdcp		# if (num < 0x10u) goto wrdcp;
	movdqu	xmm0,[rsi]		# val01 = *(unsigned int128_t *)src;
	movdqu	[rdi],xmm0		# *(unsigned int128_t *)dest = val01;
	add	rdi,0x10		# dest += 0x10u;
	add	rsi,0x10		# src += 0x10u;
	sub	rdx,0x10		# num -= 0x10u;
	jmp	short .Lbig01cp		# goto big01cp;

	# 64-bit chunks.
.Lwrdcp:				# wrdcp:;
	cmp	rdx,0x8
	jb	short .Lbytecp		# if (num < 0x8u) goto bytecp;
	mov	rcx,[rsi]		# val8 = *(zap_i04 *)src;
	mov	[rdi],rcx		# *(zap_i04 *)dest = val8;
	add	rdi,0x8			# dest += 0x8u;
	add	rsi,0x8			# src += 0x8u;
	sub	rdx,0x8			# num -= 0x8u;
	jmp	short .Lwrdcp		# goto wrdcp;

	# Remaining bytes (at most seven at this point).
.Lbytecp:				# bytecp:;
	test	rdx,rdx			# if (num == 0x0u)
	jz	short .Ldone		# goto done;
	mov	cl,[rsi]		# val1 = *(zap_i8 *)src;
	mov	[rdi],cl		# *(zap_i8 *)dest = val1;
	inc	rdi			# ++dest;
	inc	rsi			# ++src;
	dec	rdx			# --num;
	jmp	short .Lbytecp		# goto bytecp;

.Ldone:					# done:;
	mov	rax,rdi			# rdi has been advanced past everything copied
	ret				# return dest;
|