# path: zap/source/amd64/mem/cp.s
# blob: 3c9f2e175b5fe61c89664397e82664bba3d1b17b
# Copyright 2022-2023 Gabriel Jensen.
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>.

.intel_syntax noprefix

.globl zap_cp

# zap_cp - copy num bytes from src to dest in ascending address order.
#
# ABI:   System V AMD64 (arguments are read from rdi, rsi, rdx).
# In:    rdi = dest
#        rsi = src
#        rdx = num (byte count, compared as an unsigned value)
# Out:   rax = dest + num, i.e. the address just past the last byte written.
# Clobb: rcx, rdx, rsi, rdi, ymm0, upper halves of all ymm registers
#        (vzeroupper), flags.
# Notes: Requires AVX. All loads and stores are unaligned-safe. The copy always
#        walks forward; overlapping buffers get no special treatment.
zap_cp:
	                       # zap_i8 val1;
	                       # zap_i04 val8;
	                       # unsigned int128_t val01;
	                       # unsigned int256_t val02;

.big02cp:                  # big02cp:; // We assume AVX.
	cmp     rdx,0x20
	jb      short .big01cp # if (num < 0x20u) goto big01cp; // Unsigned: num is a size.

	vmovdqu ymm0,[rsi]     # val02 = *(unsigned int256_t *)src;
	vmovdqu [rdi],ymm0     # *(unsigned int256_t *)dest = val02;

	add     rdi,0x20       # dest += 0x20u;
	add     rsi,0x20       # src += 0x20u;
	sub     rdx,0x20       # num -= 0x20u;
	jmp     short .big02cp # goto big02cp;

.big01cp:                  # big01cp:;
	cmp     rdx,0x10
	jb      short .wrdcp   # if (num < 0x10u) goto wrdcp;

	                       # VEX-encoded 128-bit moves: no legacy-SSE instruction runs while the upper ymm state is dirty.
	vmovdqu xmm0,[rsi]     # val01 = *(unsigned int128_t *)src;
	vmovdqu [rdi],xmm0     # *(unsigned int128_t *)dest = val01;

	add     rdi,0x10       # dest += 0x10u;
	add     rsi,0x10       # src += 0x10u;
	sub     rdx,0x10       # num -= 0x10u;
	jmp     short .big01cp # goto big01cp;

.wrdcp:                    # wrdcp:;
	cmp     rdx,0x8
	jb      short .bytecp  # if (num < 0x8u) goto bytecp;

	mov     rcx,[rsi]      # val8 = *(zap_i04 *)src;
	mov     [rdi],rcx      # *(zap_i04 *)dest = val8;

	add     rdi,0x8        # dest += 0x8u;
	add     rsi,0x8        # src += 0x8u;
	sub     rdx,0x8        # num -= 0x8u;
	jmp     short .wrdcp   # goto wrdcp;

.bytecp:                   # bytecp:;
	test    rdx,rdx        # if (num == 0x0u)
	jz      short .done    # goto done;

	mov     cl,[rsi]       # val1 = *(zap_i8 *)src;
	mov     [rdi],cl       # *(zap_i8 *)dest = val1;

	inc     rdi            # ++dest;
	inc     rsi            # ++src;
	dec     rdx            # --num;
	jmp     short .bytecp  # goto bytecp;

.done:                     # done:;
	vzeroupper             # Clear the upper ymm halves so the caller's SSE code pays no transition penalty.
	mov     rax,rdi        # dest has been advanced past every byte written.
	ret                    # return dest;