1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
/*
Copyright 2022 Gabriel Jensen
This file is part of rgo.
rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/
#include <rgo.h>
/*
rgo_memcpy: copy num bytes from the buffer at in to the buffer at out.

Arguments (first three integer argument registers of the System V AMD64 convention):
	rdi: void const * in   - address of the first byte to read
	rsi: size_t       num  - number of bytes to copy
	rdx: void *       out  - address of the first byte to write

Result:   no value is placed in rax by this routine.
Clobbers: rcx, r8, r9, r10, xmm0 and the flags. rdi, rsi and rdx are not modified.
Stack:    not used.

The copy walks both buffers in ascending address order and does nothing
special for overlapping buffers. No alignment is required of either pointer
(the 16-byte step uses unaligned moves).

num is unsigned, so every length check uses an unsigned condition (jb).

Only an x86-64 body is provided; for any other target the label is emitted
with no instructions following it in this file.
*/
.global rgo_memcpy
rgo_memcpy:
#if defined(__x86_64__)
        /* Work on copies so that the argument registers stay intact. */
        movq    %rdi,%rcx               /* rcx: address of the current input element */
        movq    %rdx,%r8                /* r8:  address of the current output element */
        movq    %rsi,%r9                /* r9:  number of bytes still to copy */
        /* r10 and xmm0 carry the data being moved. */

/* Copy 16 bytes per iteration while at least 16 bytes remain. */
.Lrgo_memcpy_copy16:
        cmpq    $0x10,%r9
        jb      .Lrgo_memcpy_copy8      /* unsigned: fewer than 16 bytes left */
        movups  (%rcx),%xmm0
        movups  %xmm0,(%r8)
        addq    $0x10,%rcx
        addq    $0x10,%r8
        subq    $0x10,%r9
        jmp     .Lrgo_memcpy_copy16

/* Fewer than 16 bytes remain here, so this 8-byte step runs at most once. */
.Lrgo_memcpy_copy8:
        cmpq    $0x8,%r9
        jb      .Lrgo_memcpy_copy1      /* unsigned: fewer than 8 bytes left */
        movq    (%rcx),%r10
        movq    %r10,(%r8)
        addq    $0x8,%rcx
        addq    $0x8,%r8
        subq    $0x8,%r9
        jmp     .Lrgo_memcpy_copy8

/* Fewer than 8 bytes remain here; finish one byte at a time. */
.Lrgo_memcpy_copy1:
        testq   %r9,%r9
        jz      .Lrgo_memcpy_done
        movb    (%rcx),%r10b
        movb    %r10b,(%r8)
        incq    %rcx
        incq    %r8
        decq    %r9
        jmp     .Lrgo_memcpy_copy1

.Lrgo_memcpy_done:
        ret
#endif
|