summaryrefslogtreecommitdiff
path: root/rgo/src/memcpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'rgo/src/memcpy.S')
-rw-r--r-- rgo/src/memcpy.S 61
1 files changed, 61 insertions, 0 deletions
diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S
new file mode 100644
index 0000000..d20e9d6
--- /dev/null
+++ b/rgo/src/memcpy.S
@@ -0,0 +1,61 @@
+/*
+ Copyright 2022 Gabriel Jensen
+
+ This file is part of rgo.
+
+ rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+ rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include <rgo.h>
+
+.global rgo_memcpy
+
+rgo_memcpy:
+#if defined(__x86_64__)
+    /*
+        Copies num bytes from in to out, walking both buffers from low to high addresses
+        (overlapping buffers get no special handling).
+        rdi: void const * in
+        rsi: size_t       num
+        rdx: void *       out
+        The work is done on copies in rcx/r8/r9, so rdi, rsi and rdx are never written.
+        Clobbers rcx, r8, r9, r10, xmm0 and the flags; rax is not set.
+        num is unsigned, so the size checks use jb (unsigned) rather than jl (signed).
+        Labels carry the .L prefix so that the assembler keeps them out of the symbol table.
+    */
+    movq    %rdi,%rcx           /* rcx: Address of the current input element.  */
+    movq    %rdx,%r8            /* r8:  Address of the current output element. */
+    movq    %rsi,%r9            /* r9:  Number of remaining bytes.             */
+.Lbig128cpy: /* 16 bytes per pass through xmm0; SSE2 is a part of AMD64. */
+    cmpq    $0x10,%r9
+    jb      .Lwrdcpy
+    movdqu  (%rcx),%xmm0
+    movdqu  %xmm0,(%r8)
+    addq    $0x10,%rcx
+    addq    $0x10,%r8
+    subq    $0x10,%r9
+    jmp     .Lbig128cpy
+.Lwrdcpy: /* r9 < 16 here, so at most one 8-byte word (moved through r10) is left. */
+    cmpq    $0x8,%r9
+    jb      .Lbytecpy
+    movq    (%rcx),%r10
+    movq    %r10,(%r8)
+    addq    $0x8,%rcx
+    addq    $0x8,%r8
+    subq    $0x8,%r9
+.Lbytecpy: /* r9 < 8 here; the tail is copied one byte at a time through r10b. */
+    testq   %r9,%r9
+    je      .Ldone
+    movb    (%rcx),%r10b
+    movb    %r10b,(%r8)
+    incq    %rcx
+    incq    %r8
+    decq    %r9
+    jmp     .Lbytecpy
+.Ldone:
+    ret
+#endif