diff options
Diffstat (limited to 'rgo/src/memcpy.S')
-rw-r--r-- | rgo/src/memcpy.S | 61 |
1 files changed, 61 insertions, 0 deletions
/*
	Copyright 2022 Gabriel Jensen

	This file is part of rgo.

	rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

	rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/

/*
	Syntax: GNU as, AT&T operand order (source,destination), run through the C preprocessor.
*/

#include <rgo.h>

.global rgo_memcpy

/*
	rgo_memcpy: Copy num bytes from in to out.

	Arguments (note the order: the size comes second, the destination last):
		rdi: void const * in
		rsi: size_t       num
		rdx: void *       out

	Returns:  Nothing; rax is not written.
	Clobbers: rcx, r8, r9, r10, xmm0 and the flags. The argument registers themselves are left unmodified.
	Stack:    Not used.

	The copy runs from the lowest address upwards, first in 16-byte chunks, then in 8-byte chunks, then byte by byte. No alignment is required of either pointer. Because of the ascending order, a destination that overlaps the source at a higher address is not copied correctly.

	NOTE(review): Only an x86-64 body is provided here; on any other architecture the label below has no code behind it. Confirm that the build only assembles this file for x86-64.
*/
rgo_memcpy:
#if defined(__x86_64__)
	/* rcx: Address of the current input element. */
	movq	%rdi,%rcx
	/* r8: Address of the current output element. */
	movq	%rdx,%r8
	/* r9: Number of remaining bytes (unsigned). */
	movq	%rsi,%r9
	/* r10:  Temporary. */
	/* xmm0: Temporary. */

	/* 16 bytes at a time. SSE2 is a part of AMD64, and movdqu tolerates unaligned addresses. */
.Lrgo_memcpy_copy16:
	cmpq	$16,%r9
	/* The count is a size_t, so the comparison must be unsigned (jb, not jl). */
	jb	.Lrgo_memcpy_copy8
	movdqu	(%rcx),%xmm0
	movdqu	%xmm0,(%r8)
	addq	$16,%rcx
	addq	$16,%r8
	subq	$16,%r9
	jmp	.Lrgo_memcpy_copy16

	/* 8 bytes at a time. Fewer than 16 bytes remain here, so this runs at most once. */
.Lrgo_memcpy_copy8:
	cmpq	$8,%r9
	jb	.Lrgo_memcpy_copy1
	movq	(%rcx),%r10
	movq	%r10,(%r8)
	addq	$8,%rcx
	addq	$8,%r8
	subq	$8,%r9
	jmp	.Lrgo_memcpy_copy8

	/* The remaining tail (fewer than 8 bytes), one byte at a time. */
.Lrgo_memcpy_copy1:
	testq	%r9,%r9
	jz	.Lrgo_memcpy_done
	movb	(%rcx),%r10b
	movb	%r10b,(%r8)
	incq	%rcx
	incq	%r8
	decq	%r9
	jmp	.Lrgo_memcpy_copy1

.Lrgo_memcpy_done:
	ret
#endif

#if defined(__ELF__)
	/* Tell the linker that this object does not need an executable stack. */
	.section	.note.GNU-stack,"",@progbits
#endif