/*
Copyright 2022 Gabriel Jensen
This file is part of rgo.
rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/
#include
/*
	rgo_memcpy: copy num bytes from the buffer at in to the buffer at out.

	Syntax: GNU as, AT&T operand order (source, destination).

	Arguments (the register order matches the System V AMD64 integer
	argument sequence):
		rdi: void const * in	Address of the first byte to read.
		rsi: size_t num		Number of bytes to copy. May be zero.
		rdx: void * out		Address of the first byte to write.

	Result: this routine does not write rax.

	Clobbers: rcx, r8, r9, r10, xmm0 and the flags. The argument
	registers rdi, rsi and rdx are only read.

	The copy always walks from the lowest address upwards: first in
	16-byte chunks, then in 8-byte chunks, then byte by byte. All wide
	accesses are unaligned-safe (movups / plain movq), so neither pointer
	needs any particular alignment.

	num is an unsigned quantity, so every size comparison below uses the
	unsigned branch (jb). A signed branch would treat any count with the
	top bit set as negative and skip both wide loops.

	NOTE(review): only an x86-64 body is provided. On any other target
	the label below is emitted with nothing behind it - confirm that the
	build never assembles this file for another architecture.
*/
.global rgo_memcpy
rgo_memcpy:
#if defined(__x86_64__)
	/* Working copies of the arguments. */
	movq	%rdi,%rcx		/* rcx: address of the current input element */
	movq	%rdx,%r8		/* r8:  address of the current output element */
	movq	%rsi,%r9		/* r9:  number of bytes still to copy */
	/* r10:  scratch for the 8-byte and 1-byte moves */
	/* xmm0: scratch for the 16-byte moves */

	/* 16 bytes per iteration while at least 16 remain. */
.Lrgo_memcpy_copy16:
	cmpq	$0x10,%r9
	jb	.Lrgo_memcpy_copy8	/* unsigned: fewer than 16 bytes left */
	movups	(%rcx),%xmm0
	movups	%xmm0,(%r8)
	addq	$0x10,%rcx
	addq	$0x10,%r8
	subq	$0x10,%r9
	jmp	.Lrgo_memcpy_copy16

	/* 8 bytes per iteration while at least 8 remain. */
.Lrgo_memcpy_copy8:
	cmpq	$0x8,%r9
	jb	.Lrgo_memcpy_copy1	/* unsigned: fewer than 8 bytes left */
	movq	(%rcx),%r10
	movq	%r10,(%r8)
	addq	$0x8,%rcx
	addq	$0x8,%r8
	subq	$0x8,%r9
	jmp	.Lrgo_memcpy_copy8

	/* Remaining tail, one byte at a time, until the counter reaches zero. */
.Lrgo_memcpy_copy1:
	testq	%r9,%r9
	jz	.Lrgo_memcpy_done
	movb	(%rcx),%r10b
	movb	%r10b,(%r8)
	incq	%rcx
	incq	%r8
	decq	%r9
	jmp	.Lrgo_memcpy_copy1

.Lrgo_memcpy_done:
	ret
#endif