/*
Copyright 2022 Gabriel Jensen
This file is part of rgo.
rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/
#include
/*
	rgo_memcpy: copy num bytes from in to out, lowest address first.

	Arguments, in order:
		void const * in   first byte to read
		size_t       num  number of bytes to copy
		void *       out  first byte to write

	Syntax: GNU as, AT&T operand order, run through the C preprocessor.

	Argument passing:
		i386:   all three arguments are read from the stack at 4, 8 and
		        12(%esp) on entry.
		x86-64: in = rdi, num = rsi, out = rdx (the first three integer
		        argument registers of the System V AMD64 convention).

	The copy runs in three phases: 16-byte chunks through xmm0 (on i386 only
	when __SSE__ is defined), then machine words (4 bytes on i386, 8 bytes on
	x86-64), then single bytes. Unaligned moves are used throughout, so no
	alignment is required of either pointer.

	num is an unsigned quantity, so every threshold test branches with the
	unsigned condition (jb). A signed branch would treat a count with the top
	bit set as negative and push the whole copy through the byte loop.

	Bytes are always copied in ascending address order; nothing here handles
	overlapping buffers where out lies inside [in, in + num).

	No return value is set up by this routine.

	Registers written:
		i386:   eax, ecx, edx, flags, xmm0 (if __SSE__); ebx is used as
		        scratch but saved and restored.
		x86-64: rdi, rsi, rdx, rcx, xmm0, flags.
*/
.global rgo_memcpy
rgo_memcpy:
#if defined(__i386__)
	/* eax: Address of the current input element. */
	movl	0x4(%esp),%eax
	/* ecx: Number of remaining bytes. */
	movl	0x8(%esp),%ecx
	/* edx: Address of the current output element. */
	movl	0xC(%esp),%edx
	/* ebx: Current element. Pushed only after the arguments have been read, so the %esp offsets above are the entry offsets. */
	pushl	%ebx
#if defined(__SSE__)
	/* xmm0: Current 16-byte element. */
.Lrgo_memcpy_vec16:
	cmpl	$0x10,%ecx
	jb	.Lrgo_memcpy_word	/* Fewer than 16 bytes left (unsigned). */
	movups	(%eax),%xmm0
	movups	%xmm0,(%edx)
	addl	$0x10,%eax
	addl	$0x10,%edx
	subl	$0x10,%ecx
	jmp	.Lrgo_memcpy_vec16
#endif
.Lrgo_memcpy_word:
	cmpl	$0x4,%ecx
	jb	.Lrgo_memcpy_byte	/* Fewer than 4 bytes left (unsigned). */
	movl	(%eax),%ebx
	movl	%ebx,(%edx)
	addl	$0x4,%eax
	addl	$0x4,%edx
	subl	$0x4,%ecx
	jmp	.Lrgo_memcpy_word
.Lrgo_memcpy_byte:
	testl	%ecx,%ecx
	jz	.Lrgo_memcpy_done
	movb	(%eax),%bl
	movb	%bl,(%edx)
	incl	%eax
	incl	%edx
	decl	%ecx
	jmp	.Lrgo_memcpy_byte
.Lrgo_memcpy_done:
	popl	%ebx			/* Restore the caller's ebx. */
	ret
#elif defined(__x86_64__)
	/* rdi:  Address of the current input element. */
	/* rsi:  Number of remaining bytes. */
	/* rdx:  Address of the current output element. */
	/* rcx:  Current element (word and byte phases). */
	/* xmm0: Current 16-byte element. */
.Lrgo_memcpy_vec16:
	cmpq	$0x10,%rsi
	jb	.Lrgo_memcpy_word	/* Fewer than 16 bytes left (unsigned). */
	movups	(%rdi),%xmm0
	movups	%xmm0,(%rdx)
	addq	$0x10,%rdi
	addq	$0x10,%rdx
	subq	$0x10,%rsi
	jmp	.Lrgo_memcpy_vec16
.Lrgo_memcpy_word:
	cmpq	$0x8,%rsi
	jb	.Lrgo_memcpy_byte	/* Fewer than 8 bytes left (unsigned). */
	movq	(%rdi),%rcx
	movq	%rcx,(%rdx)
	addq	$0x8,%rdi
	addq	$0x8,%rdx
	subq	$0x8,%rsi
	jmp	.Lrgo_memcpy_word
.Lrgo_memcpy_byte:
	testq	%rsi,%rsi
	jz	.Lrgo_memcpy_done
	movb	(%rdi),%cl
	movb	%cl,(%rdx)
	incq	%rdi
	incq	%rdx
	decq	%rsi
	jmp	.Lrgo_memcpy_byte
.Lrgo_memcpy_done:
	ret
#else
	/* Without this, the exported label would have no body on other targets. */
#error "rgo_memcpy: unsupported target architecture (expected i386 or x86-64)"
#endif