summaryrefslogblamecommitdiff
path: root/rgo/src/memcpy.S
blob: d20e9d638421753bb110501c1902ff752e2b32e3 (plain) (tree)




























































                                                                                                                                                                                                                                                 
/*
	Copyright 2022 Gabriel Jensen

	This file is part of rgo.

    rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

    rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>. 
*/

#include <rgo.h>

.global rgo_memcpy

/*
	rgo_memcpy: Copies num bytes from in to out, front to back.

	Arguments arrive in the first three integer argument registers of the System V AMD64 calling convention:
		rdi: void const * in
		rsi: size_t       num
		rdx: void *       out

	Clobbers: rcx, r8, r9, r10, xmm0 and the flags.
	The argument registers (rdi, rsi, rdx) are left unmodified, and rax is never written by this routine.

	The copy always proceeds from the lowest address upwards, so a destination that overlaps the source at a higher address is not copied correctly.

	NOTE(review): Only the x86-64 implementation is present here; on any other target the symbol is defined but no instructions are emitted for it.
*/
rgo_memcpy:
#if defined(__x86_64__)
#if defined(__ELF__)
	.type rgo_memcpy,@function
#endif
	/* rcx: Address of the current input element. */
	movq %rdi,%rcx
	/* r8: Address of the current output element. */
	movq %rdx,%r8
	/* r9: Number of remaining bytes (unsigned). */
	movq %rsi,%r9
	/* r10: Temporary. */
	/* xmm0: Temporary. */

	/* Copy 16 bytes at a time for as long as at least 16 remain. SSE2 is a part of AMD64. */
.Lrgo_memcpy_cpy128:
	cmpq $0x10,%r9
	/* The count is a size_t, so the comparison must be unsigned (jb, not jl). */
	jb .Lrgo_memcpy_cpy64
	/* Unaligned moves: nothing is assumed about the alignment of in or out. */
	movdqu (%rcx),%xmm0
	movdqu %xmm0,(%r8)
	addq $0x10,%rcx
	addq $0x10,%r8
	subq $0x10,%r9
	jmp .Lrgo_memcpy_cpy128

	/* Fewer than 16 bytes remain here, so a single 8-byte step is the most that can be needed. */
.Lrgo_memcpy_cpy64:
	cmpq $0x8,%r9
	jb .Lrgo_memcpy_cpy8
	movq (%rcx),%r10
	movq %r10,(%r8)
	addq $0x8,%rcx
	addq $0x8,%r8
	subq $0x8,%r9

	/* Fewer than 8 bytes remain here; copy them one at a time. */
.Lrgo_memcpy_cpy8:
	testq %r9,%r9
	jz .Lrgo_memcpy_done
	movb (%rcx),%r10b
	movb %r10b,(%r8)
	incq %rcx
	incq %r8
	decq %r9
	jmp .Lrgo_memcpy_cpy8

.Lrgo_memcpy_done:
	ret
#if defined(__ELF__)
	.size rgo_memcpy,.-rgo_memcpy
#endif
#endif