summaryrefslogblamecommitdiff
path: root/rgo/src/memcpy.S
blob: 475da571f058b15243a2767276710ec7117a8089 (plain) (tree)
1
2
3
4
5
6
7
8
9




                                     
                                                                                                                                                                                                                                                     
 
                                                                                                                                                                                                                                              
 
                                                                                                                                              






                  
          


                                
          










                                                         
           
                       
                  









































                                                         

                      
                      
                   




                        

                   
                       
                




                       



                    
/*
	Copyright 2022 Gabriel Jensen

	This file is part of rgo.

	rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

	rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>. 
*/

#include <rgo.h>

.global rgo_memcpy

rgo_memcpy:
	/*
		Copies num bytes from in to out, walking both buffers front to back.

		Arguments, in order:
			void const * in   Address of the first byte to read.
			size_t       num  Number of bytes to copy.
			void *       out  Address of the first byte to write.

		Strategy: copy 16 bytes at a time through xmm0 while at least 16 bytes remain (always on x86-64, only when __SSE__ is defined on i386), then one machine word at a time, then one byte at a time. movups is used for the 16-byte moves, so neither buffer has to be 16-byte aligned.

		num is an unsigned quantity, so every size check uses the unsigned branch jb. A signed branch would treat counts with the top bit set as negative and push the entire copy through the byte loop.

		No overlap handling is performed; the copy always runs forwards.

		This routine does not set up a return value: on i386 eax is left pointing one past the last byte read, on x86-64 rax is never written. NOTE(review): confirm against the prototype in rgo.h that no return value is expected.

		Labels carry the .L prefix so that the assembler treats them as local symbols and keeps them out of the symbol table. Only one of the two preprocessor branches is ever assembled, so reusing the label names is safe.

		No code is emitted for targets other than i386 and x86-64.
	*/
#if defined(__i386__)
	/*
		Arguments are passed on the stack. They are loaded before ebx is pushed, so the offsets below are relative to esp as it was on entry (0x0(%esp) holds the return address).

		Clobbers: eax, ecx, edx, flags, and xmm0 when __SSE__ is defined. ebx is saved and restored.
	*/
	/* eax: Address of the current input element. */
	movl 0x4(%esp),%eax
	/* ecx: Number of remaining bytes. */
	movl 0x8(%esp),%ecx
	/* edx: Address of the current output element. */
	movl 0xC(%esp),%edx
	/* ebx: Current element (word and byte loops). */
	pushl %ebx /* ebx must be restored before returning. */
	/* xmm0: Current element (16-byte loop). */
#if defined(__SSE__)
.Lbig128cpy:
	/* Leave this loop as soon as fewer than 16 bytes remain. */
	cmpl $0x10,%ecx
	jb .Lwrdcpy
	movups (%eax),%xmm0
	movups %xmm0,(%edx)
	addl $0x10,%eax
	addl $0x10,%edx
	subl $0x10,%ecx
	jmp .Lbig128cpy
#endif
.Lwrdcpy:
	/* Leave this loop as soon as fewer than 4 bytes remain. */
	cmpl $0x4,%ecx
	jb .Lbytecpy
	movl (%eax),%ebx
	movl %ebx,(%edx)
	addl $0x4,%eax
	addl $0x4,%edx
	subl $0x4,%ecx
	jmp .Lwrdcpy
.Lbytecpy:
	/* At most 3 bytes remain here; finish when the count reaches zero. */
	testl %ecx,%ecx
	jz .Ldone
	movb (%eax),%bl
	movb %bl,(%edx)
	incl %eax
	incl %edx
	decl %ecx
	jmp .Lbytecpy
.Ldone:
	popl %ebx
	ret
#elif defined(__x86_64__)
	/*
		Arguments arrive in rdi, rsi and rdx and are advanced in place.

		Clobbers: rdi, rsi, rdx, rcx, xmm0, flags. The stack is not touched.
	*/
	/* rdi: Address of the current input element. */
	/* rsi: Number of remaining bytes. */
	/* rdx: Address of the current output element. */
	/* rcx: Current element (word and byte loops). */
	/* xmm0: Current element (16-byte loop). */
.Lbig128cpy:
	/* Leave this loop as soon as fewer than 16 bytes remain. */
	cmpq $0x10,%rsi
	jb .Lwrdcpy
	movups (%rdi),%xmm0
	movups %xmm0,(%rdx)
	addq $0x10,%rdi
	addq $0x10,%rdx
	subq $0x10,%rsi
	jmp .Lbig128cpy
.Lwrdcpy:
	/* Leave this loop as soon as fewer than 8 bytes remain. */
	cmpq $0x8,%rsi
	jb .Lbytecpy
	movq (%rdi),%rcx
	movq %rcx,(%rdx)
	addq $0x8,%rdi
	addq $0x8,%rdx
	subq $0x8,%rsi
	jmp .Lwrdcpy
.Lbytecpy:
	/* At most 7 bytes remain here; finish when the count reaches zero. */
	testq %rsi,%rsi
	jz .Ldone
	movb (%rdi),%cl
	movb %cl,(%rdx)
	incq %rdi
	incq %rdx
	decq %rsi
	jmp .Lbytecpy
.Ldone:
	ret
#endif