/*
	Copyright 2022 Gabriel Jensen

	This file is part of rgo.

	rgo is free software: you can redistribute it and/or modify it under the
	terms of the GNU Lesser General Public License as published by the Free
	Software Foundation, either version 3 of the License, or (at your option)
	any later version.

	rgo is distributed in the hope that it will be useful, but WITHOUT ANY
	WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
	more details.

	You should have received a copy of the GNU Lesser General Public License
	along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/

/* NOTE(review): the header name of the following include directive is missing; restore it from the project tree. */
#include .

/*
	rgo_memcpy

	Copies num bytes from in to out, walking both buffers from the lowest
	address upwards: first in 16-byte chunks (when SSE is available), then in
	machine words, then byte by byte.

	Arguments, in order:
		void const * in   Address of the first byte to read.
		size_t       num  Number of bytes to copy.
		void *       out  Address of the first byte to write.

	i386:   the arguments are read from the stack at 0x4/0x8/0xC(%esp).
	        ebx is saved and restored; eax, ecx, edx, the flags and (with
	        __SSE__) xmm0 are overwritten.
	x86-64: the arguments are expected in rdi, rsi and rdx.
	        rdi, rsi, rdx, rcx, xmm0 and the flags are overwritten.

	The routine does not set up a return value.

	Syntax: GNU as, AT&T operand order, run through the C preprocessor.
	All labels except rgo_memcpy use the .L prefix so that they stay local to
	the assembler and never reach the symbol table.
*/
.global rgo_memcpy

rgo_memcpy:
#if defined(__i386__)
	/*
		eax:  Address of the current input element.
		ecx:  Number of remaining elements.
		edx:  Address of the current output element.
		ebx:  Current element (callee-saved, hence the push/pop).
		xmm0: Current element (16-byte chunks).
	*/
	movl	0x4(%esp),%eax
	movl	0x8(%esp),%ecx
	movl	0xC(%esp),%edx
	pushl	%ebx			/* ebx must be restored. */

#if defined(__SSE__)
.Lbig128cpy:
	cmpl	$0x10,%ecx
	jb	.Lwrdcpy		/* Unsigned: the count is a size_t. */
	movups	(%eax),%xmm0		/* Unaligned moves: no alignment is assumed. */
	movups	%xmm0,(%edx)
	addl	$0x10,%eax
	addl	$0x10,%edx
	subl	$0x10,%ecx
	jmp	.Lbig128cpy
#endif

.Lwrdcpy:
	cmpl	$0x4,%ecx
	jb	.Lbytecpy		/* Fewer than four bytes left. */
	movl	(%eax),%ebx
	movl	%ebx,(%edx)
	addl	$0x4,%eax
	addl	$0x4,%edx
	subl	$0x4,%ecx
	jmp	.Lwrdcpy

.Lbytecpy:
	testl	%ecx,%ecx
	jz	.Ldone			/* Nothing left to copy. */
	movb	(%eax),%bl
	movb	%bl,(%edx)
	incl	%eax
	incl	%edx
	decl	%ecx
	jmp	.Lbytecpy

.Ldone:
	popl	%ebx
	ret

#elif defined(__x86_64__)
	/*
		rdi:  Address of the current input element.
		rsi:  Number of remaining elements.
		rdx:  Address of the current output element.
		rcx:  Current element.
		xmm0: Current element (16-byte chunks).
	*/
.Lbig128cpy:
	cmpq	$0x10,%rsi
	jb	.Lwrdcpy		/* Unsigned: the count is a size_t. */
	movups	(%rdi),%xmm0		/* Unaligned moves: no alignment is assumed. */
	movups	%xmm0,(%rdx)
	addq	$0x10,%rdi
	addq	$0x10,%rdx
	subq	$0x10,%rsi
	jmp	.Lbig128cpy

.Lwrdcpy:
	cmpq	$0x8,%rsi
	jb	.Lbytecpy		/* Fewer than eight bytes left. */
	movq	(%rdi),%rcx
	movq	%rcx,(%rdx)
	addq	$0x8,%rdi
	addq	$0x8,%rdx
	subq	$0x8,%rsi
	jmp	.Lwrdcpy

.Lbytecpy:
	testq	%rsi,%rsi
	jz	.Ldone			/* Nothing left to copy. */
	movb	(%rdi),%cl
	movb	%cl,(%rdx)
	incq	%rdi
	incq	%rdx
	decq	%rsi
	jmp	.Lbytecpy

.Ldone:
	ret
#endif