/*
	Copyright 2022 Gabriel Jensen

	This file is part of rgo.

	rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

	rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/

/* NOTE(review): the header name of this directive was missing in the reviewed text; <rgo.h> is presumed - confirm against the project tree. */
#include <rgo.h>

/*
	rgo_memcpy: Copy num bytes from in to out.

	Syntax: GNU as, AT&T operand order, run through the C preprocessor.
	ABI:    System V AMD64.

	In:
		rdi: void const * in  - First byte to read.
		rsi: size_t       num - Number of bytes to copy.
		rdx: void *       out - First byte to write.

	Out:
		Nothing is placed in rax.

	Clobbers:
		rcx, r8, r9, r10, xmm0, flags.

	Bytes are copied in ascending address order: first in 16-byte chunks, then in 8-byte chunks, and finally one byte at a time. No alignment is required (the 16-byte moves use movups). Overlapping ranges get no special treatment.

	The body only exists when __x86_64__ is defined; on any other target the label is emitted with no instructions behind it.
*/

.global rgo_memcpy

rgo_memcpy:
#if defined(__x86_64__)
	/* rcx: Address of the current input element. */
	movq	%rdi,%rcx

	/* r8: Address of the current output element. */
	movq	%rdx,%r8

	/* r9: Number of remaining bytes. */
	movq	%rsi,%r9

	/* r10:  Temporary. */
	/* xmm0: Temporary. */

	/* Copy 16 bytes per iteration while at least 16 remain. */
.Lbig128cpy:
	cmpq	$0x10,%r9
	jb	.Lwrdcpy	/* Unsigned compare: the count is a size_t. */

	movups	(%rcx),%xmm0
	movups	%xmm0,(%r8)

	addq	$0x10,%rcx
	addq	$0x10,%r8
	subq	$0x10,%r9
	jmp	.Lbig128cpy

	/* Copy 8 bytes per iteration while at least 8 remain. */
.Lwrdcpy:
	cmpq	$0x8,%r9
	jb	.Lbytecpy	/* Unsigned compare, as above. */

	movq	(%rcx),%r10
	movq	%r10,(%r8)

	addq	$0x8,%rcx
	addq	$0x8,%r8
	subq	$0x8,%r9
	jmp	.Lwrdcpy

	/* Copy the remaining bytes (fewer than 8) one at a time. */
.Lbytecpy:
	testq	%r9,%r9
	jz	.Ldone

	movb	(%rcx),%r10b
	movb	%r10b,(%r8)

	incq	%rcx
	incq	%r8
	decq	%r9
	jmp	.Lbytecpy

.Ldone:
	ret
#endif