diff options
Diffstat (limited to 'rgo/src/memcpy.S')
-rw-r--r-- | rgo/src/memcpy.S | 40 |
1 files changed, 38 insertions, 2 deletions
diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S
index 475da57..820781d 100644
--- a/rgo/src/memcpy.S
+++ b/rgo/src/memcpy.S
@@ -30,12 +30,35 @@ rgo_memcpy:
 	/* ebx: Current element. */
 	pushl %ebx /* ebx must be restored. */
 	/* xmm0: Current element. */
+	/* ymm0: Current element. */
+#if defined(__AVX__)
+.big256cpy:
+	/* Move 0x20 bytes per pass while ecx (bytes left) is at least 0x20. */
+	cmpl $0x20,%ecx
+#if defined(__SSE__)
+	jl .big128cpy
+#else
+	jl .wrdcpy
+#endif
+	vmovdqu (%eax),%ymm0
+	vmovdqu %ymm0,(%edx)
+	addl $0x20,%eax
+	addl $0x20,%edx
+	subl $0x20,%ecx
+	jmp .big256cpy
+#endif
 #if defined(__SSE__)
 .big128cpy:
 	cmpl $0x10,%ecx
 	jl .wrdcpy
+	/* movdqu is an SSE2 instruction; plain SSE builds keep movups. */
+#if defined(__SSE2__)
+	movdqu (%eax),%xmm0
+	movdqu %xmm0,(%edx)
+#else
 	movups (%eax),%xmm0
 	movups %xmm0,(%edx)
+#endif
 	addl $0x10,%eax
 	addl $0x10,%edx
 	subl $0x10,%ecx
@@ -68,11 +91,24 @@ rgo_memcpy:
 	/* rdx: Address of the current output element. */
 	/* rcx: Current element. */
 	/* xmm0: Current element. */
+	/* ymm0: Current element. */
+#if defined(__AVX__)
+.big256cpy:
+	/* Move 0x20 bytes per pass while rsi (bytes left) is at least 0x20. */
+	cmpq $0x20,%rsi
+	jl .big128cpy
+	vmovdqu (%rdi),%ymm0
+	vmovdqu %ymm0,(%rdx)
+	addq $0x20,%rdi
+	addq $0x20,%rdx
+	subq $0x20,%rsi
+	jmp .big256cpy
+#endif
 .big128cpy:
 	cmpq $0x10,%rsi
 	jl .wrdcpy
-	movups (%rdi),%xmm0
-	movups %xmm0,(%rdx)
+	movdqu (%rdi),%xmm0
+	movdqu %xmm0,(%rdx)
 	addq $0x10,%rdi
 	addq $0x10,%rdx
 	subq $0x10,%rsi