summary | refs | log | tree | commit | diff
path: root/rgo/src/memcpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'rgo/src/memcpy.S')
-rw-r--r-- rgo/src/memcpy.S 38
1 files changed, 36 insertions, 2 deletions
diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S
index 475da57..820781d 100644
--- a/rgo/src/memcpy.S
+++ b/rgo/src/memcpy.S
@@ -30,12 +30,33 @@ rgo_memcpy:
/* ebx: Current element. */
pushl %ebx /* ebx must be restored. */
/* xmm0: Current element. */
+ /* ymm0: Current element. */
+#if defined(__AVX__)
+.big256cpy:
+ cmpl $0x20,%ecx
+#if defined(__SSE__)
+ jl .big128cpy
+#else
+ jl .wrdcpy
+#endif
+ vmovdqu (%eax),%ymm0
+ vmovdqu %ymm0,(%edx)
+ addl $0x20,%eax
+ addl $0x20,%edx
+ subl $0x20,%ecx
+ jmp .big256cpy
+#endif
#if defined(__SSE__)
.big128cpy:
cmpl $0x10,%ecx
jl .wrdcpy
+#if defined(__SSE2__)
+ movdqu (%eax),%xmm0
+ movdqu %xmm0,(%edx)
+#else
movups (%eax),%xmm0
movups %xmm0,(%edx)
+#endif
addl $0x10,%eax
addl $0x10,%edx
subl $0x10,%ecx
@@ -68,11 +89,24 @@ rgo_memcpy:
/* rdx: Address of the current output element. */
/* rcx: Current element. */
/* xmm0: Current element. */
+ /* ymm0: Current element. */
+#if defined(__AVX__)
+.big256cpy:
+ cmpq $0x20,%rsi
+ jl .big128cpy
+ vmovups (%rdi),%ymm0
+ vmovups %ymm0,(%rdx)
+ addq $0x20,%rdi
+ addq $0x20,%rdx
+ subq $0x20,%rsi
+ jmp .big256cpy
+#endif
.big128cpy:
+ ret
cmpq $0x10,%rsi
jl .wrdcpy
- movups (%rdi),%xmm0
- movups %xmm0,(%rdx)
+ movdqu (%rdi),%xmm0
+ movdqu %xmm0,(%rdx)
addq $0x10,%rdi
addq $0x10,%rdx
subq $0x10,%rsi