summary | refs | log | tree | commit | diff
path: root/zap/src/mem/memcp.S
diff options
context:
space:
mode:
Diffstat (limited to 'zap/src/mem/memcp.S')
-rw-r--r-- zap/src/mem/memcp.S 107
1 files changed, 0 insertions, 107 deletions
diff --git a/zap/src/mem/memcp.S b/zap/src/mem/memcp.S
deleted file mode 100644
index ead0718..0000000
--- a/zap/src/mem/memcp.S
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <zap/priv.h>
-
-.globl zap_memcp
-/*
-    zap_memcp: copy num bytes from in to out, lowest address first.
-    Arguments and the registers they arrive in (System V AMD64 order):
-        void const * in   rdi
-        zap_sz       num  rsi
-        void *       out  rdx
-    Writes rdi, rsi, rdx, rcx, xmm0 (ymm0 with AVX) and the flags; rax is never
-    written. Overlapping buffers get no special handling.
-*/
-zap_memcp:
-#if defined(__amd64__)
-
-    # rdi: Address of the current input element.
-    # rsi: Number of remaining bytes. Unsigned, hence jb in the size checks.
-    # rdx: Address of the current output element.
-    # rcx: Current element (8-byte and 1-byte copies).
-    # xmm0: Current element (16-byte copies).
-    # ymm0: Current element (32-byte copies, AVX only).
-
-#if defined(__AVX__)
-    # AVX supports 256-bit moves.
-    # Copy 32 bytes:
-.big20cp:
-    # Check that at least 32 bytes remain:
-    cmpq $0x20,%rsi
-    jb .avxend # Otherwise leave the AVX loop.
-
-    # Copy:
-    vmovups (%rdi),%ymm0 # Move into a register.
-    vmovups %ymm0,(%rdx) # And then back into memory.
-
-    # Continue:
-    addq $0x20,%rdi
-    addq $0x20,%rdx
-    subq $0x20,%rsi
-    jmp .big20cp
-
-    # Leave AVX: zero the upper ymm halves before the legacy SSE moves below and the return.
-.avxend:
-    vzeroupper
-
-#endif
-
-    # AMD64 requires SSE(2).
-
-    # Copy 16 bytes:
-.big10cp:
-    # Check that at least 16 bytes remain:
-    cmpq $0x10,%rsi
-    jb .wrdcp
-
-    # Copy:
-    movdqu (%rdi),%xmm0
-    movdqu %xmm0,(%rdx)
-
-    # Continue:
-    addq $0x10,%rdi
-    addq $0x10,%rdx
-    subq $0x10,%rsi
-    jmp .big10cp
-
-    # Copy one word (8 bytes):
-.wrdcp:
-    # Check that at least 8 bytes remain:
-    cmpq $0x8,%rsi
-    jb .bytecp
-
-    # Copy:
-    movq (%rdi),%rcx
-    movq %rcx,(%rdx)
-
-    # Continue:
-    addq $0x8,%rdi
-    addq $0x8,%rdx
-    subq $0x8,%rsi
-    jmp .wrdcp
-
-    # Copy one byte:
-.bytecp:
-    # Check whether any bytes remain:
-    testq %rsi,%rsi
-    jz .done
-
-    # Copy:
-    movzbl (%rdi),%ecx # Zero-extending load avoids a partial-register write.
-    movb %cl,(%rdx)
-
-    # Continue:
-    incq %rdi
-    incq %rdx
-    decq %rsi
-    jmp .bytecp
-
-    # Finish:
-.done:
-    ret
-
-#endif