/*
	summary | refs | log | blame | commit | diff
	path: root/zap/src/mem/memcp.S
	blob: ead0718ff2e6d428d3de6d4e87634228f54e5dce (plain) (tree)
*/










































































































                                                                                                                    
/*
	Copyright 2022 Gabriel Jensen.
	This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
	If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

#include <zap/priv.h>

.globl zap_memcp

zap_memcp:

	/*
		Copies num bytes from in to out, walking both buffers from the lowest address upwards.

		void const * in   (rdi)
		zap_sz       num  (rsi)
		void *       out  (rdx)

		The argument registers match the System V AMD64 calling convention.
		Nothing is placed in rax.
		Clobbers rcx, rdx, rsi, rdi, the flags and xmm0 (ymm0 as well when built with AVX).

		NOTE(review): no overlap check is performed, so the buffers presumably must not overlap - confirm against the callers.
	*/
#if defined(__amd64__)

	# Register roles:
	# rdi:  Address of the current input element.
	# rsi:  Number of remaining bytes.
	# rdx:  Address of the current output element.
	# rcx:  Current element (8-byte and 1-byte steps).
	# xmm0: Current element (16-byte step).
	# ymm0: Current element (32-byte step, AVX builds only).

	# The remaining count is compared as an unsigned quantity (jb), as zap_sz is presumably an unsigned size type.
	# A signed branch would treat counts with the top bit set as negative and skip the wide copies.

#if defined(__AVX__)
	# AVX supports 256-bit moves.

	# Copy 32 bytes at a time:
.Lcp32:

	# Leave this loop once fewer than 32 bytes remain:
	cmpq $0x20,%rsi
	jb .Lcp32end

	# Copy:
	vmovups (%rdi),%ymm0 # Move into a register.
	vmovups %ymm0,(%rdx) # And then back into memory.

	# Continue:
	addq $0x20,%rdi
	addq $0x20,%rdx
	subq $0x20,%rsi
	jmp .Lcp32

.Lcp32end:

	# Clear the upper halves of the ymm registers.
	# The code below uses legacy SSE encodings, and so may the caller; leaving the upper halves dirty costs an AVX/SSE transition penalty.
	vzeroupper

#endif

	# AMD64 requires SSE(2).

	# Copy 16 bytes at a time:
.Lcp16:

	# Leave this loop once fewer than 16 bytes remain:
	cmpq $0x10,%rsi
	jb .Lcp8

	# Copy (unaligned moves, as no alignment is assumed for either buffer):
	movdqu (%rdi),%xmm0
	movdqu %xmm0,(%rdx)

	# Continue:
	addq $0x10,%rdi
	addq $0x10,%rdx
	subq $0x10,%rsi
	jmp .Lcp16

	# Copy one word (8 bytes) at a time:
.Lcp8:

	# Leave this loop once fewer than 8 bytes remain:
	cmpq $0x8,%rsi
	jb .Lcp1

	# Copy:
	movq (%rdi),%rcx
	movq %rcx,(%rdx)

	# Continue:
	addq $0x8,%rdi
	addq $0x8,%rdx
	subq $0x8,%rsi
	jmp .Lcp8

	# Copy one byte at a time:
.Lcp1:

	# Stop when no bytes remain:
	testq %rsi,%rsi
	jz .Ldone

	# Copy:
	movb (%rdi),%cl
	movb %cl,(%rdx)

	# Continue:
	incq %rdi
	incq %rdx
	decq %rsi
	jmp .Lcp1

	# Finish:
.Ldone:

	ret

#endif