summaryrefslogblamecommitdiff
path: root/zap/source/amd64/mem/cp.s
blob: 57e716fbe2b91c3d2e55f26f70320cf6dde86d72 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11










                                                                                                               
                                                     





                                                           

                                           





                                           
                                  










                                                         
                                










                                                         
                                 












                                            
# Copyright 2022-2023 Gabriel Jensen.
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>.

.intel_syntax noprefix

.globl zap_cp

# zap_cp: copy num bytes from src to dest, front to back.
#
# In:    rdi = dest, rsi = src, rdx = num (byte count, treated as unsigned)
#        (register roles as read by the code below; they match the System V
#        AMD64 integer argument order)
# Out:   rax is not written by this routine.
# Clobb: rcx, rdx, rsi, rdi, ymm0, flags; when num >= 0x20 the upper halves of
#        all ymm registers are zeroed by vzeroupper.
#
# The copy runs in progressively smaller units: 0x20-byte AVX moves, then
# 0x10-byte SSE moves, then 0x8-byte words, then single bytes. All vector
# accesses use the unaligned forms, so no alignment is required of either
# pointer. Bytes are copied in ascending address order.
zap_cp:
	                   # zap_i8 val1;
	                   # zap_i04 val8;
	                   # unsigned int128_t val01;
	                   # unsigned int256_t val02;

	cmp     rdx,0x20
	jb      .big01cp   # if (num < 0x20u) goto big01cp;

.big02cp:              # big02cp:;
	vmovups ymm0,[rsi] # val02 = *src;
	vmovups [rdi],ymm0 # *dest = val02;

	add     rsi,0x20   # src += 0x20u;
	add     rdi,0x20   # dest += 0x20u;
	sub     rdx,0x20   # num -= 0x20u;
	cmp     rdx,0x20
	jae     .big02cp   # if (num >= 0x20u) goto big02cp;

	vzeroupper         # ymm upper halves are dirty here; clear them before the
	                   # legacy-SSE moves below and before returning to the caller.

.big01cp:              # big01cp:;
	cmp     rdx,0x10
	jb      .wrdcp     # if (num < 0x10u) goto wrdcp;

	movdqu  xmm0,[rsi] # val01 = *src;
	movdqu  [rdi],xmm0 # *dest = val01;

	add     rsi,0x10   # src += 0x10u;
	add     rdi,0x10   # dest += 0x10u;
	sub     rdx,0x10   # num -= 0x10u;
	jmp     .big01cp   # goto big01cp;

.wrdcp:                # wrdcp:;
	cmp     rdx,0x8
	jb      .bytecp    # if (num < 0x8u) goto bytecp;

	mov     rcx,[rsi]  # val8 = *src;
	mov     [rdi],rcx  # *dest = val8;

	add     rsi,0x8    # src += 0x8u;
	add     rdi,0x8    # dest += 0x8u;
	sub     rdx,0x8    # num -= 0x8u;
	jmp     .wrdcp     # goto wrdcp;

.bytecp:               # bytecp:;
	test    rdx,rdx    # if (num == 0x0u)
	jz      .done      # goto done;

	mov     cl,[rsi]   # val1 = *src;
	mov     [rdi],cl   # *dest = val1;  (store to dest, not back to src)

	inc     rsi        # ++src;
	inc     rdi        # ++dest;
	dec     rdx        # --num;
	jmp     .bytecp    # goto bytecp;

.done:                 # done:;
	ret                # return;