diff options
Diffstat (limited to 'zap/source/amd64/mem')
-rw-r--r-- | zap/source/amd64/mem/cp.s | 11 | ||||
-rw-r--r-- | zap/source/amd64/mem/fill.s | 51 |
2 files changed, 57 insertions, 5 deletions
diff --git a/zap/source/amd64/mem/cp.s b/zap/source/amd64/mem/cp.s index 5985f04..57e716f 100644 --- a/zap/source/amd64/mem/cp.s +++ b/zap/source/amd64/mem/cp.s @@ -9,21 +9,22 @@ zap_cp: # zap_i8 val1; # zap_i04 val8; + # unsigned int128_t val01; # unsigned int256_t val02; .big02cp: cmp rdx,0x20 jl .big01cp # if (num < 0x20u) goto big01cp; - vmovups ymm0,[rsi] # val01 = *src; - vmovups [rdi],ymm0 # *dest = val01; + vmovups ymm0,[rsi] # val02 = *src; + vmovups [rdi],ymm0 # *dest = val02; add rsi,0x20 # dest += 0x20u; add rdi,0x20 # src += 0x20u; sub rdx,0x20 # num -= 0x20u; jmp .big02cp # goto big02cp; -.big01cp: +.big01cp: # big01cp:; cmp rdx,0x10 jl .wrdcp # if (num < 0x10u) goto wrdcp; @@ -35,7 +36,7 @@ zap_cp: sub rdx,0x10 # num -= 0x10u; jmp .big01cp # goto big01cp; -.wrdcp: +.wrdcp: # wrdcp:; cmp rdx,0x8 jl .bytecp # if (num < 0x8u) goto bytecp; @@ -47,7 +48,7 @@ zap_cp: sub rdx,0x8 # num -= 0x8u; jmp .wrdcp # goto wrdcp -.bytecp: +.bytecp: # bytecp:; test rdx,rdx # if (rem == 0x0) jz .done # goto done diff --git a/zap/source/amd64/mem/fill.s b/zap/source/amd64/mem/fill.s new file mode 100644 index 0000000..0abd2be --- /dev/null +++ b/zap/source/amd64/mem/fill.s @@ -0,0 +1,51 @@ +# Copyright 2022-2023 Gabriel Jensen. +# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. +# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>. 
.intel_syntax noprefix

.globl zap_fill

#-----------------------------------------------------------------------
# zap_fill: store the byte `val` into `num` consecutive bytes at `dest`.
#
# In:    rdi = dest  (advanced as bytes are written)
#        sil = val   (only the low byte of rsi is read)
#        rdx = num   (byte count, compared as an unsigned quantity)
# Out:   no result is produced on purpose; rax is left holding `val`
#        replicated into all eight bytes.
# Clobb: rax, rcx, rdx, rdi, flags
#
# NOTE(review): the registers used match the first three integer
# arguments of the System V AMD64 convention, i.e. presumably
# zap_fill(dest, val, num) -- confirm against the C declaration.
#-----------------------------------------------------------------------
zap_fill:
	# zap_i8  val1;
	# zap_i04 val8;

	# Replicate val into every byte of val8.  val is zero-extended first, so
	# the product with 0x0101010101010101 cannot carry between byte lanes.
	movzx	eax,sil                 # val8  = val;
	movabs	rcx,0x0101010101010101
	imul	rax,rcx                 # val8 *= 0x0101010101010101u;
	                                # val1  = val8;  (al still equals val)

.wrdfill:                               # wrdfill:;
	# Invariant: rdx = bytes still to be written, rdi = next byte to write.
	cmp	rdx,0x8
	jb	.bytefill               # if (num < 0x8u) goto bytefill;  (unsigned: num is a count)

	mov	[rdi],rax               # *dest = val8;  (eight bytes in one store)

	add	rdi,0x8                 # dest += 0x8u;
	sub	rdx,0x8                 # num  -= 0x8u;
	jmp	.wrdfill                # goto wrdfill;

.bytefill:                              # bytefill:;
	# Fewer than eight bytes remain here; finish them one at a time.
	test	rdx,rdx
	jz	.done                   # if (num == 0x0u) goto done;

	mov	[rdi],al                # *dest = val1;  (rdi is dest; rsi only carries the fill value)

	inc	rdi                     # ++dest;
	dec	rdx                     # --num;
	jmp	.bytefill               # goto bytefill;

.done:                                  # done:;
	ret                             # return;