diff options
Diffstat (limited to 'zap')
-rw-r--r-- | zap/source/amd64/mem/cp.s | 18 | ||||
-rw-r--r-- | zap/source/amd64/mem/fill.s | 4 |
2 files changed, 11 insertions, 11 deletions
diff --git a/zap/source/amd64/mem/cp.s b/zap/source/amd64/mem/cp.s
index ef333e0..2860acf 100644
--- a/zap/source/amd64/mem/cp.s
+++ b/zap/source/amd64/mem/cp.s
@@ -12,12 +12,12 @@ zap_cp:
 	# unsigned int128_t val01;
 	# unsigned int256_t val02;
 
-.big02cp: # // We assume AVX.
+.big02cp: # big02cp:; // We assume AVX.
 	cmp rdx,0x20
 	jl short .big01cp # if (num < 0x20u) goto big01cp;
 
-	vmovups ymm0,[rsi] # val02 = *src;
-	vmovups [rdi],ymm0 # *dest = val02;
+	vmovdqu ymm0,[rsi] # val02 = *(unsigned int256_t *)src;
+	vmovdqu [rdi],ymm0 # *(unsigned int256_t *)dest = val02;
 
 	add rsi,0x20 # dest += 0x20u;
 	add rdi,0x20 # src += 0x20u;
@@ -28,8 +28,8 @@ zap_cp:
 	cmp rdx,0x10
 	jl short .wrdcp # if (num < 0x10u) goto wrdcp;
 
-	movdqu xmm0,[rsi] # val01 = *src;
-	movdqu [rdi],xmm0 # *dest = val01;
+	movdqu xmm0,[rsi] # val01 = *(unsigned int128_t *)src;
+	movdqu [rdi],xmm0 # *(unsigned int128_t *)dest = val01;
 
 	add rsi,0x10 # dest += 0x10u;
 	add rdi,0x10 # src += 0x10u;
@@ -40,8 +40,8 @@ zap_cp:
 	cmp rdx,0x8
 	jl short .bytecp # if (num < 0x8u) goto bytecp;
 
-	mov rcx,[rsi] # val8 = *src;
-	mov [rdi],rcx # *dest = val8;
+	mov rcx,[rsi] # val8 = *(zap_i04 *)src;
+	mov [rdi],rcx # *(zap_i04 *)dest = val8;
 
 	add rdi,0x8 # dest += 0x8u;
 	add rsi,0x8 # src += 0x8u;
@@ -52,8 +52,8 @@ zap_cp:
 	test rdx,rdx # if (rem == 0x0)
 	jz short .done # goto done
 
-	mov cl,[rsi] # val1 = *src;
-	mov [rsi],cl # *dest = val1;
+	mov cl,[rsi] # val1 = *(zap_i8 *)src;
+	mov [rsi],cl # *(zap_i8 *)dest = val1;
 
 	inc rdi # ++dest;
 	inc rsi # ++src;
diff --git a/zap/source/amd64/mem/fill.s b/zap/source/amd64/mem/fill.s
index f4022d0..7edd36b 100644
--- a/zap/source/amd64/mem/fill.s
+++ b/zap/source/amd64/mem/fill.s
@@ -15,7 +15,7 @@ zap_fill:
 	cmp rdx,0x8
 	jl short .bytefill # if (num < 0x8u) goto bytefill;
 
-	mov [rdi],rax # *dest = val8;
+	mov [rdi],rax # *(zap_i04 *)dest = val8;
 
 	add rdi,0x8 # dest += 0x8u;
 	sub rdx,0x8 # num -= 0x8u;
@@ -25,7 +25,7 @@ zap_fill:
 	test rdx,rdx # if (rem == 0x0)
 	jz short .done # goto done
 
-	mov [rsi],al # *dest = val1;
+	mov [rsi],al # *(zap_i8 *)dest = val1;
 
 	inc rdi # ++dest;
 	dec rdx # --rem;