summaryrefslogtreecommitdiff
path: root/zap/source/amd64
diff options
context:
space:
mode:
Diffstat (limited to 'zap/source/amd64')
-rw-r--r--zap/source/amd64/mem/cp.s76
-rw-r--r--zap/source/amd64/mem/fill.s52
2 files changed, 56 insertions, 72 deletions
diff --git a/zap/source/amd64/mem/cp.s b/zap/source/amd64/mem/cp.s
index 57e716f..ef333e0 100644
--- a/zap/source/amd64/mem/cp.s
+++ b/zap/source/amd64/mem/cp.s
@@ -7,58 +7,58 @@
.globl zap_cp
zap_cp:
- # zap_i8 val1;
- # zap_i04 val8;
- # unsigned int128_t val01;
- # unsigned int256_t val02;
+ # zap_i8 val1; // Registers as used below: rdi = dest, rsi = src, rdx = num (bytes left).
+ # zap_i04 val8; // Scratch: rcx/cl for the 8- and 1-byte steps.
+ # unsigned int128_t val01; // Scratch: xmm0 for the 0x10-byte step.
+ # unsigned int256_t val02; // Scratch: ymm0 for the 0x20-byte step.
-.big02cp:
+.big02cp: # big02cp:; // We assume AVX. Copy 0x20-byte chunks while num >= 0x20u.
cmp rdx,0x20
- jl .big01cp # if (num < 0x20u) goto big01cp;
+ jb short .big01cp # if (num < 0x20u) goto big01cp; // unsigned compare: num is a size
- vmovups ymm0,[rsi] # val02 = *src;
- vmovups [rdi],ymm0 # *dest = val02;
+ vmovups ymm0,[rsi] # val02 = *src;
+ vmovups [rdi],ymm0 # *dest = val02;
- add rsi,0x20 # dest += 0x20u;
- add rdi,0x20 # src += 0x20u;
- sub rdx,0x20 # num -= 0x20u;
- jmp .big02cp # goto big02cp;
+ add rsi,0x20 # src += 0x20u;
+ add rdi,0x20 # dest += 0x20u;
+ sub rdx,0x20 # num -= 0x20u;
+ jmp short .big02cp # goto big02cp;
-.big01cp: # big01cp:;
+.big01cp: # big01cp:; // Copy 0x10-byte chunks while num >= 0x10u.
cmp rdx,0x10
- jl .wrdcp # if (num < 0x10u) goto wrdcp;
+ jb short .wrdcp # if (num < 0x10u) goto wrdcp; // unsigned compare
- movdqu xmm0,[rsi] # val01 = *src;
- movdqu [rdi],xmm0 # *dest = val01;
+ movdqu xmm0,[rsi] # val01 = *src;
+ movdqu [rdi],xmm0 # *dest = val01;
- add rsi,0x10 # dest += 0x10u;
- add rdi,0x10 # src += 0x10u;
- sub rdx,0x10 # num -= 0x10u;
- jmp .big01cp # goto big01cp;
+ add rsi,0x10 # src += 0x10u;
+ add rdi,0x10 # dest += 0x10u;
+ sub rdx,0x10 # num -= 0x10u;
+ jmp short .big01cp # goto big01cp;
-.wrdcp: # wrdcp:;
+.wrdcp: # wrdcp:; // Copy 0x8-byte words while num >= 0x8u.
cmp rdx,0x8
- jl .bytecp # if (num < 0x8u) goto bytecp;
+ jb short .bytecp # if (num < 0x8u) goto bytecp; // unsigned compare
- mov rcx,[rsi] # val8 = *src;
- mov [rdi],rcx # *dest = val8;
+ mov rcx,[rsi] # val8 = *src;
+ mov [rdi],rcx # *dest = val8;
- add rdi,0x8 # dest += 0x8u;
- add rsi,0x8 # src += 0x8u;
- sub rdx,0x8 # num -= 0x8u;
- jmp .wrdcp # goto wrdcp
+ add rdi,0x8 # dest += 0x8u;
+ add rsi,0x8 # src += 0x8u;
+ sub rdx,0x8 # num -= 0x8u;
+ jmp short .wrdcp # goto wrdcp;
-.bytecp: # bytecp:;
- test rdx,rdx # if (rem == 0x0)
- jz .done # goto done
+.bytecp: # bytecp:; // Copy the remaining (< 0x8u) bytes one at a time.
+ test rdx,rdx # if (num == 0x0u)
+ jz short .done # goto done;
- mov cl,[rsi] # val1 = *src;
- mov [rsi],cl # *dest = val1;
+ mov cl,[rsi] # val1 = *src;
+ mov [rdi],cl # *dest = val1; // store through dest (rdi), not back into src
- inc rdi # ++dest;
- inc rsi # ++src;
- dec rdx # --rem;
- jmp .bytecp # goto bytecp;
+ inc rdi # ++dest;
+ inc rsi # ++src;
+ dec rdx # --num;
+ jmp short .bytecp # goto bytecp;
.done:
- ret # return
+ ret # return;
diff --git a/zap/source/amd64/mem/fill.s b/zap/source/amd64/mem/fill.s
index 0abd2be..f4022d0 100644
--- a/zap/source/amd64/mem/fill.s
+++ b/zap/source/amd64/mem/fill.s
@@ -7,45 +7,29 @@
.globl zap_fill
zap_fill:
- # zap_i8 val1;
- # zap_i04 val8;
- 
- movzx rax,sil # val8 = val;
- shl rax,0x8 # val8 <<= 0x8u;
- mov al,sil # val8 |= val;
- shl rax,0x8 # val8 <<= 0x8u;
- mov al,sil # val8 |= val;
- shl rax,0x8 # val8 <<= 0x8u;
- mov al,sil # val8 |= val;
- shl rax,0x8 # val8 <<= 0x8u;
- mov al,sil # val8 |= val;
- shl rax,0x8 # val8 <<= 0x8u;
- mov al,sil # val8 |= val;
- shl rax,0x8 # val8 <<= 0x8u;
- mov al,sil # val8 |= val;
- shl rax,0x8 # val8 <<= 0x8u;
- mov al,sil # val8 |= val;
- # val1 = val8;
- 
-.wrdfill: # wrdfill:;
+ movzx rsi,sil # zap_i04 extval = val; // rdi = dest, sil = fill byte, rdx = num (as used below)
+ mov rax,0x0101010101010101 # zap_i04 val = 0x0101010101010101u;
+ imul rax,rsi # val *= extval; // replicates the fill byte into all eight bytes of rax
+ 
+.wrdfill: # wrdfill:; // Store 0x8-byte words while num >= 0x8u.
cmp rdx,0x8
- jl .bytefill # if (num < 0x8u) goto bytefill;
+ jb short .bytefill # if (num < 0x8u) goto bytefill; // unsigned compare: num is a size
- mov [rdi],rax # *dest = val8;
+ mov [rdi],rax # *dest = val;
- add rdi,0x8 # dest += 0x8u;
- sub rdx,0x8 # num -= 0x8u;
- jmp .wrdfill # goto wrdfill
+ add rdi,0x8 # dest += 0x8u;
+ sub rdx,0x8 # num -= 0x8u;
+ jmp short .wrdfill # goto wrdfill;
-.bytefill: # bytefill:;
- test rdx,rdx # if (rem == 0x0)
- jz .done # goto done
+.bytefill: # bytefill:; // Store the remaining (< 0x8u) bytes one at a time.
+ test rdx,rdx # if (num == 0x0u)
+ jz short .done # goto done;
- mov [rsi],al # *dest = val1;
+ mov [rdi],al # *dest = (zap_i8)val; // rsi holds the fill value, not a pointer: store through rdi
- inc rdi # ++dest;
- dec rdx # --rem;
- jmp .bytefill # goto bytefill;
+ inc rdi # ++dest;
+ dec rdx # --num;
+ jmp short .bytefill # goto bytefill;
.done:
- ret # return
+ ret # return;