summaryrefslogtreecommitdiff
path: root/zap/source/amd64/mem
diff options
context:
space:
mode:
Diffstat (limited to 'zap/source/amd64/mem')
-rw-r--r--zap/source/amd64/mem/fndbyte.S42
-rw-r--r--zap/source/amd64/mem/fndchr.S37
-rw-r--r--zap/source/amd64/mem/foreach.S54
-rw-r--r--zap/source/amd64/mem/memcp.S91
-rw-r--r--zap/source/amd64/mem/memeq.S87
-rw-r--r--zap/source/amd64/mem/memfill.S24
-rw-r--r--zap/source/amd64/mem/strcp.S34
-rw-r--r--zap/source/amd64/mem/streq.S36
-rw-r--r--zap/source/amd64/mem/strfill.S25
-rw-r--r--zap/source/amd64/mem/strlen.S25
-rw-r--r--zap/source/amd64/mem/utf20len.S27
11 files changed, 265 insertions, 217 deletions
diff --git a/zap/source/amd64/mem/fndbyte.S b/zap/source/amd64/mem/fndbyte.S
index 9298b73..af2ee84 100644
--- a/zap/source/amd64/mem/fndbyte.S
+++ b/zap/source/amd64/mem/fndbyte.S
@@ -5,40 +5,46 @@
.globl zap_fndbyte
zap_fndbyte:
- # rax: Address of the current element.
- # rdi: Address of the first element.
- # rsi: Address of the element after the last element.
- # rdx: Byte value.
- # rcx: Current byte.
+# Address of the current element:
+#define addr %rax
+# Address of the first element:
+#define start %rdi
+# Address of the element after the last element:
+#define afterbuf %rsi
+# Byte value:
+#define cmp %dl
+# Current byte:
+#define val %cl
- movq %rdi,%rax
+ movq start,addr # addr = start
- addq %rdi,%rsi
+ addq start,afterbuf # afterbuf += start
# Iterate over the array:
.loop:
# Check if we have reached the end of the array:
- cmpq %rax,%rsi
- je .nfnd
+ cmpq addr,afterbuf # if (addr == afterbuf)
+ je .nfnd # goto nfnd
# Check if we have found the byte value:
- movb (%rax),%cl
- cmpb %cl,%dl
- je .fnd
+ movb (addr),val # val = *addr
+ cmpb val,cmp # if (val == cmp)
+ je .fnd # goto fnd
# Continue to the next byte:
- incq %rax
- jmp .loop
+ incq addr # ++addr
+ jmp .loop # goto loop
# Found:
.fnd:
- subq %rdi,%rax
- ret
+ # Get the offset of the byte:
+ subq start,addr # addr -= start
+ ret # return addr
# Not found:
.nfnd:
- movq $0xFFFFFFFFFFFFFFFF,%rax
- ret
+ movq $0xFFFFFFFFFFFFFFFF,addr # addr = FFFFFFFFFFFFFFFF
+ ret # return addr
diff --git a/zap/source/amd64/mem/fndchr.S b/zap/source/amd64/mem/fndchr.S
index 1078a10..67e0ea1 100644
--- a/zap/source/amd64/mem/fndchr.S
+++ b/zap/source/amd64/mem/fndchr.S
@@ -5,39 +5,44 @@
.globl zap_fndchr
zap_fndchr:
- # rdi: Address of the first character.
- # rsi: Character to be found.
- # rax: Address of the current character.
- # rdx: Current character.
+# Address of the first character:
+#define start %rdi
+# Character to be found:
+#define cmp %sil
+# Address of the current character:
+#define addr %rax
+# Current character:
+#define chr %dl
- movq %rdi,%rax
+ movq start,addr # addr = start
# Iterate over the string:
.loop:
# Copy the character into a register:
- movb (%rax),%dl
+ movb (addr),chr # chr = *addr
# Check if we have found the character:
- cmpb %dl,%sil
- je .fnd
+ cmpb chr,cmp # if (chr == cmp)
+ je .fnd # goto fnd
# Check if we have found the null-terminator:
- testb %dl,%dl
- jz .nfnd
+ testb chr,chr # if (chr == 0x0)
+ jz .nfnd # goto nfnd
# Continue to the next character:
- incq %rax
- jmp .loop
+ incq addr # ++addr
+ jmp .loop # goto loop
# Found:
.fnd:
- subq %rdi,%rax
- ret
+ # Get the offset of the character:
+ subq start,addr # addr -= start
+ ret # return addr
# Not found:
.nfnd:
- movq $0xFFFFFFFFFFFFFFFF,%rax
- ret
+ movq $0xFFFFFFFFFFFFFFFF,addr # addr = FFFFFFFFFFFFFFFF
+ ret # return addr
diff --git a/zap/source/amd64/mem/foreach.S b/zap/source/amd64/mem/foreach.S
index f19bcfa..6766933 100644
--- a/zap/source/amd64/mem/foreach.S
+++ b/zap/source/amd64/mem/foreach.S
@@ -5,51 +5,55 @@
.globl zap_foreach
zap_foreach:
- # rbx: Address of the current element.
- # r12: Address of the element after the last input element.
- # r13: Size of each input element.
- # r14: Address of the function.
+# Address of the current element:
+#define addr %rbx
+# Address of the element after the last input element:
+#define afterbuf %r12
+# Size of each input element:
+#define sz %r13
+# Address of the function:
+#define fn %r14
# We're gonna use callee-saved registers for storing values so they don't get overwritten with each function call.
# Push the callee-saved registers:
- pushq %rbx
- pushq %r12
- pushq %r13
- pushq %r14
+ pushq addr
+ pushq afterbuf
+ pushq sz
+ pushq fn
# Move registers into place:
- movq %rdi,%rbx
- movq %rsi,%r13
- movq %rcx,%r14
+ movq %rdi,addr
+ movq %rsi,sz
+ movq %rcx,fn
# Get the one-past-the-end address:
- movq %rdx,%r12
- imulq %r13,%r12 # Calculate the array size in bytes (sz * num). We're using signed multiply because the equivalent using the unsigned instruction would use more instructions.
- addq %rbx,%r12
+ movq %rdx,afterbuf
+ imulq sz,afterbuf # afterbuf *= sz // Calculate the array size in bytes (sz * num). We're using signed multiply because the equivalent using the unsigned instruction would use more instructions.
+ addq addr,afterbuf # afterbuf += addr
# Iterate through the array:
.loop:
# Check if we have reached the one-past-the-end address:
- cmpq %rbx,%r12
- je .done
+ cmpq addr,afterbuf # if (addr == afterbuf)
+ je .done # goto done
# Call the provided function:
- movq %rbx,%rdi # Provide the current address to the function.
- call *%r14 # We don't need to save any registers for this as we only use callee-saved registers.
+ movq addr,%rdi # // Provide the current address to the function.
+ call *fn # fn(addr) // We don't need to save any registers for this as we only use callee-saved registers.
# Continue to the next element:
- addq %r13,%rbx
- jmp .loop
+ addq sz,addr # addr += sz
+ jmp .loop # goto loop
# Finish:
.done:
# Restore the callee-saved registers:
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
+ popq fn
+ popq sz
+ popq afterbuf
+ popq addr
- ret
+ ret # return
diff --git a/zap/source/amd64/mem/memcp.S b/zap/source/amd64/mem/memcp.S
index 5691446..ac310ae 100644
--- a/zap/source/amd64/mem/memcp.S
+++ b/zap/source/amd64/mem/memcp.S
@@ -5,90 +5,95 @@
.globl zap_memcp
zap_memcp:
- # rdi: Address of the current input element.
- # rsi: Number of remaining elements.
- # rdx: Address of the current output element.
- # rcx: Current element.
- # xmm0: Current element.
- # ymm0: Current element.
+# Address of the current input element:
+#define iaddr %rdi
+# Number of remaining bytes:
+#define rem %rsi
+# Address of the current output element:
+#define oaddr %rdx
+# Current element:
+#define val1 %cl
+#define val8 %rcx
+#define val01 %xmm0
+#define val02 %ymm0
#if defined(__AVX__)
# AVX support 256-bit moves.
# Copy 32 bytes:
-.big20cp:
+.big02cp:
# Check if there are at least 32 bytes remaining:
- cmpq $0x20,%rsi
- jl .big10cp # If not, skip to the 10 byte copying.
+ cmpq $0x20,rem # if (rem < 0x20)
+ jl .big01cp # goto big01cp // If not, skip to the 0x10-byte (16-byte) copying.
# Copy:
- vmovups (%rdi),%ymm0 # Move into a register.
- vmovups %ymm0,(%rdx) # And then back into memory.
+ vmovups (iaddr),val02 # val02 = *iaddr
+ vmovups val02,(oaddr) # *oaddr = val02
# Continue:
- addq $0x20,%rdi
- addq $0x20,%rdx
- subq $0x20,%rsi
- jmp .big20cp
+ addq $0x20,iaddr # iaddr += 0x20
+ addq $0x20,oaddr # oaddr += 0x20
+ subq $0x20,rem # rem -= 0x20
+ jmp .big02cp # goto big02cp
#endif
- # AMD64 requires SSE(2).
+ # AMD64 requires SSE(2), so we don't have to test it.
# Copy 16 bytes:
-.big10cp:
+.big01cp:
# Check if there are at least 16 bytes remaining:
- cmpq $0x10,%rsi
- jl .wrdcp
+ cmpq $0x10,rem # if (rem < 0x10)
+ jl .wrdcp # goto wrdcp
# Copy:
- movdqu (%rdi),%xmm0
- movdqu %xmm0,(%rdx)
+ movdqu (iaddr),val01 # val01 = *iaddr
+ movdqu val01,(oaddr) # *oaddr = val01
# Continue:
- addq $0x10,%rdi
- addq $0x10,%rdx
- subq $0x10,%rsi
- jmp .big10cp
+ addq $0x10,iaddr # iaddr += 0x10
+ addq $0x10,oaddr # oaddr += 0x10
+ subq $0x10,rem # rem -= 0x10
+ jmp .big01cp # goto big01cp
# Copy one word (8 bytes):
.wrdcp:
# Check if there are at least 8 bytes remaining:
- cmpq $0x8,%rsi
- jl .bytecp
+ cmpq $0x8,rem # if (rem < 8)
+ jl .bytecp # goto bytecp
# Copy:
- movq (%rdi),%rcx
- movq %rcx,(%rdx)
+ movq (iaddr),val8 # val8 = *iaddr
+ movq val8,(oaddr) # *oaddr = val8
# Continue:
- addq $0x8,%rdi
- addq $0x8,%rdx
- subq $0x8,%rsi
- jmp .wrdcp
+ addq $0x8,iaddr # iaddr += 0x8
+ addq $0x8,oaddr # oaddr += 0x8
+ subq $0x8,rem # rem -= 0x8
+ jmp .wrdcp # goto wrdcp
# Copy one byte:
.bytecp:
# Check if we have any bytes remaining:
- testq %rsi,%rsi
- jz .done
+ testq rem,rem # if (rem == 0x0)
+ jz .done # goto done
# Copy:
- movb (%rdi),%cl
- movb %cl,(%rdx)
+ movb (iaddr),val1 # val1 = *iaddr
+ movb val1,(oaddr) # *oaddr = val1
# Continue:
- incq %rdi
- incq %rdx
- decq %rsi
- jmp .bytecp
+ incq iaddr # ++iaddr
+ incq oaddr # ++oaddr
+ decq rem # --rem
+ jmp .bytecp # goto bytecp
- # Finish:
+ # Return:
.done:
- ret
+ ret # return
\ No newline at end of file
diff --git a/zap/source/amd64/mem/memeq.S b/zap/source/amd64/mem/memeq.S
index ba43dfc..b30a884 100644
--- a/zap/source/amd64/mem/memeq.S
+++ b/zap/source/amd64/mem/memeq.S
@@ -5,63 +5,70 @@
.globl zap_memeq
zap_memeq:
- /* rdi: Left pointer. */
- /* rsi: Number of remaining elements. */
- /* rdx: Right pointer. */
- /* rax: Current left element. */
- /* rcx: Current right element. */
+# Left pointer:
+#define laddr %rdi
+# Number of remaining bytes:
+#define rem %rsi
+# Right pointer:
+#define raddr %rdx
+# Current left element:
+#define lval8 %rax
+#define lval1 %al
+# Current right element:
+#define rval8 %rcx
+#define rval1 %cl
- /* Compare words: */
+ # Compare words:
.wrdcmp:
- /* Check if there's at least one word left: */
- cmpq $0x8,%rsi
- jl .bytecmp /* If not, skip to byte checks: */
+ # Check if there's at least one word left:
+ cmpq $0x8,rem # if (rem < 0x8)
+ jl .bytecmp # goto bytecmp
- /* Copy the values into registers: */
- movq (%rdi),%rax
- movq (%rdx),%rcx
+ # Copy the values into registers:
+ movq (laddr),lval8 # lval8 = *laddr
+ movq (raddr),rval8 # rval8 = *raddr
- /* Check if the words are equal: */
- cmpq %rax,%rcx
- jne .neq
+ # Check if the words are equal:
+ cmpq lval8,rval8 # if (lval8 != rval8)
+ jne .neq # goto neq
- /* Mark eight more bytes as equal: */
- addq $0x8,%rdi
- addq $0x8,%rdx
- subq $0x8,%rsi
+ # Mark eight more bytes as equal:
+ addq $0x8,laddr # laddr += 0x8
+ addq $0x8,raddr # raddr += 0x8
+ subq $0x8,rem # rem -= 0x8
- /* Continue to the next word: */
- jmp .wrdcmp
+ # Continue to the next word:
+ jmp .wrdcmp # goto wrdcmp
- /* Compare bytes: */
+ # Compare bytes:
.bytecmp:
- /* Check if there are any bytes left: */
- testq %rsi,%rsi
- jz .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */
+ # Check if there are any bytes left:
+ testq rem,rem # if (rem == 0x0)
+ jz .eq # goto eq // If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal.
- /* Copy the values into registers: */
- movb (%rdi),%al
- movb (%rdx),%cl
+ # Copy the values into registers:
+ movb (laddr),lval1 # lval1 = *laddr
+ movb (raddr),rval1 # rval1 = *raddr
- cmpb %al,%cl
- jne .neq
+ cmpb lval1,rval1 # if (lval1 != rval1)
+ jne .neq # goto neq
- /* Mark another byte as equal: */
- incq %rdi
- incq %rdx
- decq %rsi
+ # Mark another byte as equal:
+ incq laddr # ++laddr
+ incq raddr # ++raddr
+ decq rem # --rem
- /* Continue to the next byte: */
- jmp .bytecmp
+ # Continue to the next byte:
+ jmp .bytecmp # goto bytecmp
- /* The memory sequences have compared equal: */
+ # The memory sequences have compared equal:
.eq:
movb $0xFF,%al
- ret
+ ret # return FF
- /* The memory sequences have compared NOT equal: */
+ # The memory sequences have compared NOT equal:
.neq:
movb $0x0,%al
- ret
+ ret # return 0
diff --git a/zap/source/amd64/mem/memfill.S b/zap/source/amd64/mem/memfill.S
index e563b55..c38eec8 100644
--- a/zap/source/amd64/mem/memfill.S
+++ b/zap/source/amd64/mem/memfill.S
@@ -5,26 +5,30 @@
.globl zap_memfill
zap_memfill:
- # rdi: Address of the current element.
- # rsi: Address of the element after the last element.
- # rdx: Byte value.
+# Address of the current element:
+#define addr %rdi
+# Address of the element after the last element:
+#define afterbuf %rsi
+# Byte value:
+#define val %dl
- addq %rdi,%rsi
+ addq addr,afterbuf # afterbuf += addr // On entry afterbuf holds the number of bytes; after this it is the one-past-the-end address.
# Iterate over buffer:
.loop:
# Check if we have reached the final element:
- cmpq %rdi,%rsi
- je .done # Exit loop if we have.
+ cmpq addr,afterbuf # if (addr == afterbuf)
+ je .done # goto done
# Set the value of the current element:
- movb %dl,(%rdi)
+ movb val,(addr) # *addr = val
# Continue to next element:
- incq %rdi
- jmp .loop
+ incq addr # ++addr
+ jmp .loop # goto loop
# Finish:
.done:
- ret
+
+ ret # return
diff --git a/zap/source/amd64/mem/strcp.S b/zap/source/amd64/mem/strcp.S
index eb5c276..820ed96 100644
--- a/zap/source/amd64/mem/strcp.S
+++ b/zap/source/amd64/mem/strcp.S
@@ -5,35 +5,39 @@
.globl zap_strcp
zap_strcp:
- # rax: Address of the current input character.
- # rdi: Address of the first input character.
- # rsi: Address of the current output character.
- # rdx: Current character.
+# Address of the current input character:
+#define iaddr %rax
+# Address of the first input character:
+#define start %rdi
+# Address of the current output character:
+#define oaddr %rsi
+# Current character:
+#define chr %dl
- movq %rdi,%rax
+ movq start,iaddr
# Iterate over the strings:
.loop:
# Copy character:
- movb (%rax),%dl # Move it into a register...
- movb %dl,(%rsi) # ... and then back into memory.
+ movb (iaddr),chr # chr = *iaddr
+ movb chr,(oaddr) # *oaddr = chr
# Check if we have reached the null-terminator:
- testb %dl,%dl
- jz .done
+ testb chr,chr # if (chr == 0x0)
+ jz .done # goto done
# Continue to the next character:
- incq %rax
- incq %rsi
- jmp .loop
+ incq iaddr # ++iaddr
+ incq oaddr # ++oaddr
+ jmp .loop # goto loop
# Finish:
.done:
# Get the length of the (input) string:
- subq %rdi,%rax
- decq %rax # We do not count the null-terminator in the string length.
+ subq start,iaddr # iaddr -= start
+ decq iaddr # --iaddr // We do not count the null-terminator in the string length.
- ret
+ ret # return iaddr
diff --git a/zap/source/amd64/mem/streq.S b/zap/source/amd64/mem/streq.S
index 4270e7d..e054531 100644
--- a/zap/source/amd64/mem/streq.S
+++ b/zap/source/amd64/mem/streq.S
@@ -5,39 +5,43 @@
.globl zap_streq
zap_streq:
- # rdi: Address of the current left character.
- # rsi: Address of the current right character.
- # rax: Current left character.
- # rdx: Current right character.
+# Address of the current left character:
+#define laddr %rdi
+# Address of the current right character:
+#define raddr %rsi
+# Current left character:
+#define lchr %al
+# Current right character:
+#define rchr %dl
# Iterate over the strings:
.loop:
# Copy the characters into registers:
- movb (%rdi),%al
- movb (%rsi),%dl
+ movb (laddr),lchr # lchr = *laddr
+ movb (raddr),rchr # rchr = *raddr
# Check if the characters are equal:
- cmpb %al,%dl
- jne .neq # If not, the strings also aren't equal.
+ cmpb lchr,rchr # if (lchr != rchr)
+ jne .neq # goto neq // If not, the strings also aren't equal.
# Check if we have reached the null-terminator:
- testb %al,%al
- jz .eq # If so, all previous characters have compared equal, and the strings are equal.
+ testb lchr,lchr # if (lchr == 0x0)
+ jz .eq # goto eq // If so, all previous characters have compared equal, and the strings are equal.
# Continue to the next characters:
- incq %rdi
- incq %rsi
- jmp .loop
+ incq laddr # ++laddr
+ incq raddr # ++raddr
+ jmp .loop # goto loop
# The strings have compared equal:
.eq:
movb $0xFF,%al
- ret
+ ret # return FF
- /* The strings have compared unequal: */
+ # The strings have compared unequal:
.neq:
movb $0x0,%al
- ret
+ ret # return 0
diff --git a/zap/source/amd64/mem/strfill.S b/zap/source/amd64/mem/strfill.S
index 590b99f..f570a35 100644
--- a/zap/source/amd64/mem/strfill.S
+++ b/zap/source/amd64/mem/strfill.S
@@ -5,30 +5,33 @@
.globl zap_strfill
zap_strfill:
- # rdi: Address of the first character of the string.
- # rsi: Fill character.
- # rax: Address of the current character.
+# Address of the current character:
+#define addr %rax
+# Address of the first character of the string:
+#define start %rdi
+# Fill character:
+#define chr %sil
- movq %rdi,%rax
+ movq start,addr
# Iterate over string:
.loop:
# Check if we have reached the null-terminator:
- cmpb $0x0,(%rax)
- je .done # Exit loop if we have.
+ cmpb $0x0,(addr) # if (*addr == 0x0)
+ je .done # goto done
# Set the value of the current element:
- movb %sil,(%rax)
+ movb chr,(addr) # *addr = chr
# Continue to next character:
- incq %rax
- jmp .loop
+ incq addr # ++addr
+ jmp .loop # goto loop
# Finish:
.done:
# Get the length of the string:
- subq %rdi,%rax
+ subq start,addr # addr -= start
- ret
+ ret # return addr
diff --git a/zap/source/amd64/mem/strlen.S b/zap/source/amd64/mem/strlen.S
index e8739e2..a3f68b1 100644
--- a/zap/source/amd64/mem/strlen.S
+++ b/zap/source/amd64/mem/strlen.S
@@ -5,30 +5,33 @@
.globl zap_strlen
zap_strlen:
- # rax: Address of the current character.
- # rdi: Address of the first character.
- # rdx: Current character.
+# Address of the current character:
+#define addr %rax
+# Address of the first character:
+#define start %rdi
+# Current character:
+#define chr %dl
- movq %rdi,%rax
+ movq start,addr
# Iterate over the string:
.loop:
# Move the character into a register:
- movb (%rax),%dl
+ movb (addr),chr # chr = *addr
# Check if we have reached the null-terminator:
- testb %dl,%dl
- jz .done # If so, we are done.
+ testb chr,chr # if (chr == 0x0)
+ jz .done # goto done
# Continue to the next character:
- incq %rax
- jmp .loop
+ incq addr # ++addr
+ jmp .loop # goto loop
# Done:
.done:
# Get the length:
- subq %rdi,%rax
+ subq start,addr # addr -= start
- ret
+ ret # return addr
diff --git a/zap/source/amd64/mem/utf20len.S b/zap/source/amd64/mem/utf20len.S
index 5af352b..03f8254 100644
--- a/zap/source/amd64/mem/utf20len.S
+++ b/zap/source/amd64/mem/utf20len.S
@@ -5,31 +5,34 @@
.globl zap_utf20len
zap_utf20len:
- # rax: Address of the current character.
- # rdi: Address of the first character.
- # rdx: Current character.
+# Address of the current character:
+#define addr %rax
+# Address of the first character:
+#define start %rdi
+# Current character:
+#define chr %edx
- movq %rdi,%rax
+ movq start,addr
# Iterate over the string:
.loop:
# Move the character into a register:
- movl (%rax),%edx
+ movl (addr),chr # chr = *addr
# Check if we have reached the null-terminator:
- testl %edx,%edx
- jz .done # If so, we are done.
+ testl chr,chr # if (chr == 0x0)
+ jz .done # goto done
# Continue to the next character:
- addq $0x4,%rax
- jmp .loop
+ addq $0x4,addr # addr += 0x4
+ jmp .loop # goto loop
# Done:
.done:
# Get the length:
- subq %rdi,%rax
- shrq $0x2,%rax # Divide by four to get the number of doublewords rather than bytes.
+ subq start,addr # addr -= start
+ shrq $0x2,addr # addr /= 0x4 // Divide by four to get the number of doublewords rather than bytes.
- ret
+ ret # return addr