diff options
Diffstat (limited to 'zap')
-rw-r--r-- | zap/GNUmakefile | 2 | ||||
-rw-r--r-- | zap/source/amd64/mem/fndbyte.S | 42 | ||||
-rw-r--r-- | zap/source/amd64/mem/fndchr.S | 37 | ||||
-rw-r--r-- | zap/source/amd64/mem/foreach.S | 54 | ||||
-rw-r--r-- | zap/source/amd64/mem/memcp.S | 91 | ||||
-rw-r--r-- | zap/source/amd64/mem/memeq.S | 87 | ||||
-rw-r--r-- | zap/source/amd64/mem/memfill.S | 24 | ||||
-rw-r--r-- | zap/source/amd64/mem/strcp.S | 34 | ||||
-rw-r--r-- | zap/source/amd64/mem/streq.S | 36 | ||||
-rw-r--r-- | zap/source/amd64/mem/strfill.S | 25 | ||||
-rw-r--r-- | zap/source/amd64/mem/strlen.S | 25 | ||||
-rw-r--r-- | zap/source/amd64/mem/utf20len.S | 27 |
12 files changed, 266 insertions, 218 deletions
diff --git a/zap/GNUmakefile b/zap/GNUmakefile index d3d83ef..e0de7c4 100644 --- a/zap/GNUmakefile +++ b/zap/GNUmakefile @@ -65,7 +65,7 @@ LIB = libzap.a .PHONY: clean install purge $(LIB): $(OBJS) - $(AR) r $@ $^ + $(AR) r $(@) $(^) install: $(LIB) mkdir -pm755 "$(HDRDIR)/zap" diff --git a/zap/source/amd64/mem/fndbyte.S b/zap/source/amd64/mem/fndbyte.S index 9298b73..af2ee84 100644 --- a/zap/source/amd64/mem/fndbyte.S +++ b/zap/source/amd64/mem/fndbyte.S @@ -5,40 +5,46 @@ .globl zap_fndbyte zap_fndbyte: - # rax: Address of the current element. - # rdi: Address of the first element. - # rsi: Address of the element after the last element. - # rdx: Byte value. - # rcx: Current byte. +# Address of the current element: +#define addr %rax +# Address of the first element: +#define start %rdi +# Address of the element after the last element: +#define afterbuf %rsi +# Byte value: +#define cmp %dl +# Current byte: +#define val %cl - movq %rdi,%rax + movq start,addr # addr = start - addq %rdi,%rsi + addq start,afterbuf # afterbuf += start # Iterate over the array: .loop: # Check if we have reached the end of the array: - cmpq %rax,%rsi - je .nfnd + cmpq addr,afterbuf # if (addr == afterbuf) + je .nfnd # goto nfnd # Check if we have found the byte value: - movb (%rax),%cl - cmpb %cl,%dl - je .fnd + movb (addr),val # val = *addr + cmpb val,cmp # if (val == cmp) + je .fnd # goto fnd # Continue to the next byte: - incq %rax - jmp .loop + incq addr # ++addr + jmp .loop # goto loop # Found: .fnd: - subq %rdi,%rax - ret + # Get the offset of the byte: + subq start,addr # addr -= start + ret # return addr # Not found: .nfnd: - movq $0xFFFFFFFFFFFFFFFF,%rax - ret + movq $0xFFFFFFFFFFFFFFFF,addr # addr = FFFFFFFFFFFFFFFF + ret # return addr diff --git a/zap/source/amd64/mem/fndchr.S b/zap/source/amd64/mem/fndchr.S index 1078a10..67e0ea1 100644 --- a/zap/source/amd64/mem/fndchr.S +++ b/zap/source/amd64/mem/fndchr.S @@ -5,39 +5,44 @@ .globl zap_fndchr zap_fndchr: - # rdi: Address of the 
first character. - # rsi: Character to be found. - # rax: Address of the current character. - # rdx: Current character. +# Address of the first character: +#define start %rdi +# Character to be found: +#define cmp %sil +# Address of the current character: +#define addr %rax +# Current character: +#define chr %dl - movq %rdi,%rax + movq start,addr # addr = start # Iterate over the string: .loop: # Copy the character into a register: - movb (%rax),%dl + movb (addr),chr # chr = *addr # Check if we have found the character: - cmpb %dl,%sil - je .fnd + cmpb chr,cmp # if (chr == cmp) + je .fnd # goto fnd # Check if we have found the null-terminator: - testb %dl,%dl - jz .nfnd + testb chr,chr # if (chr == 0x0) + jz .nfnd # goto nfnd # Continue to the next character: - incq %rax - jmp .loop + incq addr # ++addr + jmp .loop # goto loop # Found: .fnd: - subq %rdi,%rax - ret + # Get the offset of the character: + subq start,addr # addr -= start + ret # return addr # Not found: .nfnd: - movq $0xFFFFFFFFFFFFFFFF,%rax - ret + movq $0xFFFFFFFFFFFFFFFF,addr # addr = FFFFFFFFFFFFFFFF + ret # return addr diff --git a/zap/source/amd64/mem/foreach.S b/zap/source/amd64/mem/foreach.S index f19bcfa..6766933 100644 --- a/zap/source/amd64/mem/foreach.S +++ b/zap/source/amd64/mem/foreach.S @@ -5,51 +5,55 @@ .globl zap_foreach zap_foreach: - # rbx: Address of the current element. - # r12: Address of the element after the last input element. - # r13: Size of each input element. - # r14: Address of the function. +# Address of the current element: +#define addr %rbx +# Address of the element after the last input element: +#define afterbuf %r12 +# Size of each input element: +#define sz %r13 +# Address of the function: +#define fn %r14 # We're gonna use callee-saved registers for storing values so they don't get overwritten with each function call. 
# Push the callee-saved registers: - pushq %rbx - pushq %r12 - pushq %r13 - pushq %r14 + pushq addr + pushq afterbuf + pushq sz + pushq fn # Move registers into place: - movq %rdi,%rbx - movq %rsi,%r13 - movq %rcx,%r14 + movq %rdi,addr + movq %rsi,sz + movq %rcx,fn # Get the one-past-the-end address: - movq %rdx,%r12 - imulq %r13,%r12 # Calculate the array size in bytes (sz * num). We're using signed multiply because the equivalent using the unsigned instruction would use more instructions. - addq %rbx,%r12 + movq %rdx,afterbuf + imulq sz,afterbuf # afterbuf *= sz // Calculate the array size in bytes (sz * num). We're using signed multiply because the equivalent using the unsigned instruction would use more instructions. + addq addr,afterbuf # afterbuf += addr # Iterate through the array: .loop: # Check if we have reached the one-past-the-end address: - cmpq %rbx,%r12 - je .done + cmpq addr,afterbuf # if (addr == afterbuf) + je .done # goto done # Call the provided function: - movq %rbx,%rdi # Provide the current address to the function. - call *%r14 # We don't need to save any registers for this as we only use callee-saved registers. + movq addr,%rdi # // Provide the current address to the function. + call *fn # fn(addr) // We don't need to save any registers for this as we only use callee-saved registers. # Continue to the next element: - addq %r13,%rbx - jmp .loop + addq sz,addr # addr += sz + jmp .loop # goto loop # Finish: .done: # Restore the callee-saved registers: - popq %r14 - popq %r13 - popq %r12 - popq %rbx + popq fn + popq sz + popq afterbuf + popq addr - ret + ret # return diff --git a/zap/source/amd64/mem/memcp.S b/zap/source/amd64/mem/memcp.S index 5691446..ac310ae 100644 --- a/zap/source/amd64/mem/memcp.S +++ b/zap/source/amd64/mem/memcp.S @@ -5,90 +5,95 @@ .globl zap_memcp zap_memcp: - # rdi: Address of the current input element. - # rsi: Number of remaining elements. - # rdx: Address of the current output element. - # rcx: Current element. 
- # xmm0: Current element. - # ymm0: Current element. +# Address of the current input element: +#define iaddr %rdi +# Number of remaining bytes: +#define rem %rsi +# Address of the current output element: +#define oaddr %rdx +# Current element: +#define val1 %cl +#define val8 %rcx +#define val01 %xmm0 +#define val02 %ymm0 #if defined(__AVX__) # AVX support 256-bit moves. # Copy 32 bytes: -.big20cp: +.big02cp: # Check if there are at least 32 bytes remaining: - cmpq $0x20,%rsi - jl .big10cp # If not, skip to the 10 byte copying. + cmpq $0x20,rem # if (rem < 20) + jl .big01cp # goto big01cp // If not, skip to the 10 byte copying. # Copy: - vmovups (%rdi),%ymm0 # Move into a register. - vmovups %ymm0,(%rdx) # And then back into memory. + vmovups (iaddr),val02 # val02 = *iaddr + vmovups val02,(oaddr) # *oaddr = val02 # Continue: - addq $0x20,%rdi - addq $0x20,%rdx - subq $0x20,%rsi - jmp .big20cp + addq $0x20,iaddr # iaddr += 0x20 + addq $0x20,oaddr # oaddr += 0x20 + subq $0x20,rem # rem -= 0x20 + jmp .big02cp # goto big02cp #endif - # AMD64 requires SSE(2). + # AMD64 requires SSE(2), so we don't have to test it. 
# Copy 16 bytes: -.big10cp: +.big01cp: # Check if there are at least 16 bytes remaining: - cmpq $0x10,%rsi - jl .wrdcp + cmpq $0x10,rem # if (rem < 10) + jl .wrdcp # goto wrdcp # Copy: - movdqu (%rdi),%xmm0 - movdqu %xmm0,(%rdx) + movdqu (iaddr),val01 # val01 = *iaddr + movdqu val01,(oaddr) # *oaddr = val01 # Continue: - addq $0x10,%rdi - addq $0x10,%rdx - subq $0x10,%rsi - jmp .big10cp + addq $0x10,iaddr # iaddr += 0x10 + addq $0x10,oaddr # oaddr += 0x10 + subq $0x10,rem # rem -= 0x10 + jmp .big01cp # goto big01cp # Copy one word (8 bytes): .wrdcp: # Check if there are at least 8 bytes remaining: - cmpq $0x8,%rsi - jl .bytecp + cmpq $0x8,rem # if (rem < 8) + jl .bytecp # goto bytecp # Copy: - movq (%rdi),%rcx - movq %rcx,(%rdx) + movq (iaddr),val8 # val8 = *iaddr + movq val8,(oaddr) # *oaddr = val8 # Continue: - addq $0x8,%rdi - addq $0x8,%rdx - subq $0x8,%rsi - jmp .wrdcp + addq $0x8,iaddr # iaddr += 0x8 + addq $0x8,oaddr # oaddr += 0x8 + subq $0x8,rem # rem -= 0x8 + jmp .wrdcp # goto wrdcp # Copy one byte: .bytecp: # Check if we have any bytes remaining: - testq %rsi,%rsi - jz .done + testq rem,rem # if (rem == 0x0) + jz .done # goto done # Copy: - movb (%rdi),%cl - movb %cl,(%rdx) + movb (iaddr),val1 # val1 = *iaddr + movb val1,(oaddr) # *oaddr = val1 # Continue: - incq %rdi - incq %rdx - decq %rsi - jmp .bytecp + incq iaddr # ++iaddr + incq oaddr # ++oaddr + decq rem # --rem + jmp .bytecp # goto bytecp - # Finish: + # Return: .done: - ret + ret # return
\ No newline at end of file diff --git a/zap/source/amd64/mem/memeq.S b/zap/source/amd64/mem/memeq.S index ba43dfc..b30a884 100644 --- a/zap/source/amd64/mem/memeq.S +++ b/zap/source/amd64/mem/memeq.S @@ -5,63 +5,70 @@ .globl zap_memeq zap_memeq: - /* rdi: Left pointer. */ - /* rsi: Number of remaining elements. */ - /* rdx: Right pointer. */ - /* rax: Current left element. */ - /* rcx: Current right element. */ +# Left pointer: +#define laddr %rdi +# Number of remaining elements: +#define rem %rsi +# Right pointer: +#define raddr %rdx +# Current left element: +#define lval8 %rax +#define lval1 %al +# Current right element: +#define rval8 %rcx +#define rval1 %cl - /* Compare words: */ + # Compare words: .wrdcmp: - /* Check if there's at least one word left: */ - cmpq $0x8,%rsi - jl .bytecmp /* If not, skip to byte checks: */ + # Check if there's at least one word left: + cmpq $0x8,rem # if (rem < 8) + jl .bytecmp # goto bytecmp - /* Copy the values into registers: */ - movq (%rdi),%rax - movq (%rdx),%rcx + # Copy the values into registers: + movq (laddr),lval8 # lval8 = *laddr + movq (raddr),rval8 # rval8 = *raddr - /* Check if the words are equal: */ - cmpq %rax,%rcx - jne .neq + # Check if the words are equal: + cmpq lval8,rval8 # if (lval8 != rval8) + jne .neq # goto neq - /* Mark eight more bytes as equal: */ - addq $0x8,%rdi - addq $0x8,%rdx - subq $0x8,%rsi + # Mark eight more bytes as equal: + addq $0x8,laddr # laddr += 0x8 + addq $0x8,raddr # raddr += 0x8 + subq $0x8,rem # rem -= 0x8 - /* Continue to the next word: */ - jmp .wrdcmp + # Continue to the next word: + jmp .wrdcmp # goto wrdcmp - /* Compare bytes: */ + # Compare bytes: .bytecmp: - /* Check if there are any bytes left: */ - testq %rsi,%rsi - jz .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. 
*/ + # Check if there are any bytes left: + testq rem,rem # if (rem == 0x0) + jz .eq # goto eq // If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. - /* Copy the values into registers: */ - movb (%rdi),%al - movb (%rdx),%cl + # Copy the values into registers: + movb (laddr),lval1 # lval1 = *laddr + movb (raddr),rval1 # rval1 = *raddr - cmpb %al,%cl - jne .neq + cmpb lval1,rval1 # if (lval1 != rval1) + jne .neq # goto neq - /* Mark another byte as equal: */ - incq %rdi - incq %rdx - decq %rsi + # Mark another byte as equal: + incq laddr # ++laddr + incq raddr # ++raddr + decq rem # --rem - /* Continue to the next byte: */ - jmp .bytecmp + # Continue to the next byte: + jmp .bytecmp # goto bytecmp - /* The memory sequences have compared equal: */ + # The memory sequences have compared equal: .eq: movb $0xFF,%al - ret + ret # return FF - /* The memory sequences have compared NOT equal: */ + # The memory sequences have compared NOT equal: .neq: movb $0x0,%al - ret + ret # return 0 diff --git a/zap/source/amd64/mem/memfill.S b/zap/source/amd64/mem/memfill.S index e563b55..c38eec8 100644 --- a/zap/source/amd64/mem/memfill.S +++ b/zap/source/amd64/mem/memfill.S @@ -5,26 +5,30 @@ .globl zap_memfill zap_memfill: - # rdi: Address of the current element. - # rsi: Address of the element after the last element. - # rdx: Byte value. +# Address of the current element: +#define addr %rdi +# Address of the element after the last element: +#define afterbuf %rsi +# Byte value: +#define val %dl - addq %rdi,%rsi + addq addr,afterbuf # afterbuf += addr // afterbuf contains the number of bytes # Iterate over buffer: .loop: # Check if we have reached the final element: - cmpq %rdi,%rsi - je .done # Exit loop if we have. 
+ cmpq addr,afterbuf # if (addr == afterbuf) + je .done # goto done # Set the value of the current element: - movb %dl,(%rdi) + movb val,(addr) # *addr = val # Continue to next element: - incq %rdi - jmp .loop + incq addr # ++addr + jmp .loop # goto loop # Finish: .done: - ret + + ret # return diff --git a/zap/source/amd64/mem/strcp.S b/zap/source/amd64/mem/strcp.S index eb5c276..820ed96 100644 --- a/zap/source/amd64/mem/strcp.S +++ b/zap/source/amd64/mem/strcp.S @@ -5,35 +5,39 @@ .globl zap_strcp zap_strcp: - # rax: Address of the current input character. - # rdi: Address of the first input character. - # rsi: Address of the current output character. - # rdx: Current character. +# Address of the current input character: +#define iaddr %rax +# Address of the first input character: +#define start %rdi +# Address of the current output character: +#define oaddr %rsi +# Current character: +#define chr %dl - movq %rdi,%rax + movq start,iaddr # Iterate over the strings: .loop: # Copy character: - movb (%rax),%dl # Move it into a register... - movb %dl,(%rsi) # ... and then back into memory. + movb (iaddr),chr # chr = *iaddr + movb chr,(oaddr) # *oaddr = chr # Check if we have reached the null-terminator: - testb %dl,%dl - jz .done + testb chr,chr # if (chr == 0x0) + jz .done # goto done # Continue to the next character: - incq %rax - incq %rsi - jmp .loop + incq iaddr # ++iaddr + incq oaddr # ++oaddr + jmp .loop # goto loop # Finish: .done: # Get the length of the (input) string: - subq %rdi,%rax - decq %rax # We do not count the null-terminator in the string length. + subq start,iaddr # iaddr -= start + decq iaddr # --iaddr // We do not count the null-terminator in the string length. 
- ret + ret # return iaddr diff --git a/zap/source/amd64/mem/streq.S b/zap/source/amd64/mem/streq.S index 4270e7d..e054531 100644 --- a/zap/source/amd64/mem/streq.S +++ b/zap/source/amd64/mem/streq.S @@ -5,39 +5,43 @@ .globl zap_streq zap_streq: - # rdi: Address of the current left character. - # rsi: Address of the current right character. - # rax: Current left character. - # rdx: Current right character. +# Address of the current left character: +#define laddr %rdi +# Address of the current right character: +#define raddr %rsi +# Current left character: +#define lchr %al +# Current right character: +#define rchr %dl # Iterate over the strings: .loop: # Copy the characters into registers: - movb (%rdi),%al - movb (%rsi),%dl + movb (laddr),lchr # lchr = *laddr + movb (raddr),rchr # rchr = *raddr # Check if the characters are equal: - cmpb %al,%dl - jne .neq # If not, the strings also aren't equal. + cmpb lchr,rchr # if (lchr != rchr) + jne .neq # goto neq // If not, the strings also aren't equal. # Check if we have reached the null-terminator: - testb %al,%al - jz .eq # If so, all previous characters have compared equal, and the strings are equal. + testb lchr,lchr # if (lchr == 0x0) + jz .eq # goto eq // If so, all previous characters have compared equal, and the strings are equal. # Continue to the next characters: - incq %rdi - incq %rsi - jmp .loop + incq laddr # ++laddr + incq raddr # ++raddr + jmp .loop # goto loop # The strings have compared equal: .eq: movb $0xFF,%al - ret + ret # return FF - /* The strings have compared unequal: */ + # The strings have compared unequal: .neq: movb $0x0,%al - ret + ret # return 0 diff --git a/zap/source/amd64/mem/strfill.S b/zap/source/amd64/mem/strfill.S index 590b99f..f570a35 100644 --- a/zap/source/amd64/mem/strfill.S +++ b/zap/source/amd64/mem/strfill.S @@ -5,30 +5,33 @@ .globl zap_strfill zap_strfill: - # rdi: Address of the first character of the string. - # rsi: Fill character. 
- # rax: Address of the current character. +# Address of the current character: +#define addr %rax +# Address of the first character of the string: +#define start %rdi +# Fill character: +#define chr %sil - movq %rdi,%rax + movq start,addr # Iterate over string: .loop: # Check if we have reached the null-terminator: - cmpb $0x0,(%rax) - je .done # Exit loop if we have. + cmpb $0x0,(addr) # if (*addr == 0x0) + je .done # goto done # Set the value of the current element: - movb %sil,(%rax) + movb chr,(addr) # *addr = chr # Continue to next character: - incq %rax - jmp .loop + incq addr # ++addr + jmp .loop # goto loop # Finish: .done: # Get the length of the string: - subq %rdi,%rax + subq start,addr # addr -= start - ret + ret # return addr diff --git a/zap/source/amd64/mem/strlen.S b/zap/source/amd64/mem/strlen.S index e8739e2..a3f68b1 100644 --- a/zap/source/amd64/mem/strlen.S +++ b/zap/source/amd64/mem/strlen.S @@ -5,30 +5,33 @@ .globl zap_strlen zap_strlen: - # rax: Address of the current character. - # rdi: Address of the first character. - # rdx: Current character. +# Address of the current character: +#define addr %rax +# Address of the first character: +#define start %rdi +# Current character: +#define chr %dl - movq %rdi,%rax + movq start,addr # Iterate over the string: .loop: # Move the character into a register: - movb (%rax),%dl + movb (addr),chr # chr = *addr # Check if we have reached the null-terminator: - testb %dl,%dl - jz .done # If so, we are done. 
+ testb chr,chr # if (chr == 0x0) + jz .done # goto done # Continue to the next character: - incq %rax - jmp .loop + incq addr # ++addr + jmp .loop # goto loop # Done: .done: # Get the length: - subq %rdi,%rax + subq start,addr # addr -= start - ret + ret # return addr diff --git a/zap/source/amd64/mem/utf20len.S b/zap/source/amd64/mem/utf20len.S index 5af352b..03f8254 100644 --- a/zap/source/amd64/mem/utf20len.S +++ b/zap/source/amd64/mem/utf20len.S @@ -5,31 +5,34 @@ .globl zap_utf20len zap_utf20len: - # rax: Address of the current character. - # rdi: Address of the first character. - # rdx: Current character. +# Address of the current character: +#define addr %rax +# Address of the first character: +#define start %rdi +# Current character: +#define chr %edx - movq %rdi,%rax + movq start,addr # Iterate over the string: .loop: # Move the character into a register: - movl (%rax),%edx + movl (addr),chr # chr = *addr # Check if we have reached the null-terminator: - testl %edx,%edx - jz .done # If so, we are done. + testl chr,chr # if (chr == 0x0) + jz .done # goto done # Continue to the next character: - addq $0x4,%rax - jmp .loop + addq $0x4,addr # addr += 0x4 + jmp .loop # goto loop # Done: .done: # Get the length: - subq %rdi,%rax - shrq $0x2,%rax # Divide by four to get the number of doublewords rather than bytes. + subq start,addr # addr -= start + shrq $0x2,addr # addr /= 0x4 // Divide by four to get the number of doublewords rather than bytes. - ret + ret # return addr |