diff options
Diffstat (limited to 'rgo')
-rw-r--r-- | rgo/Makefile | 2 | ||||
-rw-r--r-- | rgo/include/rgo.h | 8 | ||||
-rw-r--r-- | rgo/src/fndbyte.S | 21 | ||||
-rw-r--r-- | rgo/src/fndchr.S | 8 | ||||
-rw-r--r-- | rgo/src/memcpy.S | 101 | ||||
-rw-r--r-- | rgo/src/memeq.S | 102 | ||||
-rw-r--r-- | rgo/src/memfill.S | 41 | ||||
-rw-r--r-- | rgo/src/strcpy.S | 22 | ||||
-rw-r--r-- | rgo/src/streq.S | 30 | ||||
-rw-r--r-- | rgo/src/strlen.S | 17 |
10 files changed, 224 insertions, 128 deletions
diff --git a/rgo/Makefile b/rgo/Makefile index 76282be..02800e0 100644 --- a/rgo/Makefile +++ b/rgo/Makefile @@ -25,7 +25,7 @@ CFLAGS = \ -Iinclude \ -g -.PHONY: clean +.PHONY: clean purge $(LIB): $(OBJS) ar r $@ $^ diff --git a/rgo/include/rgo.h b/rgo/include/rgo.h index 38c4672..276c8dc 100644 --- a/rgo/include/rgo.h +++ b/rgo/include/rgo.h @@ -10,11 +10,15 @@ You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>. */ +#if !defined(__i386__) && !defined(__x86_64__) +#error Unsupported machine architecture! Support: AMD64, IA-32. +#endif + #if !defined(rgo_ver) #if defined(__ASSEMBLER__) -#define rgo_ver $0x1 +#define rgo_ver $0x2 #else -#define rgo_ver (0x1) +#define rgo_ver (0x2) #endif #if defined(__ASSEMBLER__) diff --git a/rgo/src/fndbyte.S b/rgo/src/fndbyte.S index 2e0ed8b..c0e4382 100644 --- a/rgo/src/fndbyte.S +++ b/rgo/src/fndbyte.S @@ -15,23 +15,22 @@ .global rgo_fndbyte rgo_fndbyte: -#if defined(__x86_64__) /* - rdi: void const * ptr - rsi: size_t num - dl: uint8_t byte + void const * ptr + size_t num + uint8_t byte */ +#if defined(__x86_64__) /* rax: Address of the current element. */ movq %rdi,%rax - /* rcx: Address of the element after the last element. */ - movq %rdi,%rcx - addq %rsi,%rcx - /* r8b: Current element. */ + /* rsi: Address of the element after the last element. */ + addq %rdi,%rsi + /* rcx: Current element. */ .loop: - cmpq %rax,%rcx + cmpq %rax,%rsi je .nfnd /* We have gone through the entire array without finding the byte. */ - movb (%rax),%r8b - cmpb %r8b,%dl + movb (%rax),%cl + cmpb %cl,%dl je .fnd /* We have found the byte. 
*/ incq %rax jmp .loop diff --git a/rgo/src/fndchr.S b/rgo/src/fndchr.S index cacea5e..1008e52 100644 --- a/rgo/src/fndchr.S +++ b/rgo/src/fndchr.S @@ -15,14 +15,14 @@ .global rgo_fndchr rgo_fndchr: -#if defined(__x86_64__) /* - rdi: char const * str - sil: char chr + char const * str + char chr */ +#if defined(__x86_64__) /* rax: Address of the current character. */ movq %rdi,%rax - /* dl: Current character. */ + /* rdx: Current character. */ .loop: movb (%rax),%dl cmpb %dl,%sil diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S index 51d82f9..475da57 100644 --- a/rgo/src/memcpy.S +++ b/rgo/src/memcpy.S @@ -3,7 +3,7 @@ This file is part of rgo. - rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. @@ -15,46 +15,85 @@ .global rgo_memcpy rgo_memcpy: -#if defined(__x86_64__) /* - rdi: void const * in - rsi: size_t num - rdx: void * out + void const * in + size_t num + void * out */ - /* rcx: Address of the current input element. */ - movq %rdi,%rcx - /* r8: Address of the current output element. */ - movq %rdx,%r8 - /* r9: Number of remaining elements. */ - movq %rsi,%r9 - /* r10: Temporary. */ - /* xmm0: Temporary. */ +#if defined(__i386__) + /* eax: Address of the current input element. */ + movl 0x4(%esp),%eax + /* ecx: Number of remaining elements. */ + movl 0x8(%esp),%ecx + /* edx: Address of the current output element. 
*/ + movl 0xC(%esp),%edx + /* ebx: Current element. */ + pushl %ebx /* ebx must be restored. */ + /* xmm0: Current element. */ +#if defined(__SSE__) .big128cpy: - cmpq $0x10,%r9 + cmpl $0x10,%ecx jl .wrdcpy - movups (%rcx),%xmm0 - movups %xmm0,(%r8) - addq $0x10,%rcx - addq $0x10,%r8 - subq $0x10,%r9 + movups (%eax),%xmm0 + movups %xmm0,(%edx) + addl $0x10,%eax + addl $0x10,%edx + subl $0x10,%ecx + jmp .big128cpy +#endif +.wrdcpy: + cmpl $0x4,%ecx + jl .bytecpy + movl (%eax),%ebx + movl %ebx,(%edx) + addl $0x4,%eax + addl $0x4,%edx + subl $0x4,%ecx + jmp .wrdcpy +.bytecpy: + testl %ecx,%ecx + jz .done + movb (%eax),%bl + movb %bl,(%edx) + incl %eax + incl %edx + decl %ecx + jmp .bytecpy +.done: + popl %ebx + ret +#elif defined(__x86_64__) + /* rdi: Address of the current input element. */ + /* rsi: Number of remaining elements. */ + /* rdx: Address of the current output element. */ + /* rcx: Current element. */ + /* xmm0: Current element. */ +.big128cpy: + cmpq $0x10,%rsi + jl .wrdcpy + movups (%rdi),%xmm0 + movups %xmm0,(%rdx) + addq $0x10,%rdi + addq $0x10,%rdx + subq $0x10,%rsi jmp .big128cpy .wrdcpy: - cmpq $0x8,%r9 + cmpq $0x8,%rsi jl .bytecpy - movq (%rcx),%r10 - movq %r10,(%r8) - addq $0x8,%rcx - addq $0x8,%r8 - subq $0x8,%r9 + movq (%rdi),%rcx + movq %rcx,(%rdx) + addq $0x8,%rdi + addq $0x8,%rdx + subq $0x8,%rsi jmp .wrdcpy .bytecpy: - testq %r9,%r9 + testq %rsi,%rsi jz .done - movb (%rcx),%r10b - movb %r10b,(%r8) - incq %rcx - incq %r8 - decq %r9 + movb (%rdi),%cl + movb %cl,(%rdx) + incq %rdi + incq %rdx + decq %rsi jmp .bytecpy .done: ret diff --git a/rgo/src/memeq.S b/rgo/src/memeq.S index c3a9a63..d106804 100644 --- a/rgo/src/memeq.S +++ b/rgo/src/memeq.S @@ -3,7 +3,7 @@ This file is part of rgo. - rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
+ rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. @@ -15,46 +15,86 @@ .global rgo_memeq rgo_memeq: -#if defined(__x86_64__) /* - rdi: void const * lptr - rsi: size_t num - rdx: void const * rptr + void const * lptr + size_t num + void const * rptr */ - /* rcx: Address of the current left element. */ - movq %rdi,%rcx - /* r8: Address of the current right element. */ - movq %rdx,%r8 - /* r9: Number of remaining elements. */ - movq %rsi,%r9 - /* r10: Temporary. */ - /* r11: Temporary. */ +#if defined(__i386__) + /* eax: Address of the current left element. */ + movl 0x4(%esp),%eax + /* ecx: Number of remaining elements. */ + movl 0x8(%esp),%ecx + /* edx: Address of the current right element. */ + movl 0xC(%esp),%edx + /* ebx: Current left element. */ + pushl %ebx + /* ebx/esi: Current right element. */ + pushl %esi .wrdeq: - cmpq $0x8,%r9 + cmpl $0x4,%ecx jl .byteeq - movq (%rcx),%r10 - movq (%r8),%r11 - cmpq %r10,%r11 - jz .neq - addq $0x8,%rcx - addq $0x8,%r8 - subq $0x8,%r9 + movl (%eax),%ebx + movl (%edx),%esi + cmpl %ebx,%esi + jne .neq + addl $0x4,%eax + addl $0x4,%edx + subl $0x4,%ecx + jmp .wrdeq +.byteeq: + testl %ecx,%ecx + jne .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */ + movb (%eax),%bl + movb (%edx),%bh + cmpb %bl,%bh + jne .neq + incl %eax + incl %edx + decl %ecx + jmp .byteeq +.eq: + popl %ebx + popl %esi + movb $0x1,%al + ret +.neq: + popl %ebx + popl %esi + movb $0x0,%al + ret +#elif defined(__x86_64__) + /* rdi: Address of the current left element. 
*/ + /* rsi: Number of remaining elements. */ + /* rdx: Address of the current right element. */ + /* rax: Current left element. */ + /* rcx: Current right element. */ +.wrdeq: + cmpq $0x8,%rsi + jl .byteeq + movq (%rdi),%rax + movq (%rdx),%rcx + cmpq %rax,%rcx + jne .neq + addq $0x8,%rdi + addq $0x8,%rdx + subq $0x8,%rsi jmp .wrdeq .byteeq: - testq %r9,%r9 - jz .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */ - movb (%rcx),%r10b - movb (%r8),%r11b - cmpb %r10b,%r11b + testq %rsi,%rsi + jne .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */ + movb (%rdi),%al + movb (%rdx),%cl + cmpb %al,%cl jne .neq - incq %rcx - incq %r8 - decq %r9 + incq %rdi + incq %rdx + decq %rsi jmp .byteeq .eq: - mov $0x1,%rax + movb $0x1,%al ret .neq: - mov $0x0,%rax + movb $0x0,%al ret #endif diff --git a/rgo/src/memfill.S b/rgo/src/memfill.S index d131c48..c22547e 100644 --- a/rgo/src/memfill.S +++ b/rgo/src/memfill.S @@ -15,24 +15,39 @@ .global rgo_memfill rgo_memfill: -#if defined(__x86_64__) /* - rdi: void const * ptr - rsi: size_t num - dl: int_least8_t val + void const * ptr + size_t num + uint8_t val */ - /* We don't need to preserve any of the registers we use according to the ABI. */ - /* rcx: Address of the current element. */ +#if defined(__i386__) + /* eax: Address of the current element. */ + movl 0x4(%esp),%eax + /* ecx: Address of the element after the last element. */ + movl 0x4(%esp),%ecx + addl 0x8(%esp),%ecx + /* edx: Byte value. */ + movb 0xC(%esp),%dl +.loop: + cmpl %eax,%ecx + je .done /* Exit loop if we have reached the final element. */ + movb %dl,(%eax) + incl %eax + jmp .loop /* Continue to next element. */ +.done: + ret +#elif defined(__x86_64__) + /* rax: Address of the current element. */ + movq %rdi,%rax + /* rcx: Address of the element after the last element. 
*/ movq %rdi,%rcx - /* rcx: Address of the element after the last element. */ - movq %rdi,%r8 - addq %rsi,%r8 + addq %rsi,%rcx .loop: - cmpq %r8,%rcx + cmpq %rcx,%rax je .done /* Exit loop if we have reached the final element. */ - movb %dl,(%rcx) - incq %rcx /* Continue to next element. */ - jmp .loop + movb %dl,(%rax) + incq %rax + jmp .loop /* Continue to next element. */ .done: ret #endif diff --git a/rgo/src/strcpy.S b/rgo/src/strcpy.S index f2fbc36..8750295 100644 --- a/rgo/src/strcpy.S +++ b/rgo/src/strcpy.S @@ -10,28 +10,28 @@ You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>. */ -#include <rgo.h> +#indlude <rgo.h> .global rgo_strcpy rgo_strcpy: -#if defined(__x86_64__) /* - rdi: char const * lstr - rsi: char const * rstr + char const * lstr + char const * rstr */ +#if defined(__x86_64__) /* rax: Address of the current input character. */ movq %rdi,%rax - /* rdx: Address of the current output character. */ - movq %rsi,%rdx - /* cl: Current character. */ + /* rsi: Address of the current output character. */ + movq %rsi,%rsi + /* rdx: Current character. */ .loop: - movb (%rax),%cl - movb %cl,(%rdx) - testb %cl,%cl + movb (%rax),%dl + movb %dl,(%rsi) + testb %dl,%dl jz .done incq %rax - incq %rdx + incq %rsi jmp .loop .done: subq %rdi,%rax diff --git a/rgo/src/streq.S b/rgo/src/streq.S index f530d54..8969e41 100644 --- a/rgo/src/streq.S +++ b/rgo/src/streq.S @@ -15,26 +15,26 @@ .global rgo_streq rgo_streq: -#if defined(__x86_64__) /* - rdi: char const * lstr - rsi: char const * rstr + char const * lstr + char const * rstr */ - /* rdx: Address of the current input character. */ - movq %rdi,%rdx - /* rcx: Address of the current output character. */ - movq %rsi,%rcx - /* r8b: Current input character. */ - /* r9b: Current output character. */ +#if defined(__x86_64__) + /* rax: Address of the current input character. 
*/ + movq %rdi,%rax + /* rsi: Address of the current output character. */ + movq %rsi,%rsi + /* rdx: Current input character. */ + /* rcx: Current output character. */ .loop: - movb (%rdx),%r8b - movb (%rcx),%r9b - cmpb %r8b,%r9b + movb (%rax),%dl + movb (%rsi),%cl + cmpb %dl,%cl jne .neq - testb %r8b,%r8b /* Check if we have reached the null-terminator. */ + testb %dl,%dl /* Check if we have reached the null-terminator. */ jz .eq - incq %rdx - incq %rcx + incq %rax + incq %rsi jmp .loop .eq: mov $0x1,%rax diff --git a/rgo/src/strlen.S b/rgo/src/strlen.S index 7508be9..d7ad03e 100644 --- a/rgo/src/strlen.S +++ b/rgo/src/strlen.S @@ -3,7 +3,7 @@ This file is part of rgo. - rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. @@ -15,21 +15,20 @@ .global rgo_strlen rgo_strlen: -#if defined(__x86_64__) /* - rdi: char const * str + char const * str */ - /* rsi: Address of the current character. */ - movq %rdi,%rsi - /* dl: Current character. */ +#if defined(__x86_64__) + /* rax: Address of the current character. */ + movq %rdi,%rax + /* rdx: Current character. */ .loop: - movb (%rsi),%dl + movb (%rax),%dl testb %dl,%dl jz .done /* Exit loop if we have reached the null-terminator. */ - incq %rsi /* Continue to the next character. */ + incq %rax /* Continue to the next character. 
*/ jmp .loop .done: - movq %rsi,%rax subq %rdi,%rax ret #endif |