summaryrefslogtreecommitdiff
path: root/rgo
diff options
context:
space:
mode:
Diffstat (limited to 'rgo')
-rw-r--r--rgo/Makefile2
-rw-r--r--rgo/include/rgo.h8
-rw-r--r--rgo/src/fndbyte.S21
-rw-r--r--rgo/src/fndchr.S8
-rw-r--r--rgo/src/memcpy.S101
-rw-r--r--rgo/src/memeq.S102
-rw-r--r--rgo/src/memfill.S41
-rw-r--r--rgo/src/strcpy.S22
-rw-r--r--rgo/src/streq.S30
-rw-r--r--rgo/src/strlen.S17
10 files changed, 224 insertions, 128 deletions
diff --git a/rgo/Makefile b/rgo/Makefile
index 76282be..02800e0 100644
--- a/rgo/Makefile
+++ b/rgo/Makefile
@@ -25,7 +25,7 @@ CFLAGS = \
-Iinclude \
-g
-.PHONY: clean
+.PHONY: clean purge
$(LIB): $(OBJS)
ar r $@ $^
diff --git a/rgo/include/rgo.h b/rgo/include/rgo.h
index 38c4672..276c8dc 100644
--- a/rgo/include/rgo.h
+++ b/rgo/include/rgo.h
@@ -10,11 +10,15 @@
You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/
+#if !defined(__i386__) && !defined(__x86_64__)
+#error Unsupported machine architecture! Support: AMD64, IA-32.
+#endif
+
#if !defined(rgo_ver)
#if defined(__ASSEMBLER__)
-#define rgo_ver $0x1
+#define rgo_ver $0x2
#else
-#define rgo_ver (0x1)
+#define rgo_ver (0x2)
#endif
#if defined(__ASSEMBLER__)
diff --git a/rgo/src/fndbyte.S b/rgo/src/fndbyte.S
index 2e0ed8b..c0e4382 100644
--- a/rgo/src/fndbyte.S
+++ b/rgo/src/fndbyte.S
@@ -15,23 +15,22 @@
.global rgo_fndbyte
rgo_fndbyte:
-#if defined(__x86_64__)
/*
- rdi: void const * ptr
- rsi: size_t num
- dl: uint8_t byte
+ void const * ptr
+ size_t num
+ uint8_t byte
*/
+#if defined(__x86_64__)
/* rax: Address of the current element. */
movq %rdi,%rax
- /* rcx: Address of the element after the last element. */
- movq %rdi,%rcx
- addq %rsi,%rcx
- /* r8b: Current element. */
+ /* rsi: Address of the element after the last element. */
+ addq %rdi,%rsi
+ /* rcx: Current element. */
.loop:
- cmpq %rax,%rcx
+ cmpq %rax,%rsi
je .nfnd /* We have went through the entire array without finding the byte. */
- movb (%rax),%r8b
- cmpb %r8b,%dl
+ movb (%rax),%cl
+ cmpb %cl,%dl
je .fnd /* We have found the byte. */
incq %rax
jmp .loop
diff --git a/rgo/src/fndchr.S b/rgo/src/fndchr.S
index cacea5e..1008e52 100644
--- a/rgo/src/fndchr.S
+++ b/rgo/src/fndchr.S
@@ -15,14 +15,14 @@
.global rgo_fndchr
rgo_fndchr:
-#if defined(__x86_64__)
/*
- rdi: char const * str
- sil: char chr
+ char const * str
+ char chr
*/
+#if defined(__x86_64__)
/* rax: Address of the current character. */
movq %rdi,%rax
- /* dl: Current character. */
+ /* rdx: Current character. */
.loop:
movb (%rax),%dl
cmpb %dl,%sil
diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S
index 51d82f9..475da57 100644
--- a/rgo/src/memcpy.S
+++ b/rgo/src/memcpy.S
@@ -3,7 +3,7 @@
This file is part of rgo.
- rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
@@ -15,46 +15,85 @@
.global rgo_memcpy
rgo_memcpy:
-#if defined(__x86_64__)
/*
- rdi: void const * in
- rsi: size_t num
- rdx: void * out
+ void const * in
+ size_t num
+ void * out
*/
- /* rcx: Address of the current input element. */
- movq %rdi,%rcx
- /* r8: Address of the current output element. */
- movq %rdx,%r8
- /* r9: Number of remaining elements. */
- movq %rsi,%r9
- /* r10: Temporary. */
- /* xmm0: Temporary. */
+#if defined(__i386__)
+ /* eax: Address of the current input element. */
+ movl 0x4(%esp),%eax
+ /* ecx: Number of remaining elements. */
+ movl 0x8(%esp),%ecx
+ /* edx: Address of the current output element. */
+ movl 0xC(%esp),%edx
+ /* ebx: Current element. */
+ pushl %ebx /* ebx must be restored. */
+ /* xmm0: Current element. */
+#if defined(__SSE__)
.big128cpy:
- cmpq $0x10,%r9
+ cmpl $0x10,%ecx
jl .wrdcpy
- movups (%rcx),%xmm0
- movups %xmm0,(%r8)
- addq $0x10,%rcx
- addq $0x10,%r8
- subq $0x10,%r9
+ movups (%eax),%xmm0
+ movups %xmm0,(%edx)
+ addl $0x10,%eax
+ addl $0x10,%edx
+ subl $0x10,%ecx
+ jmp .big128cpy
+#endif
+.wrdcpy:
+ cmpl $0x4,%ecx
+ jl .bytecpy
+ movl (%eax),%ebx
+ movl %ebx,(%edx)
+ addl $0x4,%eax
+ addl $0x4,%edx
+ subl $0x4,%ecx
+ jmp .wrdcpy
+.bytecpy:
+ testl %ecx,%ecx
+ jz .done
+ movb (%eax),%bl
+ movb %bl,(%edx)
+ incl %eax
+ incl %edx
+ decl %ecx
+ jmp .bytecpy
+.done:
+ popl %ebx
+ ret
+#elif defined(__x86_64__)
+ /* rdi: Address of the current input element. */
+ /* rsi: Number of remaining elements. */
+ /* rdx: Address of the current output element. */
+ /* rcx: Current element. */
+ /* xmm0: Current element. */
+.big128cpy:
+ cmpq $0x10,%rsi
+ jl .wrdcpy
+ movups (%rdi),%xmm0
+ movups %xmm0,(%rdx)
+ addq $0x10,%rdi
+ addq $0x10,%rdx
+ subq $0x10,%rsi
jmp .big128cpy
.wrdcpy:
- cmpq $0x8,%r9
+ cmpq $0x8,%rsi
jl .bytecpy
- movq (%rcx),%r10
- movq %r10,(%r8)
- addq $0x8,%rcx
- addq $0x8,%r8
- subq $0x8,%r9
+ movq (%rdi),%rcx
+ movq %rcx,(%rdx)
+ addq $0x8,%rdi
+ addq $0x8,%rdx
+ subq $0x8,%rsi
jmp .wrdcpy
.bytecpy:
- testq %r9,%r9
+ testq %rsi,%rsi
jz .done
- movb (%rcx),%r10b
- movb %r10b,(%r8)
- incq %rcx
- incq %r8
- decq %r9
+ movb (%rdi),%cl
+ movb %cl,(%rdx)
+ incq %rdi
+ incq %rdx
+ decq %rsi
jmp .bytecpy
.done:
ret
diff --git a/rgo/src/memeq.S b/rgo/src/memeq.S
index c3a9a63..d106804 100644
--- a/rgo/src/memeq.S
+++ b/rgo/src/memeq.S
@@ -3,7 +3,7 @@
This file is part of rgo.
- rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
@@ -15,46 +15,86 @@
.global rgo_memeq
rgo_memeq:
-#if defined(__x86_64__)
/*
- rdi: void const * lptr
- rsi: size_t num
- rdx: void const * rptr
+ void const * lptr
+ size_t num
+ void const * rptr
*/
- /* rcx: Address of the current left element. */
- movq %rdi,%rcx
- /* r8: Address of the current right element. */
- movq %rdx,%r8
- /* r9: Number of remaining elements. */
- movq %rsi,%r9
- /* r10: Temporary. */
- /* r11: Temporary. */
+#if defined(__i386__)
+ /* eax: Address of the current left element. */
+ movl 0x4(%esp),%eax
+ /* ecx: Number of remaining elements. */
+ movl 0x8(%esp),%ecx
+ /* edx: Address of the current right element. */
+ movl 0xC(%esp),%edx
+ /* ebx: Current left element. */
+ pushl %ebx
+ /* ebx/esi: Current right element. */
+ pushl %esi
.wrdeq:
- cmpq $0x8,%r9
+ cmpl $0x4,%ecx
jl .byteeq
- movq (%rcx),%r10
- movq (%r8),%r11
- cmpq %r10,%r11
- jz .neq
- addq $0x8,%rcx
- addq $0x8,%r8
- subq $0x8,%r9
+ movl (%eax),%ebx
+ movl (%edx),%esi
+ cmpl %ebx,%esi
+ jne .neq
+ addl $0x4,%eax
+ addl $0x4,%edx
+ subl $0x4,%ecx
+ jmp .wrdeq
+.byteeq:
+ testl %ecx,%ecx
+ jz .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */
+ movb (%eax),%bl
+ movb (%edx),%bh
+ cmpb %bl,%bh
+ jne .neq
+ incl %eax
+ incl %edx
+ decl %ecx
+ jmp .byteeq
+.eq:
+ popl %esi /* Restore in the reverse order of the pushes (ebx was pushed first). */
+ popl %ebx
+ movb $0x1,%al
+ ret
+.neq:
+ popl %esi /* Restore in the reverse order of the pushes (ebx was pushed first). */
+ popl %ebx
+ movb $0x0,%al
+ ret
+#elif defined(__x86_64__)
+ /* rdi: Address of the current left element. */
+ /* rsi: Number of remaining elements. */
+ /* rdx: Address of the current right element. */
+ /* rax: Current left element. */
+ /* rcx: Current right element. */
+.wrdeq:
+ cmpq $0x8,%rsi
+ jl .byteeq
+ movq (%rdi),%rax
+ movq (%rdx),%rcx
+ cmpq %rax,%rcx
+ jne .neq
+ addq $0x8,%rdi
+ addq $0x8,%rdx
+ subq $0x8,%rsi
jmp .wrdeq
.byteeq:
- testq %r9,%r9
- jz .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */
- movb (%rcx),%r10b
- movb (%r8),%r11b
- cmpb %r10b,%r11b
+ testq %rsi,%rsi
+ jz .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */
+ movb (%rdi),%al
+ movb (%rdx),%cl
+ cmpb %al,%cl
jne .neq
- incq %rcx
- incq %r8
- decq %r9
+ incq %rdi
+ incq %rdx
+ decq %rsi
jmp .byteeq
.eq:
- mov $0x1,%rax
+ movb $0x1,%al
ret
.neq:
- mov $0x0,%rax
+ movb $0x0,%al
ret
#endif
diff --git a/rgo/src/memfill.S b/rgo/src/memfill.S
index d131c48..c22547e 100644
--- a/rgo/src/memfill.S
+++ b/rgo/src/memfill.S
@@ -15,24 +15,39 @@
.global rgo_memfill
rgo_memfill:
-#if defined(__x86_64__)
/*
- rdi: void const * ptr
- rsi: size_t num
- dl: int_least8_t val
+ void const * ptr
+ size_t num
+ uint8_t val
*/
- /* We don't need to preserve any of the registers we use according to the ABI. */
- /* rcx: Address of the current element. */
+#if defined(__i386__)
+ /* eax: Address of the current element. */
+ movl 0x4(%esp),%eax
+ /* ecx: Address of the element after the last element. */
+ movl 0x4(%esp),%ecx
+ addl 0x8(%esp),%ecx
+ /* edx: Byte value. */
+ movb 0xC(%esp),%dl
+.loop:
+ cmpl %eax,%ecx
+ je .done /* Exit loop if we have reached the final element. */
+ movb %dl,(%eax)
+ incl %eax
+ jmp .loop /* Continue to next element. */
+.done:
+ ret
+#elif defined(__x86_64__)
+ /* rax: Address of the current element. */
+ movq %rdi,%rax
+ /* rcx: Address of the element after the last element. */
movq %rdi,%rcx
- /* rcx: Address of the element after the last element. */
- movq %rdi,%r8
- addq %rsi,%r8
+ addq %rsi,%rcx
.loop:
- cmpq %r8,%rcx
+ cmpq %rcx,%rax
je .done /* Exit loop if we have reached the final element. */
- movb %dl,(%rcx)
- incq %rcx /* Continue to next element. */
- jmp .loop
+ movb %dl,(%rax)
+ incq %rax
+ jmp .loop /* Continue to next element. */
.done:
ret
#endif
diff --git a/rgo/src/strcpy.S b/rgo/src/strcpy.S
index f2fbc36..8750295 100644
--- a/rgo/src/strcpy.S
+++ b/rgo/src/strcpy.S
@@ -10,28 +10,28 @@
You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/
-#include <rgo.h>
+#include <rgo.h>
.global rgo_strcpy
rgo_strcpy:
-#if defined(__x86_64__)
/*
- rdi: char const * lstr
- rsi: char const * rstr
+ char const * lstr
+ char const * rstr
*/
+#if defined(__x86_64__)
/* rax: Address of the current input character. */
movq %rdi,%rax
- /* rdx: Address of the current output character. */
- movq %rsi,%rdx
- /* cl: Current character. */
+ /* rsi: Address of the current output character. */
+ movq %rsi,%rsi
+ /* rdx: Current character. */
.loop:
- movb (%rax),%cl
- movb %cl,(%rdx)
- testb %cl,%cl
+ movb (%rax),%dl
+ movb %dl,(%rsi)
+ testb %dl,%dl
jz .done
incq %rax
- incq %rdx
+ incq %rsi
jmp .loop
.done:
subq %rdi,%rax
diff --git a/rgo/src/streq.S b/rgo/src/streq.S
index f530d54..8969e41 100644
--- a/rgo/src/streq.S
+++ b/rgo/src/streq.S
@@ -15,26 +15,26 @@
.global rgo_streq
rgo_streq:
-#if defined(__x86_64__)
/*
- rdi: char const * lstr
- rsi: char const * rstr
+ char const * lstr
+ char const * rstr
*/
- /* rdx: Address of the current input character. */
- movq %rdi,%rdx
- /* rcx: Address of the current output character. */
- movq %rsi,%rcx
- /* r8b: Current input character. */
- /* r9b: Current output character. */
+#if defined(__x86_64__)
+ /* rax: Address of the current input character. */
+ movq %rdi,%rax
+ /* rsi: Address of the current output character. */
+ movq %rsi,%rsi
+ /* rdx: Current input character. */
+ /* rcx: Current output character. */
.loop:
- movb (%rdx),%r8b
- movb (%rcx),%r9b
- cmpb %r8b,%r9b
+ movb (%rax),%dl
+ movb (%rsi),%cl
+ cmpb %dl,%cl
jne .neq
- testb %r8b,%r8b /* Check if we have reached the null-terminator. */
+ testb %dl,%dl /* Check if we have reached the null-terminator. */
jz .eq
- incq %rdx
- incq %rcx
+ incq %rax
+ incq %rsi
jmp .loop
.eq:
mov $0x1,%rax
diff --git a/rgo/src/strlen.S b/rgo/src/strlen.S
index 7508be9..d7ad03e 100644
--- a/rgo/src/strlen.S
+++ b/rgo/src/strlen.S
@@ -3,7 +3,7 @@
This file is part of rgo.
- rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
@@ -15,21 +15,20 @@
.global rgo_strlen
rgo_strlen:
-#if defined(__x86_64__)
/*
- rdi: char const * str
+ char const * str
*/
- /* rsi: Address of the current character. */
- movq %rdi,%rsi
- /* dl: Current character. */
+#if defined(__x86_64__)
+ /* rax: Address of the current character. */
+ movq %rdi,%rax
+ /* rdx: Current character. */
.loop:
- movb (%rsi),%dl
+ movb (%rax),%dl
testb %dl,%dl
jz .done /* Exit loop if we have reached the null-terminator. */
- incq %rsi /* Continue to the next character. */
+ incq %rax /* Continue to the next character. */
jmp .loop
.done:
- movq %rsi,%rax
subq %rdi,%rax /* Address of the null-terminator minus the start address gives the length. */
ret
#endif