summaryrefslogtreecommitdiff
path: root/rgo
diff options
context:
space:
mode:
Diffstat (limited to 'rgo')
-rw-r--r--rgo/Makefile23
-rw-r--r--rgo/include/rgo.h8
-rw-r--r--rgo/src/fndbyte.S28
-rw-r--r--rgo/src/fndchr.S30
-rw-r--r--rgo/src/memcpy.S38
-rw-r--r--rgo/src/memfill.S14
-rw-r--r--rgo/src/strlen.S17
7 files changed, 126 insertions, 32 deletions
diff --git a/rgo/Makefile b/rgo/Makefile
index 02800e0..4118fe4 100644
--- a/rgo/Makefile
+++ b/rgo/Makefile
@@ -1,29 +1,30 @@
-SRCS_ASM = \
+SRCS = \
src/fndbyte.S \
src/fndchr.S \
src/memcpy.S \
+ src/memdup.c \
src/memeq.S \
src/memfill.S \
+ src/strdup.c \
src/streq.S \
+ src/strfill.c \
src/strcpy.S \
src/strlen.S
-SRCS_C = \
- src/memdup.c \
- src/strdup.c \
- src/strfill.c
-OBJS_ASM := $(SRCS_ASM:.S=.o)
-OBJS_C := $(SRCS_C:.c=.o)
-OBJS := $(OBJS_ASM) $(OBJS_C)
-LIB := librgo.a
+OBJS := $(SRCS:.S=.o)
+OBJS := $(OBJS:.c=.o)
+LIB := librgo.a
ASFLAGS = \
-Iinclude \
- -g
+ -g \
+ -march=native
CFLAGS = \
-Iinclude \
- -g
+ -O3 \
+ -g \
+ -march=native
.PHONY: clean purge
diff --git a/rgo/include/rgo.h b/rgo/include/rgo.h
index 276c8dc..f575436 100644
--- a/rgo/include/rgo.h
+++ b/rgo/include/rgo.h
@@ -10,15 +10,15 @@
You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/
-#if !defined(__i386__) && !defined(__x86_64__)
-#error Unsupported machine architecture! Support: AMD64, IA-32.
+#if !defined(__x86_64__) && !defined(__i386__)
+#error Unsupported machine architecture! Supported: AMD64, IA-32;
#endif
#if !defined(rgo_ver)
#if defined(__ASSEMBLER__)
-#define rgo_ver $0x2
+#define rgo_ver $0x3
#else
-#define rgo_ver (0x2)
+#define rgo_ver (0x3)
#endif
#if defined(__ASSEMBLER__)
diff --git a/rgo/src/fndbyte.S b/rgo/src/fndbyte.S
index c0e4382..4d1e482 100644
--- a/rgo/src/fndbyte.S
+++ b/rgo/src/fndbyte.S
@@ -20,7 +20,33 @@ rgo_fndbyte:
size_t num
uint8_t byte
*/
-#if defined(__x86_64__)
+#if defined(__i386__)
+ /* eax: Address of the current element. */
+ movl 0x4(%esp),%eax
+ /* ecx: Address of the element after the last element. */
+ movl 0x8(%esp),%ecx
+ addl %eax,%ecx
+ /* edx: Byte value. */
+ movb 0xC(%esp),%dl
+ /* ebx: Current element. */
+ pushl %ebx
+.loop:
+ cmpl %eax,%ecx
+ je .nfnd /* We have gone through the entire array without finding the byte. */
+ movb (%eax),%bl
+ cmpb %bl,%dl
+ je .fnd /* We have found the byte. */
+ incl %eax
+ jmp .loop
+.fnd:
+ popl %ebx
+ subl 0x4(%esp),%eax
+ ret
+.nfnd:
+ popl %ebx
+ movl $0xFFFFFFFF,%eax
+ ret
+#elif defined(__x86_64__)
/* rax: Address of the current element. */
movq %rdi,%rax
/* rsi: Address of the element after the last element. */
diff --git a/rgo/src/fndchr.S b/rgo/src/fndchr.S
index 1008e52..f12f4c5 100644
--- a/rgo/src/fndchr.S
+++ b/rgo/src/fndchr.S
@@ -19,22 +19,42 @@ rgo_fndchr:
char const * str
char chr
*/
-#if defined(__x86_64__)
+#if defined(__i386__)
+ /* eax: Address of the current character. */
+ movl 0x4(%esp),%eax
+ /* ecx: Character. */
+ movb 0x8(%esp),%cl
+ /* edx: Current character. */
+.loop:
+ movb (%eax),%dl
+ cmpb %dl,%cl
+ je .fnd /* Exit loop if we have found the character. */
+ testb %dl,%dl
+ je .nfnd /* We encountered the null-terminator but not the specified character. */
+ incl %eax
+ jmp .loop
+.fnd:
+ subl 0x4(%esp),%eax
+ ret
+.nfnd:
+ movl $0xFFFFFFFF,%eax
+ ret
+#elif defined(__x86_64__)
/* rax: Address of the current character. */
movq %rdi,%rax
/* rdx: Current character. */
.loop:
movb (%rax),%dl
cmpb %dl,%sil
- je .done /* Exit loop if we have found the character. */
+ je .fnd /* Exit loop if we have found the character. */
testb %dl,%dl
- je .err /* We encountered the null-terminator but not the specified character. */
+ je .nfnd /* We encountered the null-terminator but not the specified character. */
incq %rax
jmp .loop
-.done:
+.fnd:
subq %rdi,%rax
ret
-.err:
+.nfnd:
movq $0xFFFFFFFFFFFFFFFF,%rax
ret
#endif
diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S
index 475da57..820781d 100644
--- a/rgo/src/memcpy.S
+++ b/rgo/src/memcpy.S
@@ -30,12 +30,33 @@ rgo_memcpy:
/* ebx: Current element. */
pushl %ebx /* ebx must be restored. */
/* xmm0: Current element. */
+ /* ymm0: Current element. */
+#if defined(__AVX__)
+.big256cpy:
+ cmpl $0x20,%ecx
+#if defined(__SSE__)
+ jl .big128cpy
+#else
+ jl .wrdcpy
+#endif
+ vmovdqu (%eax),%ymm0
+ vmovdqu %ymm0,(%edx)
+ addl $0x20,%eax
+ addl $0x20,%edx
+ subl $0x20,%ecx
+ jmp .big256cpy
+#endif
#if defined(__SSE__)
.big128cpy:
cmpl $0x10,%ecx
jl .wrdcpy
+#if defined(__SSE2__)
+ movdqu (%eax),%xmm0
+ movdqu %xmm0,(%edx)
+#else
movups (%eax),%xmm0
movups %xmm0,(%edx)
+#endif
addl $0x10,%eax
addl $0x10,%edx
subl $0x10,%ecx
@@ -68,11 +89,24 @@ rgo_memcpy:
/* rdx: Address of the current output element. */
/* rcx: Current element. */
/* xmm0: Current element. */
+ /* ymm0: Current element. */
+#if defined(__AVX__)
+.big256cpy:
+ cmpq $0x20,%rsi
+ jl .big128cpy
+ vmovups (%rdi),%ymm0
+ vmovups %ymm0,(%rdx)
+ addq $0x20,%rdi
+ addq $0x20,%rdx
+ subq $0x20,%rsi
+ jmp .big256cpy
+#endif
.big128cpy:
+ /* rsi: Number of bytes left to copy; 16-byte blocks are copied while at least 0x10 remain. */
cmpq $0x10,%rsi
jl .wrdcpy
- movups (%rdi),%xmm0
- movups %xmm0,(%rdx)
+ movdqu (%rdi),%xmm0
+ movdqu %xmm0,(%rdx)
addq $0x10,%rdi
addq $0x10,%rdx
subq $0x10,%rsi
diff --git a/rgo/src/memfill.S b/rgo/src/memfill.S
index c22547e..7dc00c3 100644
--- a/rgo/src/memfill.S
+++ b/rgo/src/memfill.S
@@ -37,16 +37,14 @@ rgo_memfill:
.done:
ret
#elif defined(__x86_64__)
- /* rax: Address of the current element. */
- movq %rdi,%rax
- /* rax: Address of the element after the last element. */
- movq %rdi,%rcx
- addq %rsi,%rcx
+ /* rdi: Address of the current element. */
+ /* rsi: Address of the element after the last element. */
+ addq %rdi,%rsi
.loop:
- cmpq %rcx,%rax
+ cmpq %rsi,%rdi
je .done /* Exit loop if we have reached the final element. */
- movb %dl,(%rax)
- incq %rax
+ movb %dl,(%rdi)
+ incq %rdi
jmp .loop /* Continue to next element. */
.done:
ret
diff --git a/rgo/src/strlen.S b/rgo/src/strlen.S
index d7ad03e..37b5780 100644
--- a/rgo/src/strlen.S
+++ b/rgo/src/strlen.S
@@ -18,7 +18,22 @@ rgo_strlen:
/*
char const * str
*/
-#if defined(__x86_64__)
+#if defined(__i386__)
+ /* eax: Address of the current character. */
+ movl 0x4(%esp),%eax
+ /* ecx: Address of the first character. */
+ movl %eax,%ecx
+ /* edx: Current character. */
+.loop:
+ movb (%eax),%dl
+ testb %dl,%dl
+ jz .done /* Exit loop if we have reached the null-terminator. */
+ incl %eax /* Continue to the next character. */
+ jmp .loop
+.done:
+ subl %ecx,%eax
+ ret
+#elif defined(__x86_64__)
/* rax: Address of the current character. */
movq %rdi,%rax
/* rdx: Current character. */