summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.txt10
-rw-r--r--Makefile2
-rw-r--r--README.html15
-rw-r--r--rgo/Makefile2
-rw-r--r--rgo/include/rgo.h8
-rw-r--r--rgo/src/fndbyte.S21
-rw-r--r--rgo/src/fndchr.S8
-rw-r--r--rgo/src/memcpy.S101
-rw-r--r--rgo/src/memeq.S102
-rw-r--r--rgo/src/memfill.S41
-rw-r--r--rgo/src/strcpy.S22
-rw-r--r--rgo/src/streq.S30
-rw-r--r--rgo/src/strlen.S17
-rw-r--r--test.c8
14 files changed, 248 insertions, 139 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index face77e..656fdcd 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -1,3 +1,13 @@
+| 2
+
+- Fix target purge in makefile not being labeled phony;
- Add machine architecture check in header;
+- Implement memcpy, memeq, memfill in IA-32;
+- Fix some incorrect comments;
+- Use a different register order for temporaries and optimise register usage;
- Fix bug in memeq: The word comparison should jump to .neq if not equal, not if equal;
+- Update readme;
+
| 1
- Fix indentation in license notices;
diff --git a/Makefile b/Makefile
index ba0209a..549b771 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: clean rgo
+.PHONY: clean purge rgo
rgo:
make -C rgo
diff --git a/README.html b/README.html
index 7c1a794..6a732ac 100644
--- a/README.html
+++ b/README.html
@@ -1,12 +1,12 @@
<!DOCTYPE html>
<html>
<h1>rgo</h1>
- <p>rgo (<b>R</b>untime-al<b>GO</b>rithmic, pronounced <i>are-go</i>) is a C/C++ library for runtime algorithmics on memory sequences.</p>
+ <p>rgo (<b>R</b>untime-al<b>GO</b>rithmic, pronounced as <i>are-go</i>) is a C/C++ library for runtime algorithmics on memory sequences.</p>
<p><i>Note: This library is still in its early stages and is NOT anywhere near being fully optimised.</i></p>
<br />
<h2>Supported Platforms</h2>
<p>rgo is written (mostly) in assembly, and we therefore can't possibly support every platform in existence.</p>
- <p>Currently, it's only compatible with the UNIX System-V ABI. Systems using this ABI include FreeBSD, Linux, macOS, OpenBSD, and any other System-V derivative. Support for Windows NT will be reflected.</p>
+ <p>Currently, it's only compatible with the UNIX System-V ABI. Systems using this ABI include FreeBSD, Linux, macOS, OpenBSD, and any other System-V derivative. Support for Windows is being considered for a future release.</p>
<br />
<p>rgo is written in GNU C and GNU assembly for the following machine architectures:</p>
<ul>
@@ -14,17 +14,18 @@
<p>AMD64 (x86-64), including (Planned) AVX;</p>
</li>
<li>
- <p><i>(Planned) Aarch64 (ARM64), including SVE;</i></p>
+ <p>IA-32 (i386), including SSE and (Planned) AVX;</p>
+ <p><i>Note: Support is currently limited to: memcpy, memeq, memfill.</i></p>
</li>
<li>
- <p><i>(Planned) IA-32 (i386), including SSE and AVX;</i></p>
+ <p><i>(Planned) Aarch64 (ARM64), including SVE;</i></p>
</li>
</ul>
<br />
<h2>Building</h2>
- <p>The provided makefile has been tested to work with GNU make and BSD make and should work on other make implementations.</p>
- <p>The target <i>rgo</i> builds the static library file (<i>rgo/librgo.a</i>). The target <i>clean</i> removes all object files, whilst <i>purge</i> removes all object files and the static library file.</p>
- <p>Instructions for building the test program may be found on the first line in <i>test.c</i>.</p>
+ <p>The provided makefile has been tested to work with GNU make and BSD make and should work with other make implementations.</p>
+ <p>The target <i>rgo</i> builds the static library file (located at <i>rgo/librgo.a</i>). The target <i>clean</i> removes all object files, whilst <i>purge</i> removes all object files and the static library file.</p>
+ <p>Instructions for building the test program may be found on the first line of <i>test.c</i>.</p>
<br />
<h2>Copyright and License</h2>
<p>Copyright 2022 Gabriel Jensen</p>
diff --git a/rgo/Makefile b/rgo/Makefile
index 76282be..02800e0 100644
--- a/rgo/Makefile
+++ b/rgo/Makefile
@@ -25,7 +25,7 @@ CFLAGS = \
-Iinclude \
-g
-.PHONY: clean
+.PHONY: clean purge
$(LIB): $(OBJS)
ar r $@ $^
diff --git a/rgo/include/rgo.h b/rgo/include/rgo.h
index 38c4672..276c8dc 100644
--- a/rgo/include/rgo.h
+++ b/rgo/include/rgo.h
@@ -10,11 +10,15 @@
You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/
+#if !defined(__i386__) && !defined(__x86_64__)
+#error Unsupported machine architecture! Support: AMD64, IA-32.
+#endif
+
#if !defined(rgo_ver)
#if defined(__ASSEMBLER__)
-#define rgo_ver $0x1
+#define rgo_ver $0x2
#else
-#define rgo_ver (0x1)
+#define rgo_ver (0x2)
#endif
#if defined(__ASSEMBLER__)
diff --git a/rgo/src/fndbyte.S b/rgo/src/fndbyte.S
index 2e0ed8b..c0e4382 100644
--- a/rgo/src/fndbyte.S
+++ b/rgo/src/fndbyte.S
@@ -15,23 +15,22 @@
.global rgo_fndbyte
rgo_fndbyte:
-#if defined(__x86_64__)
/*
- rdi: void const * ptr
- rsi: size_t num
- dl: uint8_t byte
+ void const * ptr
+ size_t num
+ uint8_t byte
*/
+#if defined(__x86_64__)
/* rax: Address of the current element. */
movq %rdi,%rax
- /* rcx: Address of the element after the last element. */
- movq %rdi,%rcx
- addq %rsi,%rcx
- /* r8b: Current element. */
+ /* rsi: Address of the element after the last element. */
+ addq %rdi,%rsi
+ /* rcx: Current element. */
.loop:
- cmpq %rax,%rcx
+ cmpq %rax,%rsi
je .nfnd /* We have gone through the entire array without finding the byte. */
- movb (%rax),%r8b
- cmpb %r8b,%dl
+ movb (%rax),%cl
+ cmpb %cl,%dl
je .fnd /* We have found the byte. */
incq %rax
jmp .loop
diff --git a/rgo/src/fndchr.S b/rgo/src/fndchr.S
index cacea5e..1008e52 100644
--- a/rgo/src/fndchr.S
+++ b/rgo/src/fndchr.S
@@ -15,14 +15,14 @@
.global rgo_fndchr
rgo_fndchr:
-#if defined(__x86_64__)
/*
- rdi: char const * str
- sil: char chr
+ char const * str
+ char chr
*/
+#if defined(__x86_64__)
/* rax: Address of the current character. */
movq %rdi,%rax
- /* dl: Current character. */
+ /* rdx: Current character. */
.loop:
movb (%rax),%dl
cmpb %dl,%sil
diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S
index 51d82f9..475da57 100644
--- a/rgo/src/memcpy.S
+++ b/rgo/src/memcpy.S
@@ -3,7 +3,7 @@
This file is part of rgo.
- rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
@@ -15,46 +15,85 @@
.global rgo_memcpy
rgo_memcpy:
-#if defined(__x86_64__)
/*
- rdi: void const * in
- rsi: size_t num
- rdx: void * out
+ void const * in
+ size_t num
+ void * out
*/
- /* rcx: Address of the current input element. */
- movq %rdi,%rcx
- /* r8: Address of the current output element. */
- movq %rdx,%r8
- /* r9: Number of remaining elements. */
- movq %rsi,%r9
- /* r10: Temporary. */
- /* xmm0: Temporary. */
+#if defined(__i386__)
+ /* eax: Address of the current input element. */
+ movl 0x4(%esp),%eax
+ /* ecx: Number of remaining elements. */
+ movl 0x8(%esp),%ecx
+ /* edx: Address of the current output element. */
+ movl 0xC(%esp),%edx
+ /* ebx: Current element. */
+ pushl %ebx /* ebx must be restored. */
+ /* xmm0: Current element. */
+#if defined(__SSE__)
.big128cpy:
- cmpq $0x10,%r9
+ cmpl $0x10,%ecx
jl .wrdcpy
- movups (%rcx),%xmm0
- movups %xmm0,(%r8)
- addq $0x10,%rcx
- addq $0x10,%r8
- subq $0x10,%r9
+ movups (%eax),%xmm0
+ movups %xmm0,(%edx)
+ addl $0x10,%eax
+ addl $0x10,%edx
+ subl $0x10,%ecx
+ jmp .big128cpy
+#endif
+.wrdcpy:
+ cmpl $0x4,%ecx
+ jl .bytecpy
+ movl (%eax),%ebx
+ movl %ebx,(%edx)
+ addl $0x4,%eax
+ addl $0x4,%edx
+ subl $0x4,%ecx
+ jmp .wrdcpy
+.bytecpy:
+ testl %ecx,%ecx
+ jz .done
+ movb (%eax),%bl
+ movb %bl,(%edx)
+ incl %eax
+ incl %edx
+ decl %ecx
+ jmp .bytecpy
+.done:
+ popl %ebx
+ ret
+#elif defined(__x86_64__)
+ /* rdi: Address of the current input element. */
+ /* rsi: Number of remaining elements. */
+ /* rdx: Address of the current output element. */
+ /* rcx: Current element. */
+ /* xmm0: Current element. */
+.big128cpy:
+ cmpq $0x10,%rsi
+ jl .wrdcpy
+ movups (%rdi),%xmm0
+ movups %xmm0,(%rdx)
+ addq $0x10,%rdi
+ addq $0x10,%rdx
+ subq $0x10,%rsi
jmp .big128cpy
.wrdcpy:
- cmpq $0x8,%r9
+ cmpq $0x8,%rsi
jl .bytecpy
- movq (%rcx),%r10
- movq %r10,(%r8)
- addq $0x8,%rcx
- addq $0x8,%r8
- subq $0x8,%r9
+ movq (%rdi),%rcx
+ movq %rcx,(%rdx)
+ addq $0x8,%rdi
+ addq $0x8,%rdx
+ subq $0x8,%rsi
jmp .wrdcpy
.bytecpy:
- testq %r9,%r9
+ testq %rsi,%rsi
jz .done
- movb (%rcx),%r10b
- movb %r10b,(%r8)
- incq %rcx
- incq %r8
- decq %r9
+ movb (%rdi),%cl
+ movb %cl,(%rdx)
+ incq %rdi
+ incq %rdx
+ decq %rsi
jmp .bytecpy
.done:
ret
diff --git a/rgo/src/memeq.S b/rgo/src/memeq.S
index c3a9a63..d106804 100644
--- a/rgo/src/memeq.S
+++ b/rgo/src/memeq.S
@@ -3,7 +3,7 @@
This file is part of rgo.
- rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
@@ -15,46 +15,86 @@
.global rgo_memeq
rgo_memeq:
-#if defined(__x86_64__)
/*
- rdi: void const * lptr
- rsi: size_t num
- rdx: void const * rptr
+ void const * lptr
+ size_t num
+ void const * rptr
*/
- /* rcx: Address of the current left element. */
- movq %rdi,%rcx
- /* r8: Address of the current right element. */
- movq %rdx,%r8
- /* r9: Number of remaining elements. */
- movq %rsi,%r9
- /* r10: Temporary. */
- /* r11: Temporary. */
+#if defined(__i386__)
+ /* eax: Address of the current left element. */
+ movl 0x4(%esp),%eax
+ /* ecx: Number of remaining elements. */
+ movl 0x8(%esp),%ecx
+ /* edx: Address of the current right element. */
+ movl 0xC(%esp),%edx
+ /* ebx: Current left element (bl when comparing single bytes). */
+ pushl %ebx /* ebx is callee-saved and must be restored. */
+ /* esi: Current right element (bh when comparing single bytes). */
+ pushl %esi /* esi is callee-saved and must be restored. */
.wrdeq:
- cmpq $0x8,%r9
+ cmpl $0x4,%ecx
jl .byteeq
- movq (%rcx),%r10
- movq (%r8),%r11
- cmpq %r10,%r11
- jz .neq
- addq $0x8,%rcx
- addq $0x8,%r8
- subq $0x8,%r9
+ movl (%eax),%ebx
+ movl (%edx),%esi
+ cmpl %ebx,%esi
+ jne .neq
+ addl $0x4,%eax
+ addl $0x4,%edx
+ subl $0x4,%ecx
+ jmp .wrdeq
+.byteeq:
+ testl %ecx,%ecx
+ jz .eq /* If no elements remain, all previous elements have compared equal, and the memory sequences are equal. */
+ movb (%eax),%bl
+ movb (%edx),%bh
+ cmpb %bl,%bh
+ jne .neq
+ incl %eax
+ incl %edx
+ decl %ecx
+ jmp .byteeq
+.eq:
+ popl %esi /* Restore in the reverse order of the pushes. */
+ popl %ebx
+ movb $0x1,%al
+ ret
+.neq:
+ popl %esi /* Restore in the reverse order of the pushes. */
+ popl %ebx
+ movb $0x0,%al
+ ret
+#elif defined(__x86_64__)
+ /* rdi: Address of the current left element. */
+ /* rsi: Number of remaining elements. */
+ /* rdx: Address of the current right element. */
+ /* rax: Current left element. */
+ /* rcx: Current right element. */
+.wrdeq:
+ cmpq $0x8,%rsi
+ jl .byteeq
+ movq (%rdi),%rax
+ movq (%rdx),%rcx
+ cmpq %rax,%rcx
+ jne .neq
+ addq $0x8,%rdi
+ addq $0x8,%rdx
+ subq $0x8,%rsi
jmp .wrdeq
.byteeq:
- testq %r9,%r9
- jz .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */
- movb (%rcx),%r10b
- movb (%r8),%r11b
- cmpb %r10b,%r11b
+ testq %rsi,%rsi
+ jz .eq /* If no elements remain, all previous elements have compared equal, and the memory sequences are equal. */
+ movb (%rdi),%al
+ movb (%rdx),%cl
+ cmpb %al,%cl
jne .neq
- incq %rcx
- incq %r8
- decq %r9
+ incq %rdi
+ incq %rdx
+ decq %rsi
jmp .byteeq
.eq:
- mov $0x1,%rax
+ movb $0x1,%al
ret
.neq:
- mov $0x0,%rax
+ movb $0x0,%al
ret
#endif
diff --git a/rgo/src/memfill.S b/rgo/src/memfill.S
index d131c48..c22547e 100644
--- a/rgo/src/memfill.S
+++ b/rgo/src/memfill.S
@@ -15,24 +15,39 @@
.global rgo_memfill
rgo_memfill:
-#if defined(__x86_64__)
/*
- rdi: void const * ptr
- rsi: size_t num
- dl: int_least8_t val
+ void const * ptr
+ size_t num
+ uint8_t val
*/
- /* We don't need to preserve any of the registers we use according to the ABI. */
- /* rcx: Address of the current element. */
+#if defined(__i386__)
+ /* eax: Address of the current element. */
+ movl 0x4(%esp),%eax
+ /* ecx: Address of the element after the last element. */
+ movl 0x4(%esp),%ecx
+ addl 0x8(%esp),%ecx
+ /* edx: Byte value. */
+ movb 0xC(%esp),%dl
+.loop:
+ cmpl %eax,%ecx
+ je .done /* Exit loop if we have reached the final element. */
+ movb %dl,(%eax)
+ incl %eax
+ jmp .loop /* Continue to next element. */
+.done:
+ ret
+#elif defined(__x86_64__)
+ /* rax: Address of the current element. */
+ movq %rdi,%rax
+ /* rcx: Address of the element after the last element. */
movq %rdi,%rcx
- /* rcx: Address of the element after the last element. */
- movq %rdi,%r8
- addq %rsi,%r8
+ addq %rsi,%rcx
.loop:
- cmpq %r8,%rcx
+ cmpq %rcx,%rax
je .done /* Exit loop if we have reached the final element. */
- movb %dl,(%rcx)
- incq %rcx /* Continue to next element. */
- jmp .loop
+ movb %dl,(%rax)
+ incq %rax
+ jmp .loop /* Continue to next element. */
.done:
ret
#endif
diff --git a/rgo/src/strcpy.S b/rgo/src/strcpy.S
index f2fbc36..8750295 100644
--- a/rgo/src/strcpy.S
+++ b/rgo/src/strcpy.S
@@ -10,28 +10,28 @@
You should have received a copy of the GNU Lesser General Public License along with rgo. If not, see <https://www.gnu.org/licenses/>.
*/
-#include <rgo.h>
+#include <rgo.h>
.global rgo_strcpy
rgo_strcpy:
-#if defined(__x86_64__)
/*
- rdi: char const * lstr
- rsi: char const * rstr
+ char const * lstr
+ char const * rstr
*/
+#if defined(__x86_64__)
/* rax: Address of the current input character. */
movq %rdi,%rax
- /* rdx: Address of the current output character. */
- movq %rsi,%rdx
- /* cl: Current character. */
+ /* rsi: Address of the current output character. */
+ movq %rsi,%rsi
+ /* rdx: Current character. */
.loop:
- movb (%rax),%cl
- movb %cl,(%rdx)
- testb %cl,%cl
+ movb (%rax),%dl
+ movb %dl,(%rsi)
+ testb %dl,%dl
jz .done
incq %rax
- incq %rdx
+ incq %rsi
jmp .loop
.done:
subq %rdi,%rax
diff --git a/rgo/src/streq.S b/rgo/src/streq.S
index f530d54..8969e41 100644
--- a/rgo/src/streq.S
+++ b/rgo/src/streq.S
@@ -15,26 +15,26 @@
.global rgo_streq
rgo_streq:
-#if defined(__x86_64__)
/*
- rdi: char const * lstr
- rsi: char const * rstr
+ char const * lstr
+ char const * rstr
*/
- /* rdx: Address of the current input character. */
- movq %rdi,%rdx
- /* rcx: Address of the current output character. */
- movq %rsi,%rcx
- /* r8b: Current input character. */
- /* r9b: Current output character. */
+#if defined(__x86_64__)
+ /* rax: Address of the current input character. */
+ movq %rdi,%rax
+ /* rsi: Address of the current output character. */
+ movq %rsi,%rsi
+ /* rdx: Current input character. */
+ /* rcx: Current output character. */
.loop:
- movb (%rdx),%r8b
- movb (%rcx),%r9b
- cmpb %r8b,%r9b
+ movb (%rax),%dl
+ movb (%rsi),%cl
+ cmpb %dl,%cl
jne .neq
- testb %r8b,%r8b /* Check if we have reached the null-terminator. */
+ testb %dl,%dl /* Check if we have reached the null-terminator. */
jz .eq
- incq %rdx
- incq %rcx
+ incq %rax
+ incq %rsi
jmp .loop
.eq:
mov $0x1,%rax
diff --git a/rgo/src/strlen.S b/rgo/src/strlen.S
index 7508be9..d7ad03e 100644
--- a/rgo/src/strlen.S
+++ b/rgo/src/strlen.S
@@ -3,7 +3,7 @@
This file is part of rgo.
- rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ rgo is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
rgo is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
@@ -15,21 +15,20 @@
.global rgo_strlen
rgo_strlen:
-#if defined(__x86_64__)
/*
- rdi: char const * str
+ char const * str
*/
- /* rsi: Address of the current character. */
- movq %rdi,%rsi
- /* dl: Current character. */
+#if defined(__x86_64__)
+ /* rax: Address of the current character. */
+ movq %rdi,%rax
+ /* rdx: Current character. */
.loop:
- movb (%rsi),%dl
+ movb (%rax),%dl
testb %dl,%dl
jz .done /* Exit loop if we have reached the null-terminator. */
- incq %rsi /* Continue to the next character. */
+ incq %rax /* Continue to the next character. */
jmp .loop
.done:
- movq %rsi,%rax
subq %rdi,%rax
ret
#endif
diff --git a/test.c b/test.c
index 8a3126e..8892dea 100644
--- a/test.c
+++ b/test.c
@@ -59,12 +59,13 @@ int main(void) {
assert(arr1[(size_t)0x4] == arr0[(size_t)0x4]);
assert(arr1[(size_t)0x5] == arr0[(size_t)0x5]);
assert(arr1[(size_t)0x6] == arr0[(size_t)0x6]);
- uint8_t const cmp = rgo_memeq(arr1,arrsz,arr0);
- fprintf(stderr,"cmp: %u\n",cmp);
- assert(cmp);
+ uint8_t const eq = rgo_memeq(arr1,arrsz,arr0);
+ fprintf(stderr,"eq: %u\n",eq);
+ assert(eq);
#undef arrsz
}
fprintf(stderr,"\n");
+#if defined(__x86_64__)
{
char const * str0 = "Hello there! General Kenobi?";
fprintf(stderr,"str0: \"%s\"\n",str0);
@@ -125,5 +126,6 @@ int main(void) {
assert(rgo_streq(str0,str1));
}
fprintf(stderr,"\n");
+#endif
printf("All tests have passed!\n");
}