summaryrefslogtreecommitdiff
path: root/rgo/src
diff options
context:
space:
mode:
Diffstat (limited to 'rgo/src')
-rw-r--r--rgo/src/fastimpl.c15
-rw-r--r--rgo/src/fndbyte.S63
-rw-r--r--rgo/src/fndbyte.c74
-rw-r--r--rgo/src/fndchr.S54
-rw-r--r--rgo/src/fndchr.c68
-rw-r--r--rgo/src/memcpy.S127
-rw-r--r--rgo/src/memcpy.c138
-rw-r--r--rgo/src/memdup.c4
-rw-r--r--rgo/src/memeq.S94
-rw-r--r--rgo/src/memeq.c106
-rw-r--r--rgo/src/memfill.S45
-rw-r--r--rgo/src/memfill.c55
-rw-r--r--rgo/src/strcpy.S33
-rw-r--r--rgo/src/strcpy.c67
-rw-r--r--rgo/src/strdup.c4
-rw-r--r--rgo/src/streq.S39
-rw-r--r--rgo/src/streq.c78
-rw-r--r--rgo/src/strfill.c4
-rw-r--r--rgo/src/strlen.S43
-rw-r--r--rgo/src/strlen.c54
20 files changed, 661 insertions, 504 deletions
diff --git a/rgo/src/fastimpl.c b/rgo/src/fastimpl.c
new file mode 100644
index 0000000..c584119
--- /dev/null
+++ b/rgo/src/fastimpl.c
@@ -0,0 +1,15 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <rgo-priv.h>
+
+/* Reports whether rgo_priv_fastimpl was defined when this file was compiled: 0x1 if it was, 0x0 if it was not. */
+sus_typ_u8 rgo_fastimpl(void) {
+#if !defined(rgo_priv_fastimpl)
+ sus_typ_u8 const isfast = sus_typlit_u8(0x0);
+#else
+ sus_typ_u8 const isfast = sus_typlit_u8(0x1);
+#endif
+ return isfast;
+}
diff --git a/rgo/src/fndbyte.S b/rgo/src/fndbyte.S
deleted file mode 100644
index 0d171cd..0000000
--- a/rgo/src/fndbyte.S
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <rgo.h>
-
-.global rgo_fndbyte
-
-rgo_fndbyte:
- /*
- void const * ptr
- size_t num
- uint8_t byte
- */
-#if defined(__i386__)
- /* eax: Address of the current element. */
- movl 0x4(%esp),%eax
- /* ecx: Address of the element after the last element. */
- movl 0x8(%esp),%ecx
- addl %eax,%ecx
- /* edx: Byte value. */
- movb 0xC(%esp),%dl
- /* ebx: Current element. */
- pushl %ebx
-.loop:
- cmpl %eax,%ecx
- je .nfnd /* We have went through the entire array without finding the byte. */
- movb (%eax),%bl
- cmpb %bl,%dl
- je .fnd /* We have found the byte. */
- incl %eax
- jmp .loop
-.fnd:
- popl %ebx
- subl 0x4(%esp),%eax
- ret
-.nfnd:
- popl %ebx
- movl $0xFFFFFFFF,%eax
- ret
-#elif defined(__x86_64__)
- /* rax: Address of the current element. */
- movq %rdi,%rax
- /* rsi: Address of the element after the last element. */
- addq %rdi,%rsi
- /* rcx: Current element. */
-.loop:
- cmpq %rax,%rsi
- je .nfnd /* We have went through the entire array without finding the byte. */
- movb (%rax),%cl
- cmpb %cl,%dl
- je .fnd /* We have found the byte. */
- incq %rax
- jmp .loop
-.fnd:
- subq %rdi,%rax
- ret
-.nfnd:
- movq $0xFFFFFFFFFFFFFFFF,%rax
- ret
-#endif
diff --git a/rgo/src/fndbyte.c b/rgo/src/fndbyte.c
new file mode 100644
index 0000000..29945a0
--- /dev/null
+++ b/rgo/src/fndbyte.c
@@ -0,0 +1,74 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <rgo-priv.h>
+
+#if defined(rgo_priv_fastimpl)
+__asm__ (
+ ".global rgo_fndbyte\n"
+
+ "rgo_fndbyte:\n"
+ /*
+ Finds the first byte equal to byte among the num bytes starting at ptr.
+ The result register (rax/eax) receives the offset of that byte from ptr, or all bits set if no byte matched.
+ The amd64 code reads its arguments from rdi, rsi and dl; the ia32 code reads them from 0x4(%esp), 0x8(%esp) and 0xC(%esp).
+
+ void const * ptr
+ sus_typ_usz num
+ sus_typ_u8 byte
+ */
+#if defined(sus_arch_amd64)
+ /* rax: Address of the current element. */
+ "movq %rdi,%rax\n"
+ /* rsi: Address of the element after the last element. */
+ "addq %rdi,%rsi\n"
+ /* rcx: Current element. */
+ ".loop:\n"
+ "cmpq %rax,%rsi\n"
+ "je .nfnd\n" /* We have gone through the entire array without finding the byte. */
+ "movb (%rax),%cl\n"
+ "cmpb %cl,%dl\n"
+ "je .fnd\n" /* We have found the byte. */
+ "incq %rax\n"
+ "jmp .loop\n"
+ ".fnd:\n"
+ "subq %rdi,%rax\n" /* Turn the address of the match into an offset from ptr. */
+ "ret\n"
+ ".nfnd:\n"
+ "movq $0xFFFFFFFFFFFFFFFF,%rax\n" /* All bits set: no match. */
+ "ret\n"
+#elif defined(sus_arch_ia32)
+ /* eax: Address of the current element. */
+ "movl 0x4(%esp),%eax\n"
+ /* ecx: Address of the element after the last element. */
+ "movl 0x8(%esp),%ecx\n"
+ "addl %eax,%ecx\n"
+ /* edx: Byte value. */
+ "movb 0xC(%esp),%dl\n"
+ /* ebx: Current element. It is pushed here and popped again on both exit paths. */
+ "pushl %ebx\n"
+ ".loop:\n"
+ "cmpl %eax,%ecx\n"
+ "je .nfnd\n" /* We have gone through the entire array without finding the byte. */
+ "movb (%eax),%bl\n"
+ "cmpb %bl,%dl\n"
+ "je .fnd\n" /* We have found the byte. */
+ "incl %eax\n"
+ "jmp .loop\n"
+ ".fnd:\n"
+ "popl %ebx\n"
+ "subl 0x4(%esp),%eax\n" /* After the pop, 0x4(%esp) is ptr again: turn the address of the match into an offset. */
+ "ret\n"
+ ".nfnd:\n"
+ "popl %ebx\n"
+ "movl $0xFFFFFFFF,%eax\n" /* All bits set: no match. */
+ "ret\n"
+#endif
+);
+#else
+/* C fallback: returns the offset of the first byte equal to _byte among the _num bytes at _ptr, or sus_typlit_usz(-0x1) if no byte matches. */
+sus_typ_usz rgo_fndbyte(void const * const sus_restr _ptr,sus_typ_usz const _num,sus_typ_u8 const _byte) {
+ sus_typ_u8 const * const start = (sus_typ_u8 const *)_ptr;
+ sus_typ_u8 const * const stop = start + _num; /* One past the last byte to inspect. */
+ sus_typ_u8 const * cur = start;
+ while (cur != stop) {
+  sus_unlikely (*cur == _byte) {return cur - start;}
+  ++cur;
+ }
+ return sus_typlit_usz(-0x1);
+}
+#endif
diff --git a/rgo/src/fndchr.S b/rgo/src/fndchr.S
deleted file mode 100644
index 608916b..0000000
--- a/rgo/src/fndchr.S
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <rgo.h>
-
-.global rgo_fndchr
-
-rgo_fndchr:
- /*
- char const * str
- char chr
- */
-#if defined(__i386__)
- /* eax: Address of the current character. */
- movl 0x4(%esp),%eax
- /* ecx: Character. */
- movb 0x8(%esp),%cl
- /* edx: Current character. */
-.loop:
- movb (%eax),%dl
- cmpb %dl,%cl
- je .fnd /* Exit loop if we have found the character. */
- testb %dl,%dl
- je .nfnd /* We encountered the null-terminator but not the specified character. */
- incl %eax
- jmp .loop
-.fnd:
- subl 0x4(%esp),%eax
- ret
-.nfnd:
- movl $0xFFFFFFFF,%eax
- ret
-#elif defined(__x86_64__)
- /* rax: Address of the current character. */
- movq %rdi,%rax
- /* rdx: Current character. */
-.loop:
- movb (%rax),%dl
- cmpb %dl,%sil
- je .fnd /* Exit loop if we have found the character. */
- testb %dl,%dl
- je .nfnd /* We encountered the null-terminator but not the specified character. */
- incq %rax
- jmp .loop
-.fnd:
- subq %rdi,%rax
- ret
-.nfnd:
- movq $0xFFFFFFFFFFFFFFFF,%rax
- ret
-#endif
diff --git a/rgo/src/fndchr.c b/rgo/src/fndchr.c
new file mode 100644
index 0000000..12cd006
--- /dev/null
+++ b/rgo/src/fndchr.c
@@ -0,0 +1,68 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <rgo-priv.h>
+
+#if defined(rgo_priv_fastimpl)
+__asm__ (
+ ".global rgo_fndchr\n"
+
+ "rgo_fndchr:\n"
+ /*
+ Finds the first occurrence of chr in the null-terminated string str.
+ The result register (rax/eax) receives the offset of that character from str, or all bits set if the null-terminator is reached first.
+ The match test comes before the terminator test, so searching for the null character yields the offset of the terminator.
+ The amd64 code reads its arguments from rdi and sil; the ia32 code reads them from 0x4(%esp) and 0x8(%esp).
+
+ char const * str
+ char chr
+ */
+#if defined(sus_arch_amd64)
+ /* rax: Address of the current character. */
+ "movq %rdi,%rax\n"
+ /* rdx: Current character. */
+ ".loop:\n"
+ "movb (%rax),%dl\n"
+ "cmpb %dl,%sil\n"
+ "je .fnd\n" /* Exit loop if we have found the character. */
+ "testb %dl,%dl\n"
+ "je .nfnd\n" /* We encountered the null-terminator but not the specified character. */
+ "incq %rax\n"
+ "jmp .loop\n"
+ ".fnd:\n"
+ "subq %rdi,%rax\n" /* Turn the address of the match into an offset from str. */
+ "ret\n"
+ ".nfnd:\n"
+ "movq $0xFFFFFFFFFFFFFFFF,%rax\n" /* All bits set: no match. */
+ "ret\n"
+#elif defined(sus_arch_ia32)
+ /* eax: Address of the current character. */
+ "movl 0x4(%esp),%eax\n"
+ /* ecx: Character. */
+ "movb 0x8(%esp),%cl\n"
+ /* edx: Current character. */
+ ".loop:\n"
+ "movb (%eax),%dl\n"
+ "cmpb %dl,%cl\n"
+ "je .fnd\n" /* Exit loop if we have found the character. */
+ "testb %dl,%dl\n"
+ "je .nfnd\n" /* We encountered the null-terminator but not the specified character. */
+ "incl %eax\n"
+ "jmp .loop\n"
+ ".fnd:\n"
+ "subl 0x4(%esp),%eax\n" /* Turn the address of the match into an offset from str. */
+ "ret\n"
+ ".nfnd:\n"
+ "movl $0xFFFFFFFF,%eax\n" /* All bits set: no match. */
+ "ret\n"
+#endif
+);
+#else
+/* C fallback: returns the offset of the first _chr in the null-terminated string _str, or sus_typlit_usz(-0x1) if the terminator comes first. */
+sus_typ_usz rgo_fndchr(char const * const sus_restr _str,char const _chr) {
+ char const * sus_restr cur = _str;
+ /* The match test runs before the terminator test, so searching for the null character finds the terminator itself. */
+ while (*cur != _chr) {
+  sus_unlikely (*cur == '\x0') {return sus_typlit_usz(-0x1);}
+  ++cur;
+ }
+ return (sus_typ_usz)(cur - _str);
+}
+#endif
diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S
deleted file mode 100644
index fefa614..0000000
--- a/rgo/src/memcpy.S
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <rgo.h>
-
-.global rgo_memcpy
-
-rgo_memcpy:
- /*
- void const * in
- size_t num
- void * out
- */
-#if defined(__i386__)
- /* eax: Address of the current input element. */
- movl 0x4(%esp),%eax
- /* ecx: Number of remaining elements. */
- movl 0x8(%esp),%ecx
- /* edx: Address of the current output element. */
- movl 0xC(%esp),%edx
- /* ebx: Current element. */
- pushl %ebx /* ebx must be restored. */
- /* xmm0: Current element. */
- /* ymm0: Current element. */
-#if defined(__AVX__)
-.big256cpy:
- cmpl $0x20,%ecx
-#if defined(__SSE__)
- jl .big128cpy
-#else
- jl .wrdcpy
-#endif
- vmovdqu (%eax),%ymm0
- vmovdqu %ymm0,(%edx)
- addl $0x20,%eax
- addl $0x20,%edx
- subl $0x20,%ecx
- jmp .big256cpy
-#endif
-#if defined(__SSE__)
-.big128cpy:
- cmpl $0x10,%ecx
- jl .wrdcpy
-#if defined(__SSE2__)
- movdqu (%eax),%xmm0
- movdqu %xmm0,(%edx)
-#else
- movups (%eax),%xmm0
- movups %xmm0,(%edx)
-#endif
- addl $0x10,%eax
- addl $0x10,%edx
- subl $0x10,%ecx
- jmp .big128cpy
-#endif
-.wrdcpy:
- cmpl $0x4,%ecx
- jl .bytecpy
- movl (%eax),%ebx
- movl %ebx,(%edx)
- addl $0x4,%eax
- addl $0x4,%edx
- subl $0x4,%ecx
- jmp .wrdcpy
-.bytecpy:
- testl %ecx,%ecx
- jz .done
- movb (%eax),%bl
- movb %bl,(%edx)
- incl %eax
- incl %edx
- decl %ecx
- jmp .bytecpy
-.done:
- popl %ebx
- ret
-#elif defined(__x86_64__)
- /* rdi: Address of the current input element. */
- /* rsi: Number of remaining elements. */
- /* rdx: Address of the current output element. */
- /* rcx: Current element. */
- /* xmm0: Current element. */
- /* ymm0: Current element. */
-#if defined(__AVX__)
-.big256cpy:
- cmpq $0x20,%rsi
- jl .big128cpy
- vmovups (%rdi),%ymm0
- vmovups %ymm0,(%rdx)
- addq $0x20,%rdi
- addq $0x20,%rdx
- subq $0x20,%rsi
- jmp .big256cpy
-#endif
-.big128cpy:
- cmpq $0x10,%rsi
- jl .wrdcpy
- movdqu (%rdi),%xmm0
- movdqu %xmm0,(%rdx)
- addq $0x10,%rdi
- addq $0x10,%rdx
- subq $0x10,%rsi
- jmp .big128cpy
-.wrdcpy:
- cmpq $0x8,%rsi
- jl .bytecpy
- movq (%rdi),%rcx
- movq %rcx,(%rdx)
- addq $0x8,%rdi
- addq $0x8,%rdx
- subq $0x8,%rsi
- jmp .wrdcpy
-.bytecpy:
- testq %rsi,%rsi
- jz .done
- movb (%rdi),%cl
- movb %cl,(%rdx)
- incq %rdi
- incq %rdx
- decq %rsi
- jmp .bytecpy
-.done:
- ret
-#endif
diff --git a/rgo/src/memcpy.c b/rgo/src/memcpy.c
new file mode 100644
index 0000000..89adc14
--- /dev/null
+++ b/rgo/src/memcpy.c
@@ -0,0 +1,138 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <rgo-priv.h>
+
+#if defined(rgo_priv_fastimpl)
+__asm__ (
+ ".global rgo_memcpy\n"
+
+ "rgo_memcpy:\n"
+ /*
+ Copies num bytes from in to out: first in the widest chunks that are enabled (256-bit, 128-bit, machine word), then byte by byte for the remainder.
+ num is a size, so every size check branches with jb (unsigned below). A signed jl would treat a count with its top bit set as negative and send the whole copy to the byte loop.
+ The amd64 code reads its arguments from rdi, rsi and rdx; the ia32 code reads them from 0x4(%esp), 0x8(%esp) and 0xC(%esp).
+
+ void const * in
+ sus_typ_usz num
+ void * out
+ */
+#if defined(sus_arch_amd64)
+ /* rdi: Address of the current input element. */
+ /* rsi: Number of remaining elements. */
+ /* rdx: Address of the current output element. */
+ /* rcx: Current element. */
+ /* xmm0: Current element. */
+ /* ymm0: Current element. */
+#if defined(sus_archfeat_avx)
+ ".big256cpy:\n"
+ "cmpq $0x20,%rsi\n"
+ "jb .big128cpy\n" /* Fewer than 32 bytes remain. */
+ "vmovups (%rdi),%ymm0\n"
+ "vmovups %ymm0,(%rdx)\n"
+ "addq $0x20,%rdi\n"
+ "addq $0x20,%rdx\n"
+ "subq $0x20,%rsi\n"
+ "jmp .big256cpy\n"
+#endif
+ ".big128cpy:\n"
+ "cmpq $0x10,%rsi\n"
+ "jb .wrdcpy\n" /* Fewer than 16 bytes remain. */
+ "movdqu (%rdi),%xmm0\n"
+ "movdqu %xmm0,(%rdx)\n"
+ "addq $0x10,%rdi\n"
+ "addq $0x10,%rdx\n"
+ "subq $0x10,%rsi\n"
+ "jmp .big128cpy\n"
+ ".wrdcpy:\n"
+ "cmpq $0x8,%rsi\n"
+ "jb .bytecpy\n" /* Fewer than 8 bytes remain. */
+ "movq (%rdi),%rcx\n"
+ "movq %rcx,(%rdx)\n"
+ "addq $0x8,%rdi\n"
+ "addq $0x8,%rdx\n"
+ "subq $0x8,%rsi\n"
+ "jmp .wrdcpy\n"
+ ".bytecpy:\n"
+ "testq %rsi,%rsi\n"
+ "jz .done\n" /* Nothing left to copy. */
+ "movb (%rdi),%cl\n"
+ "movb %cl,(%rdx)\n"
+ "incq %rdi\n"
+ "incq %rdx\n"
+ "decq %rsi\n"
+ "jmp .bytecpy\n"
+ ".done:\n"
+ "ret\n"
+#elif defined(sus_arch_ia32)
+ /* eax: Address of the current input element. */
+ "movl 0x4(%esp),%eax\n"
+ /* ecx: Number of remaining elements. */
+ "movl 0x8(%esp),%ecx\n"
+ /* edx: Address of the current output element. */
+ "movl 0xC(%esp),%edx\n"
+ /* ebx: Current element. */
+ "pushl %ebx\n" /* ebx must be restored. */
+ /* xmm0: Current element. */
+ /* ymm0: Current element. */
+#if defined(sus_archfeat_avx)
+ ".big256cpy:\n"
+ "cmpl $0x20,%ecx\n"
+ /* Fewer than 32 bytes remain: continue with the next smaller chunk size that is enabled. */
+#if defined(sus_archfeat_sse)
+ "jb .big128cpy\n"
+#else
+ "jb .wrdcpy\n"
+#endif
+ "vmovdqu (%eax),%ymm0\n"
+ "vmovdqu %ymm0,(%edx)\n"
+ "addl $0x20,%eax\n"
+ "addl $0x20,%edx\n"
+ "subl $0x20,%ecx\n"
+ "jmp .big256cpy\n"
+#endif
+#if defined(sus_archfeat_sse)
+ ".big128cpy:\n"
+ "cmpl $0x10,%ecx\n"
+ "jb .wrdcpy\n" /* Fewer than 16 bytes remain. */
+#if defined(sus_archfeat_sse2)
+ "movdqu (%eax),%xmm0\n"
+ "movdqu %xmm0,(%edx)\n"
+#else
+ "movups (%eax),%xmm0\n"
+ "movups %xmm0,(%edx)\n"
+#endif
+ "addl $0x10,%eax\n"
+ "addl $0x10,%edx\n"
+ "subl $0x10,%ecx\n"
+ "jmp .big128cpy\n"
+#endif
+ ".wrdcpy:\n"
+ "cmpl $0x4,%ecx\n"
+ "jb .bytecpy\n" /* Fewer than 4 bytes remain. */
+ "movl (%eax),%ebx\n"
+ "movl %ebx,(%edx)\n"
+ "addl $0x4,%eax\n"
+ "addl $0x4,%edx\n"
+ "subl $0x4,%ecx\n"
+ "jmp .wrdcpy\n"
+ ".bytecpy:\n"
+ "testl %ecx,%ecx\n"
+ "jz .done\n" /* Nothing left to copy. */
+ "movb (%eax),%bl\n"
+ "movb %bl,(%edx)\n"
+ "incl %eax\n"
+ "incl %edx\n"
+ "decl %ecx\n"
+ "jmp .bytecpy\n"
+ ".done:\n"
+ "popl %ebx\n"
+ "ret\n"
+#endif
+);
+#else
+/* C fallback: copies _num bytes from _in to _out, one byte at a time, in ascending address order. */
+void rgo_memcpy(void const * const sus_restr _in,sus_typ_usz const _num,void * const sus_restr _out) {
+ sus_typ_u8 const * src = (sus_typ_u8 const *)_in;
+ sus_typ_u8 const * const stop = src + _num; /* One past the last input byte. */
+ sus_typ_u8 * sus_restr dst = (sus_typ_u8 *)_out;
+ while (src != stop) {
+  *dst = *src;
+  ++src;
+  ++dst;
+ }
+}
+#endif
diff --git a/rgo/src/memdup.c b/rgo/src/memdup.c
index 45b7dda..fb247f3 100644
--- a/rgo/src/memdup.c
+++ b/rgo/src/memdup.c
@@ -4,11 +4,11 @@
If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
-#include <rgo.h>
+#include <rgo-priv.h>
#include <stdlib.h>
-void * __rgo_memdup(void const * const __restrict__ _ptr,size_t const _num) {
+void * __rgo_memdup(void const * const __restrict__ _ptr,sus_typ_usz const _num) {
void * const __restrict__ dup = malloc(_num);
if (__builtin_expect (dup == NULL,0x0l)) {return NULL;}
rgo_memcpy(_ptr,_num,dup);
diff --git a/rgo/src/memeq.S b/rgo/src/memeq.S
deleted file mode 100644
index bd57f43..0000000
--- a/rgo/src/memeq.S
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <rgo.h>
-
-.global rgo_memeq
-
-rgo_memeq:
- /*
- void const * lptr
- size_t num
- void const * rptr
- */
-#if defined(__i386__)
- /* eax: Address of the current left element. */
- movl 0x4(%esp),%eax
- /* ecx: Number of remaining elements. */
- movl 0x8(%esp),%ecx
- /* edx: Address of the current right element. */
- movl 0xC(%esp),%edx
- /* ebx: Current left element. */
- pushl %ebx
- /* ebx/esi: Current right element. */
- pushl %esi
-.wrdcmp:
- cmpl $0x4,%ecx
- jl .bytecmp
- movl (%eax),%ebx
- movl (%edx),%esi
- cmpl %ebx,%esi
- jne .neq
- addl $0x4,%eax
- addl $0x4,%edx
- subl $0x4,%ecx
- jmp .wrdcmp
-.bytecmp:
- testl %ecx,%ecx
- jne .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */
- movb (%eax),%bl
- movb (%edx),%bh
- cmpb %bl,%bh
- jne .neq
- incl %eax
- incl %edx
- decl %ecx
- jmp .bytecmp
-.eq:
- popl %ebx
- popl %esi
- movb $0x1,%al
- ret
-.neq:
- popl %ebx
- popl %esi
- movb $0x0,%al
- ret
-#elif defined(__x86_64__)
- /* rdi: Address of the current left element. */
- /* rsi: Number of remaining elements. */
- /* rdx: Address of the current right element. */
- /* rax: Current left element. */
- /* rcx: Current right element. */
-.wrdcmp:
- cmpq $0x8,%rsi
- jl .bytecmp
- movq (%rdi),%rax
- movq (%rdx),%rcx
- cmpq %rax,%rcx
- jne .neq
- addq $0x8,%rdi
- addq $0x8,%rdx
- subq $0x8,%rsi
- jmp .wrdcmp
-.bytecmp:
- testq %rsi,%rsi
- jne .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */
- movb (%rdi),%al
- movb (%rdx),%cl
- cmpb %al,%cl
- jne .neq
- incq %rdi
- incq %rdx
- decq %rsi
- jmp .bytecmp
-.eq:
- movb $0x1,%al
- ret
-.neq:
- movb $0x0,%al
- ret
-#endif
diff --git a/rgo/src/memeq.c b/rgo/src/memeq.c
new file mode 100644
index 0000000..03ae2c2
--- /dev/null
+++ b/rgo/src/memeq.c
@@ -0,0 +1,106 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <rgo-priv.h>
+
+#if defined(rgo_priv_fastimpl)
+__asm__ (
+ ".global rgo_memeq\n"
+
+ "rgo_memeq:\n"
+ /*
+ Compares the num bytes at lptr with the num bytes at rptr, a machine word at a time and then byte by byte.
+ al receives 0x1 if all bytes are equal (including when num is zero) and 0x0 at the first difference.
+ num is a size, so the word-size check branches with jb (unsigned below) rather than the signed jl.
+ The amd64 code reads its arguments from rdi, rsi and rdx; the ia32 code reads them from 0x4(%esp), 0x8(%esp) and 0xC(%esp).
+
+ void const * lptr
+ sus_typ_usz num
+ void const * rptr
+ */
+#if defined(sus_arch_amd64)
+ /* rdi: Address of the current left element. */
+ /* rsi: Number of remaining elements. */
+ /* rdx: Address of the current right element. */
+ /* rax: Current left element. */
+ /* rcx: Current right element. */
+ ".wrdcmp:\n"
+ "cmpq $0x8,%rsi\n"
+ "jb .bytecmp\n" /* Fewer than 8 bytes remain. */
+ "movq (%rdi),%rax\n"
+ "movq (%rdx),%rcx\n"
+ "cmpq %rax,%rcx\n"
+ "jne .neq\n"
+ "addq $0x8,%rdi\n"
+ "addq $0x8,%rdx\n"
+ "subq $0x8,%rsi\n"
+ "jmp .wrdcmp\n"
+ ".bytecmp:\n"
+ "testq %rsi,%rsi\n"
+ "jz .eq\n" /* No elements remain: all previous elements have compared equal, so the memory sequences are equal. */
+ "movb (%rdi),%al\n"
+ "movb (%rdx),%cl\n"
+ "cmpb %al,%cl\n"
+ "jne .neq\n"
+ "incq %rdi\n"
+ "incq %rdx\n"
+ "decq %rsi\n"
+ "jmp .bytecmp\n"
+ ".eq:\n"
+ "movb $0x1,%al\n"
+ "ret\n"
+ ".neq:\n"
+ "movb $0x0,%al\n"
+ "ret\n"
+#elif defined(sus_arch_ia32)
+ /* eax: Address of the current left element. */
+ "movl 0x4(%esp),%eax\n"
+ /* ecx: Number of remaining elements. */
+ "movl 0x8(%esp),%ecx\n"
+ /* edx: Address of the current right element. */
+ "movl 0xC(%esp),%edx\n"
+ /* ebx: Current left element (bl), and current right element (bh) in the byte loop. */
+ "pushl %ebx\n"
+ /* esi: Current right element in the word loop. */
+ "pushl %esi\n"
+ ".wrdcmp:\n"
+ "cmpl $0x4,%ecx\n"
+ "jb .bytecmp\n" /* Fewer than 4 bytes remain. */
+ "movl (%eax),%ebx\n"
+ "movl (%edx),%esi\n"
+ "cmpl %ebx,%esi\n"
+ "jne .neq\n"
+ "addl $0x4,%eax\n"
+ "addl $0x4,%edx\n"
+ "subl $0x4,%ecx\n"
+ "jmp .wrdcmp\n"
+ ".bytecmp:\n"
+ "testl %ecx,%ecx\n"
+ "jz .eq\n" /* No elements remain: all previous elements have compared equal, so the memory sequences are equal. */
+ "movb (%eax),%bl\n"
+ "movb (%edx),%bh\n"
+ "cmpb %bl,%bh\n"
+ "jne .neq\n"
+ "incl %eax\n"
+ "incl %edx\n"
+ "decl %ecx\n"
+ "jmp .bytecmp\n"
+ ".eq:\n"
+ /* Restore the saved registers in the reverse of the order they were pushed. */
+ "popl %esi\n"
+ "popl %ebx\n"
+ "movb $0x1,%al\n"
+ "ret\n"
+ ".neq:\n"
+ /* Restore the saved registers in the reverse of the order they were pushed. */
+ "popl %esi\n"
+ "popl %ebx\n"
+ "movb $0x0,%al\n"
+ "ret\n"
+#endif
+);
+#else
+/* C fallback: returns 0x1 if the _num bytes at _lptr equal the _num bytes at _rptr (always the case when _num is zero), 0x0 at the first difference. */
+sus_typ_u8 rgo_memeq(void const * const sus_restr _lptr,sus_typ_usz const _num,void const * const sus_restr _rptr) {
+ sus_typ_u8 const * lcur = (sus_typ_u8 const *)_lptr;
+ sus_typ_u8 const * const lstop = lcur + _num; /* One past the last left byte. */
+ sus_typ_u8 const * sus_restr rcur = (sus_typ_u8 const *)_rptr;
+ while (lcur != lstop) {
+  sus_likely (*lcur != *rcur) {return sus_typlit_u8(0x0);}
+  ++lcur;
+  ++rcur;
+ }
+ return sus_typlit_u8(0x1);
+}
+#endif
diff --git a/rgo/src/memfill.S b/rgo/src/memfill.S
deleted file mode 100644
index f01cd65..0000000
--- a/rgo/src/memfill.S
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <rgo.h>
-
-.global rgo_memfill
-
-rgo_memfill:
- /*
- void const * ptr
- size_t num
- uint8_t val
- */
-#if defined(__i386__)
- /* eax: Address of the current element. */
- movl 0x4(%esp),%eax
- /* ecx: Address of the element after the last element. */
- movl 0x4(%esp),%ecx
- addl 0x8(%esp),%ecx
- /* rdx: Byte value. */
- movb 0xC(%esp),%dl
-.loop:
- cmpl %eax,%ecx
- je .done /* Exit loop if we have reached the final element. */
- movb %dl,(%eax)
- incl %eax
- jmp .loop /* Continue to next element. */
-.done:
- ret
-#elif defined(__x86_64__)
- /* rdi: Address of the current element. */
- /* rsi: Address of the element after the last element. */
- addq %rdi,%rsi
-.loop:
- cmpq %rsi,%rdi
- je .done /* Exit loop if we have reached the final element. */
- movb %dl,(%rdi)
- incq %rdi
- jmp .loop /* Continue to next element. */
-.done:
- ret
-#endif
diff --git a/rgo/src/memfill.c b/rgo/src/memfill.c
new file mode 100644
index 0000000..ec5e1fa
--- /dev/null
+++ b/rgo/src/memfill.c
@@ -0,0 +1,55 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <rgo-priv.h>
+
+#if defined(rgo_priv_fastimpl)
+__asm__ (
+ ".global rgo_memfill\n"
+
+ "rgo_memfill:\n"
+ /*
+ Stores val into each of the num bytes starting at ptr, in ascending address order.
+ No result is placed in a register.
+ The amd64 code reads its arguments from rdi, rsi and dl; the ia32 code reads them from 0x4(%esp), 0x8(%esp) and 0xC(%esp).
+
+ void * ptr
+ sus_typ_usz num
+ sus_typ_u8 val
+ */
+#if defined(sus_arch_amd64)
+ /* rdi: Address of the current element. */
+ /* rsi: Address of the element after the last element. */
+ "addq %rdi,%rsi\n"
+ ".loop:\n"
+ "cmpq %rsi,%rdi\n"
+ "je .done\n" /* Exit loop if we have reached the final element. */
+ "movb %dl,(%rdi)\n"
+ "incq %rdi\n"
+ "jmp .loop\n" /* Continue to next element. */
+ ".done:\n"
+ "ret\n"
+#elif defined(sus_arch_ia32)
+ /* eax: Address of the current element. */
+ "movl 0x4(%esp),%eax\n"
+ /* ecx: Address of the element after the last element. */
+ "movl 0x4(%esp),%ecx\n"
+ "addl 0x8(%esp),%ecx\n"
+ /* edx: Byte value. */
+ "movb 0xC(%esp),%dl\n"
+ ".loop:\n"
+ "cmpl %eax,%ecx\n"
+ "je .done\n" /* Exit loop if we have reached the final element. */
+ "movb %dl,(%eax)\n"
+ "incl %eax\n"
+ "jmp .loop\n" /* Continue to next element. */
+ ".done:\n"
+ "ret\n"
+#endif
+);
+#else
+/* C fallback: stores _byte into each of the _num bytes starting at _ptr, in ascending address order. */
+void rgo_memfill(void * const sus_restr _ptr,sus_typ_usz const _num,sus_typ_u8 const _byte) {
+ sus_typ_u8 * cur = (sus_typ_u8 *)_ptr;
+ sus_typ_u8 * const stop = cur + _num; /* One past the last byte to write. */
+ while (cur != stop) {
+  *cur = _byte;
+  ++cur;
+ }
+}
+#endif
diff --git a/rgo/src/strcpy.S b/rgo/src/strcpy.S
deleted file mode 100644
index 1a03346..0000000
--- a/rgo/src/strcpy.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#indlude <rgo.h>
-
-.global rgo_strcpy
-
-rgo_strcpy:
- /*
- char const * lstr
- char const * rstr
- */
-#if defined(__x86_64__)
- /* rax: Address of the current input character. */
- movq %rdi,%rax
- /* rsi: Address of the current output character. */
- movq %rsi,%rsi
- /* rdx: Current character. */
-.loop:
- movb (%rax),%dl
- movb %dl,(%rsi)
- testb %dl,%dl
- jz .done
- incq %rax
- incq %rsi
- jmp .loop
-.done:
- subq %rdi,%rax
- ret
-#endif
diff --git a/rgo/src/strcpy.c b/rgo/src/strcpy.c
new file mode 100644
index 0000000..4e26312
--- /dev/null
+++ b/rgo/src/strcpy.c
@@ -0,0 +1,67 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <rgo-priv.h>
+
+#if defined(rgo_priv_fastimpl)
+__asm__ (
+ ".global rgo_strcpy\n"
+
+ "rgo_strcpy:\n"
+ /*
+ Copies the null-terminated string in, including its terminator, to out.
+ The result register (rax/eax) receives the length of the copied string, not counting the null-terminator.
+ The amd64 code reads its arguments from rdi and rsi; the ia32 code reads them from 0x4(%esp) and 0x8(%esp).
+
+ char const * in
+ char * out
+ */
+#if defined(sus_arch_amd64)
+ /* rax: Address of the current input character. */
+ "movq %rdi,%rax\n"
+ /* rsi: Address of the current output character (used directly as passed). */
+ /* rdx: Current character. */
+ ".loop:\n"
+ "movb (%rax),%dl\n" /* Move the current character into a register... */
+ "movb %dl,(%rsi)\n" /* ... and then back into memory. */
+ "testb %dl,%dl\n" /* Check if we have reached the null-terminator... */
+ "jz .done\n" /* ... and if so, we are finished copying. */
+ "incq %rax\n" /* Increment the positions. */
+ "incq %rsi\n"
+ "jmp .loop\n" /* Restart the loop. */
+ ".done:\n"
+ /* rax points at the null-terminator of the input, so the difference already excludes it: no further adjustment is needed. */
+ "subq %rdi,%rax\n" /* Get the length of the string we copied. */
+ "ret\n"
+#elif defined(sus_arch_ia32)
+ /* eax: Address of the current input character. */
+ "movl 0x4(%esp),%eax\n"
+ /* ecx: Address of the current output character. */
+ "movl 0x8(%esp),%ecx\n"
+ /* edx: Current character. */
+ ".loop:\n"
+ "movb (%eax),%dl\n" /* Move the current character into a register... */
+ "movb %dl,(%ecx)\n" /* ... and then back into memory. */
+ "testb %dl,%dl\n" /* Check if we have reached the null-terminator... */
+ "jz .done\n" /* ... and if so, we are finished copying. */
+ "incl %eax\n" /* Increment the positions. */
+ "incl %ecx\n"
+ "jmp .loop\n" /* Restart the loop. */
+ ".done:\n"
+ /* eax points at the null-terminator of the input, so the difference already excludes it: no further adjustment is needed. */
+ "subl 0x4(%esp),%eax\n" /* Get the length of the string we copied. */
+ "ret\n"
+#endif
+);
+#else
+/* C fallback: copies the null-terminated string _in, terminator included, to _out and returns its length without the terminator. */
+sus_typ_usz rgo_strcpy(char const * const sus_restr _in,char * const sus_restr _out) {
+ char const * sus_restr src = _in;
+ char * sus_restr dst = _out;
+ /* Each character is stored before it is tested, so the terminator is copied as well. */
+ while ((*dst = *src) != '\x0') {
+  ++src;
+  ++dst;
+ }
+ return (sus_typ_usz)(src - _in);
+}
+#endif
diff --git a/rgo/src/strdup.c b/rgo/src/strdup.c
index 5b8cc97..7937658 100644
--- a/rgo/src/strdup.c
+++ b/rgo/src/strdup.c
@@ -4,8 +4,8 @@
If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
-#include <rgo.h>
+#include <rgo-priv.h>
#include <stdlib.h>
-char * rgo_strdup(char const * const __restrict__ _str) {return rgo_memdup(_str,rgo_strlen(_str) + (size_t)0x1);}
+/* Duplicates the null-terminated string _str by passing it, with its terminator counted, to rgo_memdup; returns whatever rgo_memdup returns. */
+char * rgo_strdup(char const * const __restrict__ _str) {
+ sus_typ_usz const siz = rgo_strlen(_str) + (sus_typ_usz)0x1; /* One extra byte for the null-terminator. */
+ return rgo_memdup(_str,siz);
+}
diff --git a/rgo/src/streq.S b/rgo/src/streq.S
deleted file mode 100644
index 2df763c..0000000
--- a/rgo/src/streq.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <rgo.h>
-
-.global rgo_streq
-
-rgo_streq:
- /*
- char const * lstr
- char const * rstr
- */
-#if defined(__x86_64__)
- /* rax: Address of the current input character. */
- movq %rdi,%rax
- /* rsi: Address of the current output character. */
- movq %rsi,%rsi
- /* rdx: Current input character. */
- /* rcx: Current output character. */
-.loop:
- movb (%rax),%dl
- movb (%rsi),%cl
- cmpb %dl,%cl
- jne .neq
- testb %dl,%dl /* Check if we have reached the null-terminator. */
- jz .eq
- incq %rax
- incq %rsi
- jmp .loop
-.eq:
- mov $0x1,%rax
- ret
-.neq:
- mov $0x0,%rax
- ret
-#endif
diff --git a/rgo/src/streq.c b/rgo/src/streq.c
new file mode 100644
index 0000000..7cf2f21
--- /dev/null
+++ b/rgo/src/streq.c
@@ -0,0 +1,78 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <rgo-priv.h>
+
+#if defined(rgo_priv_fastimpl)
+__asm__ (
+ ".global rgo_streq\n"
+
+ "rgo_streq:\n"
+ /*
+ Compares the null-terminated strings lstr and rstr character by character.
+ al receives 0x1 if both strings are equal up to and including the null-terminator, and 0x0 at the first difference.
+ The amd64 code reads its arguments from rdi and rsi; the ia32 code reads them from 0x4(%esp) and 0x8(%esp).
+
+ char const * lstr
+ char const * rstr
+ */
+#if defined(sus_arch_amd64)
+ /* rax: Address of the current left character. */
+ "movq %rdi,%rax\n"
+ /* rsi: Address of the current right character (used directly as passed). */
+ /* rdx: Current left character. */
+ /* rcx: Current right character. */
+ ".loop:\n"
+ "movb (%rax),%dl\n" /* Move the characters into registers. */
+ "movb (%rsi),%cl\n"
+ "cmpb %dl,%cl\n" /* Check if the characters are equal... */
+ "jne .neq\n" /* ... indicate inequality if they are not. */
+ "testb %dl,%dl\n" /* Check if we have reached the null-terminator... */
+ "jz .eq\n" /* ... indicate equality if we have. */
+ "incq %rax\n" /* Increment positions. */
+ "incq %rsi\n"
+ "jmp .loop\n" /* Restart loop. */
+ ".eq:\n" /* Indicate equality. */
+ "movb $0x1,%al\n"
+ "ret\n"
+ ".neq:\n" /* Indicate inequality. */
+ "movb $0x0,%al\n"
+ "ret\n"
+#elif defined(sus_arch_ia32)
+ /* eax: Address of the current left character. */
+ "movl 0x4(%esp),%eax\n"
+ /* ecx: Address of the current right character. */
+ "movl 0x8(%esp),%ecx\n"
+ /* dl: Current left character. */
+ /* dh: Current right character. */
+ ".loop:\n"
+ "movb (%eax),%dl\n" /* Move the characters into registers. */
+ "movb (%ecx),%dh\n"
+ "cmpb %dl,%dh\n" /* Check if the characters are equal... */
+ "jne .neq\n" /* ... indicate inequality if they are not. */
+ "testb %dl,%dl\n" /* Check if we have reached the null-terminator... */
+ "jz .eq\n" /* ... indicate equality if we have. */
+ "incl %eax\n" /* Increment positions. */
+ "incl %ecx\n"
+ "jmp .loop\n" /* Restart loop. */
+ ".eq:\n" /* Indicate equality. */
+ "movb $0x1,%al\n"
+ "ret\n"
+ ".neq:\n" /* Indicate inequality. */
+ "movb $0x0,%al\n"
+ "ret\n"
+#endif
+);
+#else
+/* C fallback: returns 0x1 if the null-terminated strings _lstr and _rstr are equal up to and including the terminator, 0x0 at the first difference. */
+sus_typ_u8 rgo_streq(char const * const sus_restr _lstr,char const * const sus_restr _rstr) {
+ char const * sus_restr lcur = _lstr;
+ char const * sus_restr rcur = _rstr;
+ while (*lcur == *rcur) {
+  /* Both characters are equal here, so testing one of them for the terminator is enough. */
+  if (*lcur == '\x0') {return sus_typlit_u8(0x1);}
+  ++lcur;
+  ++rcur;
+ }
+ return sus_typlit_u8(0x0);
+}
+#endif
diff --git a/rgo/src/strfill.c b/rgo/src/strfill.c
index 0ba25c7..5d6270a 100644
--- a/rgo/src/strfill.c
+++ b/rgo/src/strfill.c
@@ -4,8 +4,8 @@
If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
-#include <rgo.h>
+#include <rgo-priv.h>
#include <stdint.h>
-void rgo_strfill(char const * const __restrict__ _str,char const _chr) {rgo_memfill(_str,rgo_strlen(_str),(uint8_t)_chr);}
+/* Overwrites every character of the null-terminated string _str before its terminator with _chr, by handing the string and its length to rgo_memfill. */
+void rgo_strfill(char * const __restrict__ _str,char const _chr) {
+ sus_typ_usz const len = rgo_strlen(_str); /* The terminator is not counted, so it is left in place. */
+ rgo_memfill(_str,len,(sus_typ_u8)_chr);
+}
diff --git a/rgo/src/strlen.S b/rgo/src/strlen.S
deleted file mode 100644
index 19cb806..0000000
--- a/rgo/src/strlen.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- Copyright 2022 Gabriel Jensen.
- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
- If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-*/
-
-#include <rgo.h>
-
-.global rgo_strlen
-
-rgo_strlen:
- /*
- char const * str
- */
-#if defined(__i386__)
- /* eax: Address of the current character. */
- movl 0x4(%esp),%eax
- /* ecx: Address of the first character. */
- movl %eax,%ecx
- /* edx: Current character. */
-.loop:
- movb (%eax),%dl
- testb %dl,%dl
- jz .done /* Exit loop if we have reached the null-terminator. */
- incl %eax /* Continue to the next character. */
- jmp .loop
-.done:
- subl %ecx,%eax
- ret
-#elif defined(__x86_64__)
- /* rax: Address of the current character. */
- movq %rdi,%rax
- /* rdx: Current character. */
-.loop:
- movb (%rax),%dl
- testb %dl,%dl
- jz .done /* Exit loop if we have reached the null-terminator. */
- incq %rax /* Continue to the next character. */
- jmp .loop
-.done:
- subq %rdi,%rax
- ret
-#endif
diff --git a/rgo/src/strlen.c b/rgo/src/strlen.c
new file mode 100644
index 0000000..6ddbf8b
--- /dev/null
+++ b/rgo/src/strlen.c
@@ -0,0 +1,54 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <rgo-priv.h>
+
+#if defined(rgo_priv_fastimpl)
+__asm__ (
+ ".global rgo_strlen\n"
+
+ "rgo_strlen:\n"
+ /*
+ Measures the null-terminated string str.
+ The result register (rax/eax) receives the number of characters before the null-terminator.
+ The amd64 code reads its argument from rdi; the ia32 code reads it from 0x4(%esp).
+
+ char const * str
+ */
+#if defined(sus_arch_amd64)
+ /* rax: Address of the current character. */
+ "movq %rdi,%rax\n"
+ /* rdx: Current character. */
+ ".loop:\n"
+ "movb (%rax),%dl\n"
+ "testb %dl,%dl\n"
+ "jz .done\n" /* Exit loop if we have reached the null-terminator. */
+ "incq %rax\n" /* Continue to the next character. */
+ "jmp .loop\n"
+ ".done:\n"
+ "subq %rdi,%rax\n" /* Address of the terminator minus the start address is the length. */
+ "ret\n"
+#elif defined(sus_arch_ia32)
+ /* eax: Address of the current character. */
+ "movl 0x4(%esp),%eax\n"
+ /* ecx: Current character. */
+ ".loop:\n"
+ "movb (%eax),%cl\n"
+ "testb %cl,%cl\n"
+ "jz .done\n" /* Exit loop if we have reached the null-terminator. */
+ "incl %eax\n" /* Continue to the next character. */
+ "jmp .loop\n"
+ ".done:\n"
+ "subl 0x4(%esp),%eax\n" /* Address of the terminator minus the start address is the length. */
+ "ret\n"
+#endif
+);
+#else
+/* C fallback: returns the number of characters in the null-terminated string _str before its terminator. */
+sus_typ_usz rgo_strlen(char const * const sus_restr _str) {
+ char const * sus_restr cur = _str;
+ while (*cur != '\x0') {++cur;}
+ /* cur now points at the terminator, so its distance from the start is the length. */
+ return (sus_typ_usz)(cur - _str);
+}
+#endif