diff options
-rw-r--r-- | CHANGELOG.txt | 16 | ||||
-rw-r--r-- | Makefile | 29 | ||||
-rw-r--r-- | README.html | 5 | ||||
-rw-r--r-- | rgo/include-priv/rgo-priv.h | 13 | ||||
-rw-r--r-- | rgo/include/rgo.h | 55 | ||||
-rw-r--r-- | rgo/src/fastimpl.c | 15 | ||||
-rw-r--r-- | rgo/src/fndbyte.S | 63 | ||||
-rw-r--r-- | rgo/src/fndbyte.c | 74 | ||||
-rw-r--r-- | rgo/src/fndchr.S | 54 | ||||
-rw-r--r-- | rgo/src/fndchr.c | 68 | ||||
-rw-r--r-- | rgo/src/memcpy.S | 127 | ||||
-rw-r--r-- | rgo/src/memcpy.c | 138 | ||||
-rw-r--r-- | rgo/src/memdup.c | 4 | ||||
-rw-r--r-- | rgo/src/memeq.S | 94 | ||||
-rw-r--r-- | rgo/src/memeq.c | 106 | ||||
-rw-r--r-- | rgo/src/memfill.S | 45 | ||||
-rw-r--r-- | rgo/src/memfill.c | 55 | ||||
-rw-r--r-- | rgo/src/strcpy.S | 33 | ||||
-rw-r--r-- | rgo/src/strcpy.c | 67 | ||||
-rw-r--r-- | rgo/src/strdup.c | 4 | ||||
-rw-r--r-- | rgo/src/streq.S | 39 | ||||
-rw-r--r-- | rgo/src/streq.c | 78 | ||||
-rw-r--r-- | rgo/src/strfill.c | 4 | ||||
-rw-r--r-- | rgo/src/strlen.S | 43 | ||||
-rw-r--r-- | rgo/src/strlen.c | 54 | ||||
-rw-r--r-- | test.c | 27 |
26 files changed, 746 insertions, 564 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 25a52b0..5cecddb 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,19 @@ +| 8 + +- Use susinfo; +- Implement algorithms in C (with inline assembly on supported platforms); +- Add comments; +- Fix strcpy counting the null-terminator; +- Fix strcpy taking a pointer-to-const as the output; +- Implement strcpy, streq in IA-32; +- Optimise streq: Only use the lower eight bytes for the return value; +- Optimise strlen: Use fewer registers; +- Fix memfill taking a pointer-to-const as the output; +- Fix strfill taking a pointer-to-const as the output; +- Update readme; +- Add private header; +- Remove assembly-specific version macro; + | 7 - Relicense under MPL2; @@ -1,31 +1,30 @@ SRCS = \ - rgo/src/fndbyte.S \ - rgo/src/fndchr.S \ - rgo/src/memcpy.S \ + rgo/src/fastimpl.c \ + rgo/src/fndbyte.c \ + rgo/src/fndchr.c \ + rgo/src/memcpy.c \ rgo/src/memdup.c \ - rgo/src/memeq.S \ - rgo/src/memfill.S \ + rgo/src/memeq.c \ + rgo/src/memfill.c \ rgo/src/strdup.c \ - rgo/src/streq.S \ + rgo/src/streq.c \ rgo/src/strfill.c \ - rgo/src/strcpy.S \ - rgo/src/strlen.S + rgo/src/strcpy.c \ + rgo/src/strlen.c -OBJS := $(SRCS:.S=.o) -OBJS := $(OBJS:.c=.o) +OBJS := $(SRCS:.c=.o) LIB := librgo.a -ASFLAGS = \ - -Irgo/include \ - -g \ - -march=native - CFLAGS = \ -Irgo/include \ + -Irgo/include-priv \ -O3 \ -g \ -march=native +# Uncomment to disable assembly algorithms: +#CFLAGS += -Drgo_priv_noasm + .PHONY: clean install purge $(LIB): $(OBJS) diff --git a/README.html b/README.html index 893354e..ce017af 100644 --- a/README.html +++ b/README.html @@ -6,7 +6,7 @@ <br /> <h2>Supported Platforms</h2> <p>rgo is written (mostly) in assembly, and we therefore can't possibly support every platform in existence.</p> - <p>Currently, it's only compatible with the UNIX System-V ABI. Systems using this ABI include FreeBSD, Linux, macOS, OpenBSD, and any other System-V derivative. 
Support for Windows is being reflected for a future release.</p> + <p>Currently, it's only compatible with the UNIX System-V ABI. Systems using this ABI include FreeBSD, Linux, macOS, OpenBSD, and any other System-V derivative. Support for Windows is being considered for a future release.</p> <br /> <p>rgo is written in GNU C and GNU assembly for the following machine architectures:</p> <ul> @@ -15,7 +15,6 @@ </li> <li> <p>IA-32, including SSE and AVX;</p> - <p><i>Note: Support is currently limited to: fndbyte, fndchr, memcpy, memeq, memfill, strlen.</i></p> </li> <li> <p><i>(Planned) Aarch64, including Neon and SVE;</i></p> @@ -27,7 +26,7 @@ <p><i>(Planned) Power ISA, including AltiVec;</i></p> </li> <li> - <p><i>(Planned) RISC-V, including Q extension;</i></p> + <p><i>(Planned) RISC-V;</i></p> </li> <li> <p><i>(Planned) Sparc;</i></p> diff --git a/rgo/include-priv/rgo-priv.h b/rgo/include-priv/rgo-priv.h new file mode 100644 index 0000000..8b0b65c --- /dev/null +++ b/rgo/include-priv/rgo-priv.h @@ -0,0 +1,13 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#pragma once + +#include <rgo.h> + +#if defined(__GNUC__) && defined(sus_os_unix) && !defined(rgo_priv_noasm) && (defined(sus_arch_amd64) || defined(sus_arch_ia32)) +#define rgo_priv_fastimpl +#endif diff --git a/rgo/include/rgo.h b/rgo/include/rgo.h index ff3c127..56893d6 100644 --- a/rgo/include/rgo.h +++ b/rgo/include/rgo.h @@ -4,20 +4,15 @@ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ -#if !defined(__x86_64__) && !defined(__i386__) -#define rgo_noimpl -#endif +#include <sus.h> #if !defined(rgo_ver) -#if defined(__ASSEMBLER__) -#define rgo_ver $0x7 -#else -#define rgo_ver (0x7) -#endif +#define rgo_ver sus_typlit_u64(0x8) -#if defined(__ASSEMBLER__) +#if defined(sus_lang_asm) .extern rgo_fndbyte .extern rgo_fndchr +.extern rgo_getbinver .extern rgo_memcpy .extern rgo_memeq .extern rgo_memfill @@ -26,29 +21,31 @@ .extern rgo_strfill .extern rgo_strlen #else -#include <stddef.h> -#include <stdint.h> -#if defined(__cplusplus) -#define rgo_priv_externc extern "C" -#else -#define rgo_priv_externc +#if defined(sus_lang_cxx) +extern "C" { +#endif + +sus_attr_alloc sus_attr_allocsz(0x2) sus_attr_hot sus_attr_nothrw void * __rgo_memdup( void const * sus_restr ptr, sus_typ_usz num); +sus_attr_cold sus_attr_const sus_attr_nothrw sus_typ_u8 rgo_fastimpl( void); +sus_attr_hot sus_attr_nothrw sus_typ_usz rgo_fndbyte( void const * sus_restr ptr, sus_typ_usz num,sus_typ_u8 byte); +sus_attr_hot sus_attr_nothrw sus_typ_usz rgo_fndchr( char const * sus_restr str, char chr); +sus_attr_cold sus_attr_nothrw sus_typ_u64 rgo_getbinver(void); +sus_attr_hot sus_attr_nothrw void rgo_memcpy( void const * sus_restr in, sus_typ_usz num,void * sus_restr out); +sus_attr_hot sus_attr_nothrw sus_typ_u8 rgo_memeq( void const * sus_restr lptr,sus_typ_usz num,void const * sus_restr rptr); +sus_attr_hot sus_attr_nothrw void rgo_memfill( void * sus_restr ptr, sus_typ_usz num,sus_typ_u8 val); +sus_attr_hot sus_attr_nothrw sus_typ_usz rgo_strcpy( char const * sus_restr in, char * sus_restr out); +sus_attr_alloc sus_attr_hot sus_attr_nothrw char * rgo_strdup( char const * sus_restr str); +sus_attr_hot sus_attr_nothrw sus_typ_u8 rgo_streq( char const * sus_restr lstr,char const * sus_restr rstr); +sus_attr_hot sus_attr_nothrw void rgo_strfill( char * sus_restr lstr,char chr); +sus_attr_hot sus_attr_nothrw sus_typ_usz rgo_strlen( char const * sus_restr str); + +#if defined(sus_lang_cxx) 
+} #endif -rgo_priv_externc __attribute__((alloc_size(0x2),hot,malloc,nothrow)) void * __rgo_memdup(void const * __restrict__ ptr, size_t num); -rgo_priv_externc __attribute__((hot,nothrow)) size_t rgo_fndbyte( void const * __restrict__ ptr, size_t num,uint8_t byte); -rgo_priv_externc __attribute__((hot,nothrow)) size_t rgo_fndchr( char const * __restrict__ str, char chr); -rgo_priv_externc __attribute__((hot,nothrow)) void rgo_memcpy( void const * __restrict__ in, size_t num,void * __restrict__ out); -rgo_priv_externc __attribute__((hot,nothrow)) uint8_t rgo_memeq( void const * __restrict__ lptr,size_t num,void const * __restrict__ rptr); -rgo_priv_externc __attribute__((hot,nothrow)) void rgo_memfill( void const * __restrict__ ptr, size_t num,uint8_t val); -rgo_priv_externc __attribute__((hot,nothrow)) size_t rgo_strcpy( char const * __restrict__ lstr,char const * __restrict__ rstr); -rgo_priv_externc __attribute__((hot,malloc,nothrow)) char * rgo_strdup( char const * __restrict__ str); -rgo_priv_externc __attribute__((hot,nothrow)) uint8_t rgo_streq( char const * __restrict__ lstr,char const * __restrict__ rstr); -rgo_priv_externc __attribute__((hot,nothrow)) void rgo_strfill( char const * __restrict__ lstr,char chr); -rgo_priv_externc __attribute__((hot,nothrow)) size_t rgo_strlen( char const * __restrict__ str); - -#if defined(__cplusplus) -template<typename T> [[gnu::alloc_size(0x2),gnu::hot,gnu::malloc]] auto rgo_memdup(T const * __restrict__ const _ptr,::size_t const _num) noexcept -> T * {return static_cast<T *>(::__rgo_memdup(_ptr,_num));} +#if defined(sus_lang_cxx) +template<typename T> sus_attr_alloc sus_attr_allocsz(0x2) sus_attr_hot sus_attr_nothrw T * rgo_memdup(T const * sus_restr const _ptr,::sus_typ_usz const _num) {return static_cast<T *>(::__rgo_memdup(_ptr,_num));} #else #define rgo_memdup __rgo_memdup #endif diff --git a/rgo/src/fastimpl.c b/rgo/src/fastimpl.c new file mode 100644 index 0000000..c584119 --- /dev/null +++ 
b/rgo/src/fastimpl.c @@ -0,0 +1,15 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <rgo-priv.h> + +sus_typ_u8 rgo_fastimpl(void) { +#if defined(rgo_priv_fastimpl) + return sus_typlit_u8(0x1); +#else + return sus_typlit_u8(0x0); +#endif +} diff --git a/rgo/src/fndbyte.S b/rgo/src/fndbyte.S deleted file mode 100644 index 0d171cd..0000000 --- a/rgo/src/fndbyte.S +++ /dev/null @@ -1,63 +0,0 @@ -/* - Copyright 2022 Gabriel Jensen. - This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. - If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. -*/ - -#include <rgo.h> - -.global rgo_fndbyte - -rgo_fndbyte: - /* - void const * ptr - size_t num - uint8_t byte - */ -#if defined(__i386__) - /* eax: Address of the current element. */ - movl 0x4(%esp),%eax - /* ecx: Address of the element after the last element. */ - movl 0x8(%esp),%ecx - addl %eax,%ecx - /* edx: Byte value. */ - movb 0xC(%esp),%dl - /* ebx: Current element. */ - pushl %ebx -.loop: - cmpl %eax,%ecx - je .nfnd /* We have went through the entire array without finding the byte. */ - movb (%eax),%bl - cmpb %bl,%dl - je .fnd /* We have found the byte. */ - incl %eax - jmp .loop -.fnd: - popl %ebx - subl 0x4(%esp),%eax - ret -.nfnd: - popl %ebx - movl $0xFFFFFFFF,%eax - ret -#elif defined(__x86_64__) - /* rax: Address of the current element. */ - movq %rdi,%rax - /* rsi: Address of the element after the last element. */ - addq %rdi,%rsi - /* rcx: Current element. */ -.loop: - cmpq %rax,%rsi - je .nfnd /* We have went through the entire array without finding the byte. */ - movb (%rax),%cl - cmpb %cl,%dl - je .fnd /* We have found the byte. 
*/ - incq %rax - jmp .loop -.fnd: - subq %rdi,%rax - ret -.nfnd: - movq $0xFFFFFFFFFFFFFFFF,%rax - ret -#endif diff --git a/rgo/src/fndbyte.c b/rgo/src/fndbyte.c new file mode 100644 index 0000000..29945a0 --- /dev/null +++ b/rgo/src/fndbyte.c @@ -0,0 +1,74 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <rgo-priv.h> + +#if defined(rgo_priv_fastimpl) +__asm__ ( + ".global rgo_fndbyte\n" + + "rgo_fndbyte:\n" + /* + void const * ptr + sus_typ_usz num + sus_typ_u8 byte + */ +#if defined(sus_arch_amd64) + /* rax: Address of the current element. */ + "movq %rdi,%rax\n" + /* rsi: Address of the element after the last element. */ + "addq %rdi,%rsi\n" + /* rcx: Current element. */ + ".loop:\n" + "cmpq %rax,%rsi\n" + "je .nfnd\n" /* We have gone through the entire array without finding the byte. */ + "movb (%rax),%cl\n" + "cmpb %cl,%dl\n" + "je .fnd\n" /* We have found the byte. */ + "incq %rax\n" + "jmp .loop\n" + ".fnd:\n" + "subq %rdi,%rax\n" + "ret\n" + ".nfnd:\n" + "movq $0xFFFFFFFFFFFFFFFF,%rax\n" + "ret\n" +#elif defined(sus_arch_ia32) + /* eax: Address of the current element. */ + "movl 0x4(%esp),%eax\n" + /* ecx: Address of the element after the last element. */ + "movl 0x8(%esp),%ecx\n" + "addl %eax,%ecx\n" + /* edx: Byte value. */ + "movb 0xC(%esp),%dl\n" + /* ebx: Current element. */ + "pushl %ebx\n" + ".loop:\n" + "cmpl %eax,%ecx\n" + "je .nfnd\n" /* We have gone through the entire array without finding the byte. */ + "movb (%eax),%bl\n" + "cmpb %bl,%dl\n" + "je .fnd\n" /* We have found the byte. 
*/ + "incl %eax\n" + "jmp .loop\n" + ".fnd:\n" + "popl %ebx\n" + "subl 0x4(%esp),%eax\n" + "ret\n" + ".nfnd:\n" + "popl %ebx\n" + "movl $0xFFFFFFFF,%eax\n" + "ret\n" +#endif +); +#else +sus_typ_usz rgo_fndbyte(void const * const sus_restr _ptr,sus_typ_usz const _num,sus_typ_u8 const _byte) { + sus_typ_u8 const * ptr = (sus_typ_u8 const *)_ptr; + sus_typ_u8 const * const afterbuf = ptr + _num; + for (;ptr != afterbuf;++ptr) {sus_unlikely (*ptr == _byte) {return ptr - (sus_typ_u8 const *)_ptr;}} + return sus_typlit_usz(-0x1); +} +#endif diff --git a/rgo/src/fndchr.S b/rgo/src/fndchr.S deleted file mode 100644 index 608916b..0000000 --- a/rgo/src/fndchr.S +++ /dev/null @@ -1,54 +0,0 @@ -/* - Copyright 2022 Gabriel Jensen. - This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. - If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. -*/ - -#include <rgo.h> - -.global rgo_fndchr - -rgo_fndchr: - /* - char const * str - char chr - */ -#if defined(__i386__) - /* eax: Address of the current character. */ - movl 0x4(%esp),%eax - /* ecx: Character. */ - movb 0x8(%esp),%cl - /* edx: Current character. */ -.loop: - movb (%eax),%dl - cmpb %dl,%cl - je .fnd /* Exit loop if we have found the character. */ - testb %dl,%dl - je .nfnd /* We encountered the null-terminator but not the specified character. */ - incl %eax - jmp .loop -.fnd: - subl 0x4(%esp),%eax - ret -.nfnd: - movl $0xFFFFFFFF,%eax - ret -#elif defined(__x86_64__) - /* rax: Address of the current character. */ - movq %rdi,%rax - /* rdx: Current character. */ -.loop: - movb (%rax),%dl - cmpb %dl,%sil - je .fnd /* Exit loop if we have found the character. */ - testb %dl,%dl - je .nfnd /* We encountered the null-terminator but not the specified character. 
*/ - incq %rax - jmp .loop -.fnd: - subq %rdi,%rax - ret -.nfnd: - movq $0xFFFFFFFFFFFFFFFF,%rax - ret -#endif diff --git a/rgo/src/fndchr.c b/rgo/src/fndchr.c new file mode 100644 index 0000000..12cd006 --- /dev/null +++ b/rgo/src/fndchr.c @@ -0,0 +1,68 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <rgo-priv.h> + +#if defined(rgo_priv_fastimpl) +__asm__ ( + ".global rgo_fndchr\n" + + "rgo_fndchr:\n" + /* + char const * str + char chr + */ +#if defined(sus_arch_amd64) + /* rax: Address of the current character. */ + "movq %rdi,%rax\n" + /* rdx: Current character. */ + ".loop:\n" + "movb (%rax),%dl\n" + "cmpb %dl,%sil\n" + "je .fnd\n" /* Exit loop if we have found the character. */ + "testb %dl,%dl\n" + "je .nfnd\n" /* We encountered the null-terminator but not the specified character. */ + "incq %rax\n" + "jmp .loop\n" + ".fnd:\n" + "subq %rdi,%rax\n" + "ret\n" + ".nfnd:\n" + "movq $0xFFFFFFFFFFFFFFFF,%rax\n" + "ret\n" +#elif defined(sus_arch_ia32) + /* eax: Address of the current character. */ + "movl 0x4(%esp),%eax\n" + /* ecx: Character. */ + "movb 0x8(%esp),%cl\n" + /* edx: Current character. */ + ".loop:\n" + "movb (%eax),%dl\n" + "cmpb %dl,%cl\n" + "je .fnd\n" /* Exit loop if we have found the character. */ + "testb %dl,%dl\n" + "je .nfnd\n" /* We encountered the null-terminator but not the specified character. 
*/ + "incl %eax\n" + "jmp .loop\n" + ".fnd:\n" + "subl 0x4(%esp),%eax\n" + "ret\n" + ".nfnd:\n" + "movl $0xFFFFFFFF,%eax\n" + "ret\n" +#endif +); +#else +sus_typ_usz rgo_fndchr(char const * const sus_restr _str,char const _chr) { + char const * sus_restr pos = _str; + for (;;++pos) { + char const chr = *pos; + sus_unlikely (chr == _chr) {return (sus_typ_usz)(pos - _str);} + sus_unlikely (chr == '\x0') {return sus_typlit_usz(-0x1);} + } + sus_unreach(); +} +#endif diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S deleted file mode 100644 index fefa614..0000000 --- a/rgo/src/memcpy.S +++ /dev/null @@ -1,127 +0,0 @@ -/* - Copyright 2022 Gabriel Jensen. - This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. - If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. -*/ - -#include <rgo.h> - -.global rgo_memcpy - -rgo_memcpy: - /* - void const * in - size_t num - void * out - */ -#if defined(__i386__) - /* eax: Address of the current input element. */ - movl 0x4(%esp),%eax - /* ecx: Number of remaining elements. */ - movl 0x8(%esp),%ecx - /* edx: Address of the current output element. */ - movl 0xC(%esp),%edx - /* ebx: Current element. */ - pushl %ebx /* ebx must be restored. */ - /* xmm0: Current element. */ - /* ymm0: Current element. 
*/ -#if defined(__AVX__) -.big256cpy: - cmpl $0x20,%ecx -#if defined(__SSE__) - jl .big128cpy -#else - jl .wrdcpy -#endif - vmovdqu (%eax),%ymm0 - vmovdqu %ymm0,(%edx) - addl $0x20,%eax - addl $0x20,%edx - subl $0x20,%ecx - jmp .big256cpy -#endif -#if defined(__SSE__) -.big128cpy: - cmpl $0x10,%ecx - jl .wrdcpy -#if defined(__SSE2__) - movdqu (%eax),%xmm0 - movdqu %xmm0,(%edx) -#else - movups (%eax),%xmm0 - movups %xmm0,(%edx) -#endif - addl $0x10,%eax - addl $0x10,%edx - subl $0x10,%ecx - jmp .big128cpy -#endif -.wrdcpy: - cmpl $0x4,%ecx - jl .bytecpy - movl (%eax),%ebx - movl %ebx,(%edx) - addl $0x4,%eax - addl $0x4,%edx - subl $0x4,%ecx - jmp .wrdcpy -.bytecpy: - testl %ecx,%ecx - jz .done - movb (%eax),%bl - movb %bl,(%edx) - incl %eax - incl %edx - decl %ecx - jmp .bytecpy -.done: - popl %ebx - ret -#elif defined(__x86_64__) - /* rdi: Address of the current input element. */ - /* rsi: Number of remaining elements. */ - /* rdx: Address of the current output element. */ - /* rcx: Current element. */ - /* xmm0: Current element. */ - /* ymm0: Current element. */ -#if defined(__AVX__) -.big256cpy: - cmpq $0x20,%rsi - jl .big128cpy - vmovups (%rdi),%ymm0 - vmovups %ymm0,(%rdx) - addq $0x20,%rdi - addq $0x20,%rdx - subq $0x20,%rsi - jmp .big256cpy -#endif -.big128cpy: - cmpq $0x10,%rsi - jl .wrdcpy - movdqu (%rdi),%xmm0 - movdqu %xmm0,(%rdx) - addq $0x10,%rdi - addq $0x10,%rdx - subq $0x10,%rsi - jmp .big128cpy -.wrdcpy: - cmpq $0x8,%rsi - jl .bytecpy - movq (%rdi),%rcx - movq %rcx,(%rdx) - addq $0x8,%rdi - addq $0x8,%rdx - subq $0x8,%rsi - jmp .wrdcpy -.bytecpy: - testq %rsi,%rsi - jz .done - movb (%rdi),%cl - movb %cl,(%rdx) - incq %rdi - incq %rdx - decq %rsi - jmp .bytecpy -.done: - ret -#endif diff --git a/rgo/src/memcpy.c b/rgo/src/memcpy.c new file mode 100644 index 0000000..89adc14 --- /dev/null +++ b/rgo/src/memcpy.c @@ -0,0 +1,138 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. 
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <rgo-priv.h> + +#if defined(rgo_priv_fastimpl) +__asm__ ( + ".global rgo_memcpy\n" + + "rgo_memcpy:\n" + /* + void const * in + sus_typ_usz num + void * out + */ +#if defined(sus_arch_amd64) + /* rdi: Address of the current input element. */ + /* rsi: Number of remaining elements. */ + /* rdx: Address of the current output element. */ + /* rcx: Current element. */ + /* xmm0: Current element. */ + /* ymm0: Current element. */ +#if defined(sus_archfeat_avx) + ".big256cpy:\n" + "cmpq $0x20,%rsi\n" + "jl .big128cpy\n" + "vmovups (%rdi),%ymm0\n" + "vmovups %ymm0,(%rdx)\n" + "addq $0x20,%rdi\n" + "addq $0x20,%rdx\n" + "subq $0x20,%rsi\n" + "jmp .big256cpy\n" +#endif + ".big128cpy:\n" + "cmpq $0x10,%rsi\n" + "jl .wrdcpy\n" + "movdqu (%rdi),%xmm0\n" + "movdqu %xmm0,(%rdx)\n" + "addq $0x10,%rdi\n" + "addq $0x10,%rdx\n" + "subq $0x10,%rsi\n" + "jmp .big128cpy\n" + ".wrdcpy:\n" + "cmpq $0x8,%rsi\n" + "jl .bytecpy\n" + "movq (%rdi),%rcx\n" + "movq %rcx,(%rdx)\n" + "addq $0x8,%rdi\n" + "addq $0x8,%rdx\n" + "subq $0x8,%rsi\n" + "jmp .wrdcpy\n" + ".bytecpy:\n" + "testq %rsi,%rsi\n" + "jz .done\n" + "movb (%rdi),%cl\n" + "movb %cl,(%rdx)\n" + "incq %rdi\n" + "incq %rdx\n" + "decq %rsi\n" + "jmp .bytecpy\n" + ".done:\n" + "ret\n" +#elif defined(sus_arch_ia32) + /* eax: Address of the current input element. */ + "movl 0x4(%esp),%eax\n" + /* ecx: Number of remaining elements. */ + "movl 0x8(%esp),%ecx\n" + /* edx: Address of the current output element. */ + "movl 0xC(%esp),%edx\n" + /* ebx: Current element. */ + "pushl %ebx\n" /* ebx must be restored. */ + /* xmm0: Current element. */ + /* ymm0: Current element. 
*/ +#if defined(sus_archfeat_avx) + ".big256cpy:\n" + "cmpl $0x20,%ecx\n" +#if defined(sus_archfeat_sse) + "jl .big128cpy\n" +#else + "jl .wrdcpy\n" +#endif + "vmovdqu (%eax),%ymm0\n" + "vmovdqu %ymm0,(%edx)\n" + "addl $0x20,%eax\n" + "addl $0x20,%edx\n" + "subl $0x20,%ecx\n" + "jmp .big256cpy\n" +#endif +#if defined(sus_archfeat_sse) + ".big128cpy:\n" + "cmpl $0x10,%ecx\n" + "jl .wrdcpy\n" +#if defined(sus_archfeat_sse2) + "movdqu (%eax),%xmm0\n" + "movdqu %xmm0,(%edx)\n" +#else + "movups (%eax),%xmm0\n" + "movups %xmm0,(%edx)\n" +#endif + "addl $0x10,%eax\n" + "addl $0x10,%edx\n" + "subl $0x10,%ecx\n" + "jmp .big128cpy\n" +#endif + ".wrdcpy:\n" + "cmpl $0x4,%ecx\n" + "jl .bytecpy\n" + "movl (%eax),%ebx\n" + "movl %ebx,(%edx)\n" + "addl $0x4,%eax\n" + "addl $0x4,%edx\n" + "subl $0x4,%ecx\n" + "jmp .wrdcpy\n" + ".bytecpy:\n" + "testl %ecx,%ecx\n" + "jz .done\n" + "movb (%eax),%bl\n" + "movb %bl,(%edx)\n" + "incl %eax\n" + "incl %edx\n" + "decl %ecx\n" + "jmp .bytecpy\n" + ".done:\n" + "popl %ebx\n" + "ret\n" +#endif +); +#else +void rgo_memcpy(void const * const sus_restr _in,sus_typ_usz const _num,void * const sus_restr _out) { + sus_typ_u8 const * in = (sus_typ_u8 const *)_in; + sus_typ_u8 * sus_restr out = (sus_typ_u8 *)_out; + sus_typ_u8 const * const afterbuf = in + _num; + for (;in != afterbuf;++in,++out) {*out = *in;} +} +#endif diff --git a/rgo/src/memdup.c b/rgo/src/memdup.c index 45b7dda..fb247f3 100644 --- a/rgo/src/memdup.c +++ b/rgo/src/memdup.c @@ -4,11 +4,11 @@ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ -#include <rgo.h> +#include <rgo-priv.h> #include <stdlib.h> -void * __rgo_memdup(void const * const __restrict__ _ptr,size_t const _num) { +void * __rgo_memdup(void const * const __restrict__ _ptr,sus_typ_usz const _num) { void * const __restrict__ dup = malloc(_num); if (__builtin_expect (dup == NULL,0x0l)) {return NULL;} rgo_memcpy(_ptr,_num,dup); diff --git a/rgo/src/memeq.S b/rgo/src/memeq.S deleted file mode 100644 index bd57f43..0000000 --- a/rgo/src/memeq.S +++ /dev/null @@ -1,94 +0,0 @@ -/* - Copyright 2022 Gabriel Jensen. - This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. - If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. -*/ - -#include <rgo.h> - -.global rgo_memeq - -rgo_memeq: - /* - void const * lptr - size_t num - void const * rptr - */ -#if defined(__i386__) - /* eax: Address of the current left element. */ - movl 0x4(%esp),%eax - /* ecx: Number of remaining elements. */ - movl 0x8(%esp),%ecx - /* edx: Address of the current right element. */ - movl 0xC(%esp),%edx - /* ebx: Current left element. */ - pushl %ebx - /* ebx/esi: Current right element. */ - pushl %esi -.wrdcmp: - cmpl $0x4,%ecx - jl .bytecmp - movl (%eax),%ebx - movl (%edx),%esi - cmpl %ebx,%esi - jne .neq - addl $0x4,%eax - addl $0x4,%edx - subl $0x4,%ecx - jmp .wrdcmp -.bytecmp: - testl %ecx,%ecx - jne .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */ - movb (%eax),%bl - movb (%edx),%bh - cmpb %bl,%bh - jne .neq - incl %eax - incl %edx - decl %ecx - jmp .bytecmp -.eq: - popl %ebx - popl %esi - movb $0x1,%al - ret -.neq: - popl %ebx - popl %esi - movb $0x0,%al - ret -#elif defined(__x86_64__) - /* rdi: Address of the current left element. */ - /* rsi: Number of remaining elements. */ - /* rdx: Address of the current right element. */ - /* rax: Current left element. */ - /* rcx: Current right element. 
*/ -.wrdcmp: - cmpq $0x8,%rsi - jl .bytecmp - movq (%rdi),%rax - movq (%rdx),%rcx - cmpq %rax,%rcx - jne .neq - addq $0x8,%rdi - addq $0x8,%rdx - subq $0x8,%rsi - jmp .wrdcmp -.bytecmp: - testq %rsi,%rsi - jne .eq /* If we have reached the final element, all previous elements have compared equal, and the memory sequences are equal. */ - movb (%rdi),%al - movb (%rdx),%cl - cmpb %al,%cl - jne .neq - incq %rdi - incq %rdx - decq %rsi - jmp .bytecmp -.eq: - movb $0x1,%al - ret -.neq: - movb $0x0,%al - ret -#endif diff --git a/rgo/src/memeq.c b/rgo/src/memeq.c new file mode 100644 index 0000000..03ae2c2 --- /dev/null +++ b/rgo/src/memeq.c @@ -0,0 +1,106 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <rgo-priv.h> + +#if defined(rgo_priv_fastimpl) +__asm__ ( + ".global rgo_memeq\n" + + "rgo_memeq:\n" + /* + void const * lptr + sus_typ_usz num + void const * rptr + */ +#if defined(sus_arch_amd64) + /* rdi: Address of the current left element. */ + /* rsi: Number of remaining elements. */ + /* rdx: Address of the current right element. */ + /* rax: Current left element. */ + /* rcx: Current right element. */ + ".wrdcmp:\n" + "cmpq $0x8,%rsi\n" + "jl .bytecmp\n" + "movq (%rdi),%rax\n" + "movq (%rdx),%rcx\n" + "cmpq %rax,%rcx\n" + "jne .neq\n" + "addq $0x8,%rdi\n" + "addq $0x8,%rdx\n" + "subq $0x8,%rsi\n" + "jmp .wrdcmp\n" + ".bytecmp:\n" + "testq %rsi,%rsi\n" + "je .eq\n" /* If no elements remain (the count is zero), all previous elements have compared equal, and the memory sequences are equal. 
*/ + "movb (%rdi),%al\n" + "movb (%rdx),%cl\n" + "cmpb %al,%cl\n" + "jne .neq\n" + "incq %rdi\n" + "incq %rdx\n" + "decq %rsi\n" + "jmp .bytecmp\n" + ".eq:\n" + "movb $0x1,%al\n" + "ret\n" + ".neq:\n" + "movb $0x0,%al\n" + "ret\n" +#elif defined(sus_arch_ia32) + /* eax: Address of the current left element. */ + "movl 0x4(%esp),%eax\n" + /* ecx: Number of remaining elements. */ + "movl 0x8(%esp),%ecx\n" + /* edx: Address of the current right element. */ + "movl 0xC(%esp),%edx\n" + /* ebx: Current left element. */ + "pushl %ebx\n" + /* ebx/esi: Current right element. */ + "pushl %esi\n" + ".wrdcmp:\n" + "cmpl $0x4,%ecx\n" + "jl .bytecmp\n" + "movl (%eax),%ebx\n" + "movl (%edx),%esi\n" + "cmpl %ebx,%esi\n" + "jne .neq\n" + "addl $0x4,%eax\n" + "addl $0x4,%edx\n" + "subl $0x4,%ecx\n" + "jmp .wrdcmp\n" + ".bytecmp:\n" + "testl %ecx,%ecx\n" + "je .eq\n" /* If no elements remain (the count is zero), all previous elements have compared equal, and the memory sequences are equal. */ + "movb (%eax),%bl\n" + "movb (%edx),%bh\n" + "cmpb %bl,%bh\n" + "jne .neq\n" + "incl %eax\n" + "incl %edx\n" + "decl %ecx\n" + "jmp .bytecmp\n" + ".eq:\n" + "popl %esi\n" + "popl %ebx\n" + "movb $0x1,%al\n" + "ret\n" + ".neq:\n" + "popl %esi\n" + "popl %ebx\n" + "movb $0x0,%al\n" + "ret\n" +#endif +); +#else +sus_typ_u8 rgo_memeq(void const * const sus_restr _lptr,sus_typ_usz const _num,void const * const sus_restr _rptr) { + sus_typ_u8 const * lpos = (sus_typ_u8 const *)_lptr; + sus_typ_u8 const * sus_restr rpos = (sus_typ_u8 const *)_rptr; + sus_typ_u8 const * const afterbuf = lpos + _num; + for (;lpos != afterbuf;++lpos,++rpos) {sus_likely (*lpos != *rpos) {return sus_typlit_u8(0x0);}} + return sus_typlit_u8(0x1); +} +#endif diff --git a/rgo/src/memfill.S b/rgo/src/memfill.S deleted file mode 100644 index f01cd65..0000000 --- a/rgo/src/memfill.S +++ /dev/null @@ -1,45 +0,0 @@ -/* - Copyright 2022 Gabriel Jensen. - This Source Code Form is subject to the terms of the Mozilla Public License, v. 
2.0. - If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. -*/ - -#include <rgo.h> - -.global rgo_memfill - -rgo_memfill: - /* - void const * ptr - size_t num - uint8_t val - */ -#if defined(__i386__) - /* eax: Address of the current element. */ - movl 0x4(%esp),%eax - /* ecx: Address of the element after the last element. */ - movl 0x4(%esp),%ecx - addl 0x8(%esp),%ecx - /* rdx: Byte value. */ - movb 0xC(%esp),%dl -.loop: - cmpl %eax,%ecx - je .done /* Exit loop if we have reached the final element. */ - movb %dl,(%eax) - incl %eax - jmp .loop /* Continue to next element. */ -.done: - ret -#elif defined(__x86_64__) - /* rdi: Address of the current element. */ - /* rsi: Address of the element after the last element. */ - addq %rdi,%rsi -.loop: - cmpq %rsi,%rdi - je .done /* Exit loop if we have reached the final element. */ - movb %dl,(%rdi) - incq %rdi - jmp .loop /* Continue to next element. */ -.done: - ret -#endif diff --git a/rgo/src/memfill.c b/rgo/src/memfill.c new file mode 100644 index 0000000..ec5e1fa --- /dev/null +++ b/rgo/src/memfill.c @@ -0,0 +1,55 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <rgo-priv.h> + +#if defined(rgo_priv_fastimpl) +__asm__ ( + ".global rgo_memfill\n" + + "rgo_memfill:\n" + /* + void const * ptr + sus_typ_usz num + sus_typ_u8 val + */ +#if defined(sus_arch_amd64) + /* rdi: Address of the current element. */ + /* rsi: Address of the element after the last element. */ + "addq %rdi,%rsi\n" + ".loop:\n" + "cmpq %rsi,%rdi\n" + "je .done\n" /* Exit loop if we have reached the final element. */ + "movb %dl,(%rdi)\n" + "incq %rdi\n" + "jmp .loop\n" /* Continue to next element. */ + ".done:\n" + "ret\n" +#elif defined(sus_arch_ia32) + /* eax: Address of the current element. 
*/ + "movl 0x4(%esp),%eax\n" + /* ecx: Address of the element after the last element. */ + "movl 0x4(%esp),%ecx\n" + "addl 0x8(%esp),%ecx\n" + /* edx: Byte value. */ + "movb 0xC(%esp),%dl\n" + ".loop:\n" + "cmpl %eax,%ecx\n" + "je .done\n" /* Exit loop if we have reached the final element. */ + "movb %dl,(%eax)\n" + "incl %eax\n" + "jmp .loop\n" /* Continue to next element. */ + ".done:\n" + "ret\n" +#endif +); +#else +void rgo_memfill(void * const sus_restr _ptr,sus_typ_usz const _num,sus_typ_u8 const _byte) { + sus_typ_u8 * pos = (sus_typ_u8 *)_ptr; + sus_typ_u8 * const afterbuf = pos + _num; + for (;pos != afterbuf;++pos) {*pos = _byte;} +} +#endif diff --git a/rgo/src/strcpy.S b/rgo/src/strcpy.S deleted file mode 100644 index 1a03346..0000000 --- a/rgo/src/strcpy.S +++ /dev/null @@ -1,33 +0,0 @@ -/* - Copyright 2022 Gabriel Jensen. - This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. - If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. -*/ - -#indlude <rgo.h> - -.global rgo_strcpy - -rgo_strcpy: - /* - char const * lstr - char const * rstr - */ -#if defined(__x86_64__) - /* rax: Address of the current input character. */ - movq %rdi,%rax - /* rsi: Address of the current output character. */ - movq %rsi,%rsi - /* rdx: Current character. */ -.loop: - movb (%rax),%dl - movb %dl,(%rsi) - testb %dl,%dl - jz .done - incq %rax - incq %rsi - jmp .loop -.done: - subq %rdi,%rax - ret -#endif diff --git a/rgo/src/strcpy.c b/rgo/src/strcpy.c new file mode 100644 index 0000000..4e26312 --- /dev/null +++ b/rgo/src/strcpy.c @@ -0,0 +1,67 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+*/ + +#include <rgo-priv.h> + +#if defined(rgo_priv_fastimpl) +__asm__ ( + ".global rgo_strcpy\n" + + "rgo_strcpy:\n" + /* + char const * in + char * out + */ +#if defined(sus_arch_amd64) + /* rax: Address of the current input character. */ + "movq %rdi,%rax\n" + /* rsi: Address of the current output character. */ + "movq %rsi,%rsi\n" + /* rdx: Current character. */ + ".loop:\n" + "movb (%rax),%dl\n" /* Move the current character into a register... */ + "movb %dl,(%rsi)\n" /* ... and then back into memory. */ + "testb %dl,%dl\n" /* Check if we have reached the null-terminator... */ + "jz .done\n" /* ... and if so, we are finished copying. */ + "incq %rax\n" /* Increment the positions. */ + "incq %rsi\n" + "jmp .loop\n" /* Restart the loop. */ + ".done:\n" + "subq %rdi,%rax\n" /* Get the length of the string we copied. */ + "decq %rdi\n" /* We do not count the null-terminator in the string length. */ + "ret\n" +#elif defined(sus_arch_ia32) + /* eax: Address of the current input character. */ + "movl 0x4(%esp),%eax\n" + /* ecx: Address of the current output character. */ + "movl 0x8(%esp),%ecx\n" + /* edx: Current character. */ + ".loop:\n" + "movb (%eax),%dl\n" /* Move the current character into a register... */ + "movb %dl,(%ecx)\n" /* ... and then back into memory. */ + "testb %dl,%dl\n" /* Check if we have reached the null-terminator... */ + "jz .done\n" /* ... and if so, we are finished copying. */ + "incl %eax\n" /* Increment the positions. */ + "incl %ecx\n" + "jmp .loop\n" /* Restart the loop. */ + ".done:\n" + "subl 0x4(%esp),%eax\n" /* Get the length of the string we copied. */ + "decl %ecx \n" /* We do not count the null-terminator in the string length. 
*/ + "ret\n" +#endif +); +#else +sus_typ_usz rgo_strcpy(char const * const sus_restr _in,char * const sus_restr _out) { + char const * sus_restr inpos = _in; + char * sus_restr outpos = _out; + for (;;++inpos,++outpos) { + char const chr = *inpos; + *outpos = chr; + if (chr == '\x0') {return (sus_typ_usz)(inpos - _in);} + } + sus_unreach(); +} +#endif diff --git a/rgo/src/strdup.c b/rgo/src/strdup.c index 5b8cc97..7937658 100644 --- a/rgo/src/strdup.c +++ b/rgo/src/strdup.c @@ -4,8 +4,8 @@ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -#include <rgo.h> +#include <rgo-priv.h> #include <stdlib.h> -char * rgo_strdup(char const * const __restrict__ _str) {return rgo_memdup(_str,rgo_strlen(_str) + (size_t)0x1);} +char * rgo_strdup(char const * const __restrict__ _str) {return rgo_memdup(_str,rgo_strlen(_str) + (sus_typ_usz)0x1);} diff --git a/rgo/src/streq.S b/rgo/src/streq.S deleted file mode 100644 index 2df763c..0000000 --- a/rgo/src/streq.S +++ /dev/null @@ -1,39 +0,0 @@ -/* - Copyright 2022 Gabriel Jensen. - This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. - If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. -*/ - -#include <rgo.h> - -.global rgo_streq - -rgo_streq: - /* - char const * lstr - char const * rstr - */ -#if defined(__x86_64__) - /* rax: Address of the current input character. */ - movq %rdi,%rax - /* rsi: Address of the current output character. */ - movq %rsi,%rsi - /* rdx: Current input character. */ - /* rcx: Current output character. */ -.loop: - movb (%rax),%dl - movb (%rsi),%cl - cmpb %dl,%cl - jne .neq - testb %dl,%dl /* Check if we have reached the null-terminator. 
*/ - jz .eq - incq %rax - incq %rsi - jmp .loop -.eq: - mov $0x1,%rax - ret -.neq: - mov $0x0,%rax - ret -#endif diff --git a/rgo/src/streq.c b/rgo/src/streq.c new file mode 100644 index 0000000..7cf2f21 --- /dev/null +++ b/rgo/src/streq.c @@ -0,0 +1,78 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <rgo-priv.h> + +#if defined(rgo_priv_fastimpl) +__asm__ ( + ".global rgo_streq\n" + + "rgo_streq:\n" + /* + char const * lstr + char const * rstr + */ +#if defined(sus_arch_amd64) + /* rax: Address of the current input character. */ + "movq %rdi,%rax\n" + /* rsi: Address of the current output character. */ + "movq %rsi,%rsi\n" + /* rdx: Current input character. */ + /* rcx: Current output character. */ + ".loop:\n" + "movb (%rax),%dl\n" /* Move the characters into registers. */ + "movb (%rsi),%cl\n" + "cmpb %dl,%cl\n" /* Check if the characters are equal... */ + "jne .neq\n" /* ... indicate inequality if they are not. */ + "testb %dl,%dl\n" /* Check if we have reached the null-terminator... */ + "jz .eq\n" /* ... indicate equality if we have. */ + "incq %rax\n" /* Increment positions. */ + "incq %rsi\n" + "jmp .loop\n" /* Restart loop. */ + ".eq:\n" /* Indicate equality. */ + "movb $0x1,%al\n" + "ret\n" + ".neq:\n" /* Indicate inequality. */ + "movb $0x0,%al\n" + "ret\n" +#elif defined(sus_arch_ia32) + /* eax: Address of the current input character. */ + "movl 0x4(%esp),%eax\n" + /* ecx: Address of the current output character. */ + "movl 0x8(%esp),%ecx\n" + /* edx: Current input character. */ + /* edx: Current output character. */ + ".loop:\n" + "movb (%eax),%dl\n" /* Move the characters into registers. */ + "movb (%ecx),%dh\n" + "cmpb %dl,%dh\n" /* Check if the characters are equal... */ + "jne .neq\n" /* ... indicate inequality if they are not. 
*/ + "testb %dl,%dl\n" /* Check if we have reached the null-terminator... */ + "jz .eq\n" /* ... indicate equality if we have. */ + "incl %eax\n" /* Increment positions. */ + "incl %ecx\n" + "jmp .loop\n" /* Restart loop. */ + ".eq:\n" /* Indicate equality. */ + "movb $0x1,%al\n" + "ret\n" + ".neq:\n" /* Indicate inequality. */ + "movb $0x0,%al\n" + "ret\n" +#endif +); +#else +sus_typ_u8 rgo_streq(char const * const sus_restr _lstr,char const * const sus_restr _rstr) { + char const * sus_restr lpos = _lstr; + char const * sus_restr rpos = _rstr; + for (;;++lpos,++rpos) { + char const lchr = *lpos; + char const rchr = *rpos; + sus_likely (lchr != rchr) {return sus_typlit_u8(0x0);} + if (lchr == '\x0') {return sus_typlit_u8(0x1);} + } + sus_unreach(); +} +#endif diff --git a/rgo/src/strfill.c b/rgo/src/strfill.c index 0ba25c7..5d6270a 100644 --- a/rgo/src/strfill.c +++ b/rgo/src/strfill.c @@ -4,8 +4,8 @@ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -#include <rgo.h> +#include <rgo-priv.h> #include <stdint.h> -void rgo_strfill(char const * const __restrict__ _str,char const _chr) {rgo_memfill(_str,rgo_strlen(_str),(uint8_t)_chr);} +void rgo_strfill(char * const __restrict__ _str,char const _chr) {rgo_memfill(_str,rgo_strlen(_str),(sus_typ_u8)_chr);} diff --git a/rgo/src/strlen.S b/rgo/src/strlen.S deleted file mode 100644 index 19cb806..0000000 --- a/rgo/src/strlen.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - Copyright 2022 Gabriel Jensen. - This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. - If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. -*/ - -#include <rgo.h> - -.global rgo_strlen - -rgo_strlen: - /* - char const * str - */ -#if defined(__i386__) - /* eax: Address of the current character. */ - movl 0x4(%esp),%eax - /* ecx: Address of the first character. */ - movl %eax,%ecx - /* edx: Current character. 
*/ -.loop: - movb (%eax),%dl - testb %dl,%dl - jz .done /* Exit loop if we have reached the null-terminator. */ - incl %eax /* Continue to the next character. */ - jmp .loop -.done: - subl %ecx,%eax - ret -#elif defined(__x86_64__) - /* rax: Address of the current character. */ - movq %rdi,%rax - /* rdx: Current character. */ -.loop: - movb (%rax),%dl - testb %dl,%dl - jz .done /* Exit loop if we have reached the null-terminator. */ - incq %rax /* Continue to the next character. */ - jmp .loop -.done: - subq %rdi,%rax - ret -#endif diff --git a/rgo/src/strlen.c b/rgo/src/strlen.c new file mode 100644 index 0000000..6ddbf8b --- /dev/null +++ b/rgo/src/strlen.c @@ -0,0 +1,54 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <rgo-priv.h> + +#if defined(rgo_priv_fastimpl) +__asm__ ( + ".global rgo_strlen\n" + + "rgo_strlen:\n" + /* + char const * str + */ +#if defined(sus_arch_amd64) + /* rax: Address of the current character. */ + "movq %rdi,%rax\n" + /* rdx: Current character. */ + ".loop:\n" + "movb (%rax),%dl\n" + "testb %dl,%dl\n" + "jz .done\n" /* Exit loop if we have reached the null-terminator. */ + "incq %rax\n" /* Continue to the next character. */ + "jmp .loop\n" + ".done:\n" + "subq %rdi,%rax\n" + "ret\n" +#elif defined(sus_arch_ia32) + /* eax: Address of the current character. */ + "movl 0x4(%esp),%eax\n" + /* ecx: Current character. */ + ".loop:\n" + "movb (%eax),%cl\n" + "testb %cl,%cl\n" + "jz .done\n" /* Exit loop if we have reached the null-terminator. */ + "incl %eax\n" /* Continue to the next character. 
*/ + "jmp .loop\n" + ".done:\n" + "subl 0x4(%esp),%eax\n" + "ret\n" +#endif +); +#else +sus_typ_usz rgo_strlen(char const * const sus_restr _str) { + char const * sus_restr pos = _str; + for (;;++pos) { + char const chr = *pos; + sus_unlikely (chr == '\x0') {return (sus_typ_usz)(pos - _str);} + } + sus_unreach(); +} +#endif @@ -1,4 +1,4 @@ -/* Compile with: cc test.c -Irgo/include -Lrgo -lrgo -otest */ +/* cc test.c -Irgo/include -L. -lrgo -otest */ #include <assert.h> #include <inttypes.h> @@ -8,6 +8,9 @@ #include <stdio.h> int main(void) { + fprintf(stderr,"rgo test\n"); + fprintf(stderr,"arch: %s\n",sus_archstr); + fprintf(stderr,"fast: %s\n",rgo_fastimpl() ? "yes" : "no"); fprintf(stderr,"\n"); { #undef arrsz @@ -59,7 +62,7 @@ int main(void) { assert(arr1[(size_t)0x4] == arr0[(size_t)0x4]); assert(arr1[(size_t)0x5] == arr0[(size_t)0x5]); assert(arr1[(size_t)0x6] == arr0[(size_t)0x6]); - uint8_t const eq = rgo_memeq(arr1,arrsz,arr0); + sus_typ_u8 const eq = rgo_memeq(arr1,arrsz,arr0); fprintf(stderr,"eq: %u\n",eq); assert(eq); #undef arrsz @@ -79,7 +82,7 @@ int main(void) { size_t len = rgo_strlen(str); fprintf(stderr,"len: %zX\n",len); size_t pos0 = rgo_fndchr(str,' '); - size_t pos1 = rgo_fndbyte(str,len,(uint8_t)' '); + size_t pos1 = rgo_fndbyte(str,len,(sus_typ_u8)' '); fprintf(stderr,"pos0: %zX\n",pos0); fprintf(stderr,"pos1: %zX\n",pos1); assert(pos0 == (size_t)0x2); @@ -87,7 +90,7 @@ int main(void) { str += pos0 + (size_t)0x1; len = rgo_strlen(str); pos0 = rgo_fndchr(str,' '); - pos1 = rgo_fndbyte(str,len,(uint8_t)' '); + pos1 = rgo_fndbyte(str,len,(sus_typ_u8)' '); fprintf(stderr,"pos0: %zX\n",pos0); fprintf(stderr,"pos1: %zX\n",pos1); assert(pos0 == (size_t)0x2); @@ -95,14 +98,13 @@ int main(void) { str += pos0 + (size_t)0x1; len = rgo_strlen(str); pos0 = rgo_fndchr(str,' '); - pos1 = rgo_fndbyte(str,len,(uint8_t)' '); + pos1 = rgo_fndbyte(str,len,(sus_typ_u8)' '); fprintf(stderr,"pos0: %zX\n",pos0); fprintf(stderr,"pos1: %zX\n",pos1); assert(pos0 == 
(size_t)-0x1); assert(pos1 == pos0); } fprintf(stderr,"\n"); -#if defined(__x86_64__) { char const str0[] = "What's up, my guy?"; fprintf(stderr,"str0: \"%s\"\n",str0); @@ -110,9 +112,9 @@ int main(void) { fprintf(stderr,"str1: \"%s\"\n",str1); char const str2[] = "I don't know you!"; fprintf(stderr,"str2: \"%s\"\n",str2); - uint8_t const cmp0 = rgo_streq(str0,str1); - uint8_t const cmp1 = rgo_streq(str0,str2); - uint8_t const cmp2 = rgo_streq(str1,str2); + sus_typ_u8 const cmp0 = rgo_streq(str0,str1); + sus_typ_u8 const cmp1 = rgo_streq(str0,str2); + sus_typ_u8 const cmp2 = rgo_streq(str1,str2); fprintf(stderr,"cmp0: %u\n",cmp0); fprintf(stderr,"cmp1: %u\n",cmp1); fprintf(stderr,"cmp2: %u\n",cmp2); @@ -122,14 +124,13 @@ int main(void) { } fprintf(stderr,"\n"); { - char const str0[] = "What in the world?"; + char const str0[] = "What in the world are you doing?"; fprintf(stderr,"str0: \"%s\"\n",str0); - char const str1[sizeof (str0)]; - assert(rgo_strcpy(str0,str1) == (size_t)0x12); + char str1[sizeof (str0)]; + assert(rgo_strcpy(str0,str1) == (size_t)0x20); fprintf(stderr,"str1: \"%s\"\n",str1); assert(rgo_streq(str0,str1)); } fprintf(stderr,"\n"); -#endif printf("All tests have passed!\n"); } |