diff options
Diffstat (limited to 'zap/src')
-rw-r--r-- | zap/src/abs.c | 96 | ||||
-rw-r--r-- | zap/src/fastimpl.c | 6 | ||||
-rw-r--r-- | zap/src/fma.c | 171 | ||||
-rw-r--r-- | zap/src/fndbyte.c | 19 | ||||
-rw-r--r-- | zap/src/fndchr.c | 4 | ||||
-rw-r--r-- | zap/src/foreach.c | 10 | ||||
-rw-r--r-- | zap/src/memcmp.c | 8 | ||||
-rw-r--r-- | zap/src/memcpy.c | 10 | ||||
-rw-r--r-- | zap/src/memdup.c | 2 | ||||
-rw-r--r-- | zap/src/memeq.c | 10 | ||||
-rw-r--r-- | zap/src/memfill.c | 16 | ||||
-rw-r--r-- | zap/src/strcmp.c | 4 | ||||
-rw-r--r-- | zap/src/strcpy.c | 6 | ||||
-rw-r--r-- | zap/src/strdup.c | 2 | ||||
-rw-r--r-- | zap/src/streq.c | 4 | ||||
-rw-r--r-- | zap/src/strfill.c | 4 | ||||
-rw-r--r-- | zap/src/strlen.c | 4 |
17 files changed, 334 insertions, 42 deletions
diff --git a/zap/src/abs.c b/zap/src/abs.c new file mode 100644 index 0000000..8fe97e6 --- /dev/null +++ b/zap/src/abs.c @@ -0,0 +1,96 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <zap/priv.h> + +#include <zap/math.h> + +#include <stdint.h> + +#if zap_priv_fastimpl +__asm__ ( + ".globl zap_abs_c\n" + ".globl zap_abs_i\n" + ".globl zap_abs_l\n" + ".globl zap_abs_ll\n" + ".globl zap_abs_s\n" + + "zap_abs_c:\n" + /* + signed char val + */ +#if defined(sus_arch_amd64) || defined(sus_arch_ia32) + "movb %dil,%al\n" + "sarb $0x3F,%al\n" + "xorb %al,%dil\n" + "subb %al,%dil\n" + "movb %dil,%al\n" + "ret\n" +#endif + + "zap_abs_i:\n" + /* + int val + */ +#if defined(sus_arch_amd64) || defined(sus_arch_ia32) + "movl %edi,%eax\n" + "sarl $0x3F,%eax\n" + "xorl %eax,%edi\n" + "subl %eax,%edi\n" + "movl %edi,%eax\n" + "ret\n" +#endif + + "zap_abs_l:\n" + /* + long val + */ +#if defined(sus_arch_amd64) + "movq %rdi,%rax\n" + "sarq $0x3F,%rax\n" + "xorq %rax,%rdi\n" + "subq %rax,%rdi\n" + "movq %rdi,%rax\n" + "ret\n" +#endif + + "zap_abs_ll:\n" + /* + long long val + */ +#if defined(sus_arch_amd64) + "movq %rdi,%rax\n" + "sarq $0x3F,%rax\n" + "xorq %rax,%rdi\n" + "subq %rax,%rdi\n" + "movq %rdi,%rax\n" + "ret\n" +#endif + + "zap_abs_s:\n" + /* + short val + */ +#if defined(sus_arch_amd64) || defined(sus_arch_ia32) + "movw %di,%ax\n" + "sarw $0x3F,%ax\n" + "xorw %ax,%di\n" + "subw %ax,%di\n" + "movw %di,%ax\n" + "ret\n" +#endif +); +#else +#define zap_local_abs(_typ,_sufx) \ + _typ zap_abs_ ## _sufx (_typ const _val) {return _val > (_typ)0x0 ? 
_val : -_val;} + +zap_local_abs(signed char,c) +zap_local_abs(int,i) +zap_local_abs(long,l) +zap_local_abs(long long,ll) +zap_local_abs(short,s) + +#endif diff --git a/zap/src/fastimpl.c b/zap/src/fastimpl.c index 71ded00..2541a41 100644 --- a/zap/src/fastimpl.c +++ b/zap/src/fastimpl.c @@ -9,8 +9,4 @@ #include <stdbool.h> #include <stdint.h> -#if defined(zap_priv_fastimpl) -bool const zap_fastimpl = true; -#else -bool const zap_fastimpl = false; -#endif +bool const zap_fastimpl = zap_priv_fastimpl; diff --git a/zap/src/fma.c b/zap/src/fma.c new file mode 100644 index 0000000..b2f45ad --- /dev/null +++ b/zap/src/fma.c @@ -0,0 +1,171 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <zap/priv.h> + +#include <zap/math.h> + +#include <stdint.h> + +#if zap_priv_fastimpl +__asm__ ( + ".globl zap_fma_c\n" + ".globl zap_fma_i\n" + ".globl zap_fma_l\n" + ".globl zap_fma_ll\n" + ".globl zap_fma_s\n" + ".globl zap_fma_uc\n" + ".globl zap_fma_ui\n" + ".globl zap_fma_ul\n" + ".globl zap_fma_ull\n" + ".globl zap_fma_us\n" + + "zap_fma_c:\n" + /* + signed char a + signed char b + signed char c + */ +#if defined(sus_arch_amd64) + "movb %sil,%al\n" + "imulb %dl\n" + "addb %dil,%al\n" + "ret\n" +#endif + + "zap_fma_i:\n" + /* + int a + int b + int c + */ +#if defined(sus_arch_amd64) + "movl %edx,%eax\n" + "imull %esi\n" + "addl %edi,%eax\n" + "ret\n" +#endif + + "zap_fma_l:\n" + /* + long a + long b + long c + */ +#if defined(sus_arch_amd64) + "movq %rdx,%rax\n" + "imulq %rsi\n" + "addq %rdi,%rax\n" + "ret\n" +#endif + + "zap_fma_ll:\n" + /* + long long a + long long b + long long c + */ +#if defined(sus_arch_amd64) + "movq %rdx,%rax\n" + "imulq %rsi\n" + "addq %rdi,%rax\n" + "ret\n" +#endif + + "zap_fma_s:\n" + /* + short a + short b + short c + */ +#if defined(sus_arch_amd64) + 
"movw %dx,%ax\n" + "imulw %si\n" + "addw %di,%ax\n" + "ret\n" +#endif + + "zap_fma_uc:\n" + /* + unsigned char a + unsigned char b + unsigned char c + */ +#if defined(sus_arch_amd64) + "movb %sil,%al\n" /* mulb puts its result in ax rather than in a dl:al pair (the wider variants use dx:ax, edx:eax and rdx:rax), so we don't need to worry about it overwriting dl. */ + "mulb %dl\n" + "addb %dil,%al\n" + "ret\n" +#endif + + "zap_fma_ui:\n" + /* + unsigned int a + unsigned int b + unsigned int c + */ +#if defined(sus_arch_amd64) + "movl %edx,%eax\n" + "mull %esi\n" + "addl %edi,%eax\n" + "ret\n" +#endif + + "zap_fma_ul:\n" + /* + unsigned long a + unsigned long b + unsigned long c + */ +#if defined(sus_arch_amd64) + "movq %rdx,%rax\n" + "mulq %rsi\n" + "addq %rdi,%rax\n" + "ret\n" +#endif + + "zap_fma_ull:\n" + /* + unsigned long long a + unsigned long long b + unsigned long long c + */ +#if defined(sus_arch_amd64) + "movq %rdx,%rax\n" /* rdx gets overwritten by mulq, so might as well just make it the first operand (in multiplication, the order is meaningless). 
*/ + "mulq %rsi\n" + "addq %rdi,%rax\n" + "ret\n" +#endif + + "zap_fma_us:\n" + /* + unsigned short a + unsigned short b + unsigned short c + */ +#if defined(sus_arch_amd64) + "movw %dx,%ax\n" + "mulw %si\n" + "addw %di,%ax\n" + "ret\n" +#endif +); +#else +#define zap_local_fma(_typ,_sufx) \ + _typ zap_fma_ ## _sufx (_typ const _a,_typ const _b,_typ const _c) {return _a + _b * _c;} + +zap_local_fma(signed char,c) +zap_local_fma(int,i) +zap_local_fma(long,l) +zap_local_fma(long long,ll) +zap_local_fma(short,s) +zap_local_fma(unsigned char,uc) +zap_local_fma(unsigned int,ui) +zap_local_fma(unsigned long,ul) +zap_local_fma(unsigned long long,ull) +zap_local_fma(unsigned short,us) + +#endif diff --git a/zap/src/fndbyte.c b/zap/src/fndbyte.c index 3283eec..e6e6070 100644 --- a/zap/src/fndbyte.c +++ b/zap/src/fndbyte.c @@ -6,18 +6,20 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stddef.h> #include <stdint.h> -#if defined(zap_priv_fastimpl) +#if zap_priv_fastimpl __asm__ ( ".globl zap_fndbyte\n" "zap_fndbyte:\n" /* - void const * ptr - size_t num - uint_least8_t byte + void const * pos + size_t num + unsigned char byte */ #if defined(sus_arch_amd64) /* rax: Address of the current element. 
*/ @@ -68,10 +70,11 @@ __asm__ ( #endif ); #else -size_t zap_fndbyte(void const * const _ptr,size_t const _num,uint_least8_t const _byte) { - uint_least8_t const * ptr = (uint_least8_t const *)_ptr; - uint_least8_t const * const afterbuf = ptr + _num; - for (;ptr != afterbuf;++ptr) {sus_unlikely (*ptr == _byte) {return ptr - (uint_least8_t const *)_ptr;}} +size_t zap_fndbyte(void const * const _ptr,size_t const _num,unsigned char const _byte) { + unsigned char const * startpos = _ptr; + unsigned char const * pos = startpos; + unsigned char const * const afterbuf = pos + _num; + for (;pos != afterbuf;++pos) {sus_unlikely (*pos == _byte) {return pos - startpos;}} return SIZE_MAX; } #endif diff --git a/zap/src/fndchr.c b/zap/src/fndchr.c index 5cf78f8..fc4eb2b 100644 --- a/zap/src/fndchr.c +++ b/zap/src/fndchr.c @@ -6,10 +6,12 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stddef.h> #include <stdint.h> -#if defined(zap_priv_fastimpl) +#if zap_priv_fastimpl __asm__ ( ".globl zap_fndchr\n" diff --git a/zap/src/foreach.c b/zap/src/foreach.c index 54911e9..5e281d4 100644 --- a/zap/src/foreach.c +++ b/zap/src/foreach.c @@ -6,11 +6,13 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stddef.h> void zap_foreach(void * const _ptr,size_t const _sz,size_t const _num,void (* const _fn)(void *)) { - unsigned char * ptr = _ptr; - size_t const numbyte = _sz * _num; - void * const afterbuf = ptr + numbyte; - for (;ptr != afterbuf;ptr += _sz) {_fn(ptr);} + unsigned char * pos = _ptr; + size_t const numbyte = _sz * _num; + unsigned char * const afterbuf = pos + numbyte; + for (;pos != afterbuf;pos += _sz) {_fn(pos);} } diff --git a/zap/src/memcmp.c b/zap/src/memcmp.c index 31e5161..0fdf13a 100644 --- a/zap/src/memcmp.c +++ b/zap/src/memcmp.c @@ -6,16 +6,18 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stddef.h> int_least8_t zap_memcmp(void const * const _lstr,size_t const _num,void const * const _rstr) { - unsigned char const * lpos = (unsigned char 
const *)_lstr; - unsigned char const * rpos = (unsigned char const *)_rstr; + unsigned char const * lpos = _lstr; + unsigned char const * rpos = _rstr; unsigned char const * const afterlbuf = lpos + _num; for (;lpos != afterlbuf;++lpos,++rpos) { unsigned char const lbyte = *lpos; unsigned char const rbyte = *rpos; - sus_likely (lbyte != rbyte) {return lbyte < rbyte ? INT8_MIN : INT8_MAX;} + sus_likely (lbyte != rbyte) {return lbyte < rbyte ? INT_LEAST8_MIN : INT_LEAST8_MAX;} } return 0x0; } diff --git a/zap/src/memcpy.c b/zap/src/memcpy.c index 8fa98ae..ae923c3 100644 --- a/zap/src/memcpy.c +++ b/zap/src/memcpy.c @@ -6,10 +6,12 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stddef.h> #include <stdint.h> -#if defined(zap_priv_fastimpl) +#if zap_priv_fastimpl __asm__ ( ".globl zap_memcpy\n" @@ -133,9 +135,9 @@ __asm__ ( ); #else void zap_memcpy(void const * const _in,size_t const _num,void * const _out) { - uint_least8_t const * in = (uint_least8_t const *)_in; - uint_least8_t * out = (uint_least8_t *)_out; - uint_least8_t const * const afterbuf = in + _num; + unsigned char const * in = _in; + unsigned char * out = _out; + unsigned char const * const afterbuf = in + _num; for (;in != afterbuf;++in,++out) {*out = *in;} } #endif diff --git a/zap/src/memdup.c b/zap/src/memdup.c index 3670eb3..9b56314 100644 --- a/zap/src/memdup.c +++ b/zap/src/memdup.c @@ -6,6 +6,8 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stdlib.h> void * zap_memdup(sus_attr_unused void const * const _ptr,sus_attr_unused size_t const _num) { diff --git a/zap/src/memeq.c b/zap/src/memeq.c index 75ecc12..7dce213 100644 --- a/zap/src/memeq.c +++ b/zap/src/memeq.c @@ -6,11 +6,13 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stdbool.h> #include <stddef.h> #include <stdint.h> -#if defined(zap_priv_fastimpl) +#if zap_priv_fastimpl __asm__ ( ".globl zap_memeq\n" @@ -101,9 +103,9 @@ __asm__ ( ); #else bool zap_memeq(void const * const _lptr,size_t const _num,void const 
* const _rptr) { - uint_least8_t const * lpos = (uint_least8_t const *)_lptr; - uint_least8_t const * rpos = (uint_least8_t const *)_rptr; - uint_least8_t const * const afterbuf = lpos + _num; + unsigned char const * lpos = _lptr; + unsigned char const * rpos = _rptr; + unsigned char const * const afterbuf = lpos + _num; for (;lpos != afterbuf;++lpos,++rpos) {sus_likely (*lpos != *rpos) {return false;}} return true; } diff --git a/zap/src/memfill.c b/zap/src/memfill.c index c9a9797..1aebd29 100644 --- a/zap/src/memfill.c +++ b/zap/src/memfill.c @@ -6,18 +6,20 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stddef.h> #include <stdint.h> -#if defined(zap_priv_fastimpl) +#if zap_priv_fastimpl __asm__ ( ".globl zap_memfill\n" "zap_memfill:\n" /* - void const * ptr - size_t num - uint_least8_t val + void const * ptr + size_t num + unsigned char val */ #if defined(sus_arch_amd64) /* rdi: Address of the current element. */ @@ -50,9 +52,9 @@ __asm__ ( #endif ); #else -void zap_memfill(void * const _ptr,size_t const _num,uint_least8_t const _byte) { - uint_least8_t * pos = (uint_least8_t *)_ptr; - uint_least8_t * const afterbuf = pos + _num; +void zap_memfill(void * const _ptr,size_t const _num,unsigned char const _byte) { + unsigned char * pos = _ptr; + unsigned char * const afterbuf = pos + _num; for (;pos != afterbuf;++pos) {*pos = _byte;} } #endif diff --git a/zap/src/strcmp.c b/zap/src/strcmp.c index 101f7dc..0ed0a59 100644 --- a/zap/src/strcmp.c +++ b/zap/src/strcmp.c @@ -6,6 +6,8 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stdint.h> int_least8_t zap_strcmp(char const * const _lstr,char const * const _rstr) { @@ -14,7 +16,7 @@ int_least8_t zap_strcmp(char const * const _lstr,char const * const _rstr) { for (;;++lpos,++rpos) { unsigned char const lchr = *lpos; unsigned char const rchr = *rpos; - sus_likely (lchr != rchr) {return lchr < rchr ? INT8_MIN : INT8_MAX;} + sus_likely (lchr != rchr) {return lchr < rchr ? 
INT_LEAST8_MIN : INT_LEAST8_MAX;} sus_unlikely (lchr == (unsigned char)0x0) {return 0x0;} } sus_unreach(); diff --git a/zap/src/strcpy.c b/zap/src/strcpy.c index 943cb2c..616af7f 100644 --- a/zap/src/strcpy.c +++ b/zap/src/strcpy.c @@ -6,9 +6,11 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stddef.h> -#if defined(zap_priv_fastimpl) +#if zap_priv_fastimpl __asm__ ( ".globl zap_strcpy\n" @@ -62,7 +64,7 @@ size_t zap_strcpy(char const * const _in,char * const _out) { for (;;++inpos,++outpos) { char const chr = *inpos; *outpos = chr; - if (chr == '\x0') {return (size_t)(inpos - _in);} + if (chr == '\x0') {return inpos - _in;} } sus_unreach(); } diff --git a/zap/src/strdup.c b/zap/src/strdup.c index a7ab6e6..183a909 100644 --- a/zap/src/strdup.c +++ b/zap/src/strdup.c @@ -6,6 +6,8 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stdlib.h> char * zap_strdup(sus_attr_unused char const * const _str) { diff --git a/zap/src/streq.c b/zap/src/streq.c index 9221cec..1ff4420 100644 --- a/zap/src/streq.c +++ b/zap/src/streq.c @@ -6,10 +6,12 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stdbool.h> #include <stdint.h> -#if defined(zap_priv_fastimpl) +#if zap_priv_fastimpl __asm__ ( ".globl zap_streq\n" diff --git a/zap/src/strfill.c b/zap/src/strfill.c index a113094..bd0af33 100644 --- a/zap/src/strfill.c +++ b/zap/src/strfill.c @@ -6,6 +6,8 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stdint.h> -void zap_strfill(char * const _str,char const _chr) {zap_memfill(_str,zap_strlen(_str),(uint_least8_t)_chr);} +void zap_strfill(char * const _str,char const _chr) {zap_memfill(_str,zap_strlen(_str),(unsigned char)_chr);} diff --git a/zap/src/strlen.c b/zap/src/strlen.c index eab12e6..84b7d28 100644 --- a/zap/src/strlen.c +++ b/zap/src/strlen.c @@ -6,9 +6,11 @@ #include <zap/priv.h> +#include <zap/mem.h> + #include <stddef.h> -#if defined(zap_priv_fastimpl) +#if zap_priv_fastimpl __asm__ ( ".globl zap_strlen\n" |