summary refs log tree commit diff
path: root/zap/src
diff options
context:
space:
mode:
Diffstat (limited to 'zap/src')
-rw-r--r-- zap/src/abs.c 96
-rw-r--r-- zap/src/fastimpl.c 6
-rw-r--r-- zap/src/fma.c 171
-rw-r--r-- zap/src/fndbyte.c 19
-rw-r--r-- zap/src/fndchr.c 4
-rw-r--r-- zap/src/foreach.c 10
-rw-r--r-- zap/src/memcmp.c 8
-rw-r--r-- zap/src/memcpy.c 10
-rw-r--r-- zap/src/memdup.c 2
-rw-r--r-- zap/src/memeq.c 10
-rw-r--r-- zap/src/memfill.c 16
-rw-r--r-- zap/src/strcmp.c 4
-rw-r--r-- zap/src/strcpy.c 6
-rw-r--r-- zap/src/strdup.c 2
-rw-r--r-- zap/src/streq.c 4
-rw-r--r-- zap/src/strfill.c 4
-rw-r--r-- zap/src/strlen.c 4
17 files changed, 334 insertions, 42 deletions
diff --git a/zap/src/abs.c b/zap/src/abs.c
new file mode 100644
index 0000000..8fe97e6
--- /dev/null
+++ b/zap/src/abs.c
@@ -0,0 +1,96 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <zap/priv.h>
+
+#include <zap/math.h>
+
+#include <stdint.h>
+
+#if zap_priv_fastimpl
+__asm__ ( /* Branchless absolute values: mask = val >> (width - 1) (arithmetic shift), result = (val ^ mask) - mask. */
+ ".globl zap_abs_c\n"
+ ".globl zap_abs_i\n"
+ ".globl zap_abs_l\n"
+ ".globl zap_abs_ll\n"
+ ".globl zap_abs_s\n"
+
+ "zap_abs_c:\n"
+ /*
+ signed char val (read from dil, result returned in al)
+ */
+#if defined(sus_arch_amd64) || defined(sus_arch_ia32) /* NOTE(review): this body reads the argument from dil as on AMD64 - confirm that the ia32 guard is intended. */
+ "movb %dil,%al\n"
+ "sarb $0x7,%al\n" /* Shift by width - 1: al becomes 0x00 for a non-negative value and 0xFF for a negative one. */
+ "xorb %al,%dil\n" /* Flips every bit of a negative value; leaves a non-negative value untouched. */
+ "subb %al,%dil\n" /* Subtracting the mask (-1) adds one, completing the two's complement negation. */
+ "movb %dil,%al\n"
+ "ret\n"
+#endif
+
+ "zap_abs_i:\n"
+ /*
+ int val (read from edi, result returned in eax)
+ */
+#if defined(sus_arch_amd64) || defined(sus_arch_ia32) /* NOTE(review): this body reads the argument from edi as on AMD64 - confirm that the ia32 guard is intended. */
+ "movl %edi,%eax\n"
+ "sarl $0x1F,%eax\n" /* Shift by width - 1 to build the sign mask. */
+ "xorl %eax,%edi\n"
+ "subl %eax,%edi\n"
+ "movl %edi,%eax\n"
+ "ret\n"
+#endif
+
+ "zap_abs_l:\n"
+ /*
+ long val (read from rdi, result returned in rax)
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdi,%rax\n"
+ "sarq $0x3F,%rax\n" /* Shift by width - 1 to build the sign mask. */
+ "xorq %rax,%rdi\n"
+ "subq %rax,%rdi\n"
+ "movq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_abs_ll:\n"
+ /*
+ long long val (read from rdi, result returned in rax)
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdi,%rax\n"
+ "sarq $0x3F,%rax\n" /* Shift by width - 1 to build the sign mask. */
+ "xorq %rax,%rdi\n"
+ "subq %rax,%rdi\n"
+ "movq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_abs_s:\n"
+ /*
+ short val (read from di, result returned in ax)
+ */
+#if defined(sus_arch_amd64) || defined(sus_arch_ia32) /* NOTE(review): this body reads the argument from di as on AMD64 - confirm that the ia32 guard is intended. */
+ "movw %di,%ax\n"
+ "sarw $0xF,%ax\n" /* Shift by width - 1 to build the sign mask. */
+ "xorw %ax,%di\n"
+ "subw %ax,%di\n"
+ "movw %di,%ax\n"
+ "ret\n"
+#endif
+);
+#else
+#define zap_local_abs(_typ,_sufx) /* Defines zap_abs_<_sufx>: yields _val unchanged when it is positive and its negation otherwise. */ \
+ _typ zap_abs_ ## _sufx (_typ const _val) {if (_val > (_typ)0x0) {return _val;} return -_val;}
+
+zap_local_abs(signed char,c) /* zap_abs_c */
+zap_local_abs(int,i) /* zap_abs_i */
+zap_local_abs(long,l) /* zap_abs_l */
+zap_local_abs(long long,ll) /* zap_abs_ll */
+zap_local_abs(short,s) /* zap_abs_s */
+
+#endif
diff --git a/zap/src/fastimpl.c b/zap/src/fastimpl.c
index 71ded00..2541a41 100644
--- a/zap/src/fastimpl.c
+++ b/zap/src/fastimpl.c
@@ -9,8 +9,4 @@
#include <stdbool.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
-bool const zap_fastimpl = true;
-#else
-bool const zap_fastimpl = false;
-#endif
+bool const zap_fastimpl = zap_priv_fastimpl;
diff --git a/zap/src/fma.c b/zap/src/fma.c
new file mode 100644
index 0000000..b2f45ad
--- /dev/null
+++ b/zap/src/fma.c
@@ -0,0 +1,171 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <zap/priv.h>
+
+#include <zap/math.h>
+
+#include <stdint.h>
+
+#if zap_priv_fastimpl
+__asm__ ( /* Integer multiply-add routines: each one returns a + b * c, computed in the width of its operand type. */
+ ".globl zap_fma_c\n"
+ ".globl zap_fma_i\n"
+ ".globl zap_fma_l\n"
+ ".globl zap_fma_ll\n"
+ ".globl zap_fma_s\n"
+ ".globl zap_fma_uc\n"
+ ".globl zap_fma_ui\n"
+ ".globl zap_fma_ul\n"
+ ".globl zap_fma_ull\n"
+ ".globl zap_fma_us\n"
+
+ "zap_fma_c:\n"
+ /*
+ signed char a (dil)
+ signed char b (sil)
+ signed char c (dl)
+ */
+#if defined(sus_arch_amd64)
+ "movb %sil,%al\n" /* al = b */
+ "imulb %dl\n" /* ax = al * c */
+ "addb %dil,%al\n" /* al += a; the low byte is the result. */
+ "ret\n"
+#endif
+
+ "zap_fma_i:\n"
+ /*
+ int a (edi)
+ int b (esi)
+ int c (edx)
+ */
+#if defined(sus_arch_amd64)
+ "movl %edx,%eax\n" /* eax = c; copied first because the multiplication below overwrites edx. */
+ "imull %esi\n" /* edx:eax = eax * b */
+ "addl %edi,%eax\n" /* eax += a */
+ "ret\n"
+#endif
+
+ "zap_fma_l:\n"
+ /*
+ long a (rdi)
+ long b (rsi)
+ long c (rdx)
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdx,%rax\n"
+ "imulq %rsi\n"
+ "addq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_ll:\n"
+ /*
+ long long a (rdi)
+ long long b (rsi)
+ long long c (rdx)
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdx,%rax\n"
+ "imulq %rsi\n"
+ "addq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_s:\n"
+ /*
+ short a (di)
+ short b (si)
+ short c (dx)
+ */
+#if defined(sus_arch_amd64)
+ "movw %dx,%ax\n"
+ "imulw %si\n"
+ "addw %di,%ax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_uc:\n"
+ /*
+ unsigned char a (dil)
+ unsigned char b (sil)
+ unsigned char c (dl)
+ */
+#if defined(sus_arch_amd64)
+ "movb %sil,%al\n" /* mulb writes its product to ax rather than to a register pair involving dl (unlike the wider variants), so we don't need to worry about it overwriting dl. */
+ "mulb %dl\n"
+ "addb %dil,%al\n"
+ "ret\n"
+#endif
+
+ "zap_fma_ui:\n"
+ /*
+ unsigned int a (edi)
+ unsigned int b (esi)
+ unsigned int c (edx)
+ */
+#if defined(sus_arch_amd64)
+ "movl %edx,%eax\n"
+ "mull %esi\n"
+ "addl %edi,%eax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_ul:\n"
+ /*
+ unsigned long a (rdi)
+ unsigned long b (rsi)
+ unsigned long c (rdx)
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdx,%rax\n"
+ "mulq %rsi\n"
+ "addq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_ull:\n"
+ /*
+ unsigned long long a (rdi)
+ unsigned long long b (rsi)
+ unsigned long long c (rdx)
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdx,%rax\n" /* rdx gets overwritten by mulq, so we might as well make it the first operand (the order of the factors does not matter in multiplication). */
+ "mulq %rsi\n"
+ "addq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_us:\n"
+ /*
+ unsigned short a (di)
+ unsigned short b (si)
+ unsigned short c (dx)
+ */
+#if defined(sus_arch_amd64)
+ "movw %dx,%ax\n"
+ "mulw %si\n"
+ "addw %di,%ax\n"
+ "ret\n"
+#endif
+);
+#else
+#define zap_local_fma(_typ,_sufx) /* Defines zap_fma_<_sufx>, which returns _a + _b * _c for operands of type _typ. */ \
+ _typ zap_fma_ ## _sufx (_typ const _a,_typ const _b,_typ const _c) {return _a + _b * _c;}
+
+zap_local_fma(signed char,c)
+zap_local_fma(int,i)
+zap_local_fma(long,l)
+zap_local_fma(long long,ll)
+zap_local_fma(short,s)
+unsigned char zap_fma_uc(unsigned char const _a,unsigned char const _b,unsigned char const _c) {return (unsigned char)(_a + (unsigned int)_b * _c);} /* Written out by hand: the product is formed as unsigned int so that integer promotion to signed int cannot overflow; the result is then reduced to the width of unsigned char. */
+zap_local_fma(unsigned int,ui)
+zap_local_fma(unsigned long,ul)
+zap_local_fma(unsigned long long,ull)
+unsigned short zap_fma_us(unsigned short const _a,unsigned short const _b,unsigned short const _c) {return (unsigned short)(_a + (unsigned int)_b * _c);} /* Written out by hand: unsigned short operands promote to signed int, so e.g. 65535 * 65535 would be a signed overflow; multiplying as unsigned int wraps instead, and the result is then reduced to the width of unsigned short. */
+
+#endif
diff --git a/zap/src/fndbyte.c b/zap/src/fndbyte.c
index 3283eec..e6e6070 100644
--- a/zap/src/fndbyte.c
+++ b/zap/src/fndbyte.c
@@ -6,18 +6,20 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_fndbyte\n"
"zap_fndbyte:\n"
/*
- void const * ptr
- size_t num
- uint_least8_t byte
+ void const * pos
+ size_t num
+ unsigned char byte
*/
#if defined(sus_arch_amd64)
/* rax: Address of the current element. */
@@ -68,10 +70,11 @@ __asm__ (
#endif
);
#else
-size_t zap_fndbyte(void const * const _ptr,size_t const _num,uint_least8_t const _byte) {
- uint_least8_t const * ptr = (uint_least8_t const *)_ptr;
- uint_least8_t const * const afterbuf = ptr + _num;
- for (;ptr != afterbuf;++ptr) {sus_unlikely (*ptr == _byte) {return ptr - (uint_least8_t const *)_ptr;}}
+size_t zap_fndbyte(void const * const _ptr,size_t const _num,unsigned char const _byte) {
+ unsigned char const * startpos = _ptr;
+ unsigned char const * pos = startpos;
+ unsigned char const * const afterbuf = pos + _num;
+ for (;pos != afterbuf;++pos) {sus_unlikely (*pos == _byte) {return pos - startpos;}}
return SIZE_MAX;
}
#endif
diff --git a/zap/src/fndchr.c b/zap/src/fndchr.c
index 5cf78f8..fc4eb2b 100644
--- a/zap/src/fndchr.c
+++ b/zap/src/fndchr.c
@@ -6,10 +6,12 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_fndchr\n"
diff --git a/zap/src/foreach.c b/zap/src/foreach.c
index 54911e9..5e281d4 100644
--- a/zap/src/foreach.c
+++ b/zap/src/foreach.c
@@ -6,11 +6,13 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
void zap_foreach(void * const _ptr,size_t const _sz,size_t const _num,void (* const _fn)(void *)) {
- unsigned char * ptr = _ptr;
- size_t const numbyte = _sz * _num;
- void * const afterbuf = ptr + numbyte;
- for (;ptr != afterbuf;ptr += _sz) {_fn(ptr);}
+ unsigned char * pos = _ptr;
+ size_t const numbyte = _sz * _num;
+ unsigned char * const afterbuf = pos + numbyte;
+ for (;pos != afterbuf;pos += _sz) {_fn(pos);}
}
diff --git a/zap/src/memcmp.c b/zap/src/memcmp.c
index 31e5161..0fdf13a 100644
--- a/zap/src/memcmp.c
+++ b/zap/src/memcmp.c
@@ -6,16 +6,18 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
int_least8_t zap_memcmp(void const * const _lstr,size_t const _num,void const * const _rstr) {
- unsigned char const * lpos = (unsigned char const *)_lstr;
- unsigned char const * rpos = (unsigned char const *)_rstr;
+ unsigned char const * lpos = _lstr;
+ unsigned char const * rpos = _rstr;
unsigned char const * const afterlbuf = lpos + _num;
for (;lpos != afterlbuf;++lpos,++rpos) {
unsigned char const lbyte = *lpos;
unsigned char const rbyte = *rpos;
- sus_likely (lbyte != rbyte) {return lbyte < rbyte ? INT8_MIN : INT8_MAX;}
+ sus_likely (lbyte != rbyte) {return lbyte < rbyte ? INT_LEAST8_MIN : INT_LEAST8_MAX;}
}
return 0x0;
}
diff --git a/zap/src/memcpy.c b/zap/src/memcpy.c
index 8fa98ae..ae923c3 100644
--- a/zap/src/memcpy.c
+++ b/zap/src/memcpy.c
@@ -6,10 +6,12 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_memcpy\n"
@@ -133,9 +135,9 @@ __asm__ (
);
#else
void zap_memcpy(void const * const _in,size_t const _num,void * const _out) {
- uint_least8_t const * in = (uint_least8_t const *)_in;
- uint_least8_t * out = (uint_least8_t *)_out;
- uint_least8_t const * const afterbuf = in + _num;
+ unsigned char const * in = _in;
+ unsigned char * out = _out;
+ unsigned char const * const afterbuf = in + _num;
for (;in != afterbuf;++in,++out) {*out = *in;}
}
#endif
diff --git a/zap/src/memdup.c b/zap/src/memdup.c
index 3670eb3..9b56314 100644
--- a/zap/src/memdup.c
+++ b/zap/src/memdup.c
@@ -6,6 +6,8 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdlib.h>
void * zap_memdup(sus_attr_unused void const * const _ptr,sus_attr_unused size_t const _num) {
diff --git a/zap/src/memeq.c b/zap/src/memeq.c
index 75ecc12..7dce213 100644
--- a/zap/src/memeq.c
+++ b/zap/src/memeq.c
@@ -6,11 +6,13 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_memeq\n"
@@ -101,9 +103,9 @@ __asm__ (
);
#else
bool zap_memeq(void const * const _lptr,size_t const _num,void const * const _rptr) {
- uint_least8_t const * lpos = (uint_least8_t const *)_lptr;
- uint_least8_t const * rpos = (uint_least8_t const *)_rptr;
- uint_least8_t const * const afterbuf = lpos + _num;
+ unsigned char const * lpos = _lptr;
+ unsigned char const * rpos = _rptr;
+ unsigned char const * const afterbuf = lpos + _num;
for (;lpos != afterbuf;++lpos,++rpos) {sus_likely (*lpos != *rpos) {return false;}}
return true;
}
diff --git a/zap/src/memfill.c b/zap/src/memfill.c
index c9a9797..1aebd29 100644
--- a/zap/src/memfill.c
+++ b/zap/src/memfill.c
@@ -6,18 +6,20 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_memfill\n"
"zap_memfill:\n"
/*
- void const * ptr
- size_t num
- uint_least8_t val
+ void const * ptr
+ size_t num
+ unsigned char val
*/
#if defined(sus_arch_amd64)
/* rdi: Address of the current element. */
@@ -50,9 +52,9 @@ __asm__ (
#endif
);
#else
-void zap_memfill(void * const _ptr,size_t const _num,uint_least8_t const _byte) {
- uint_least8_t * pos = (uint_least8_t *)_ptr;
- uint_least8_t * const afterbuf = pos + _num;
+void zap_memfill(void * const _ptr,size_t const _num,unsigned char const _byte) {
+ unsigned char * pos = _ptr;
+ unsigned char * const afterbuf = pos + _num;
for (;pos != afterbuf;++pos) {*pos = _byte;}
}
#endif
diff --git a/zap/src/strcmp.c b/zap/src/strcmp.c
index 101f7dc..0ed0a59 100644
--- a/zap/src/strcmp.c
+++ b/zap/src/strcmp.c
@@ -6,6 +6,8 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdint.h>
int_least8_t zap_strcmp(char const * const _lstr,char const * const _rstr) {
@@ -14,7 +16,7 @@ int_least8_t zap_strcmp(char const * const _lstr,char const * const _rstr) {
for (;;++lpos,++rpos) {
unsigned char const lchr = *lpos;
unsigned char const rchr = *rpos;
- sus_likely (lchr != rchr) {return lchr < rchr ? INT8_MIN : INT8_MAX;}
+ sus_likely (lchr != rchr) {return lchr < rchr ? INT_LEAST8_MIN : INT_LEAST8_MAX;}
sus_unlikely (lchr == (unsigned char)0x0) {return 0x0;}
}
sus_unreach();
diff --git a/zap/src/strcpy.c b/zap/src/strcpy.c
index 943cb2c..616af7f 100644
--- a/zap/src/strcpy.c
+++ b/zap/src/strcpy.c
@@ -6,9 +6,11 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_strcpy\n"
@@ -62,7 +64,7 @@ size_t zap_strcpy(char const * const _in,char * const _out) {
for (;;++inpos,++outpos) {
char const chr = *inpos;
*outpos = chr;
- if (chr == '\x0') {return (size_t)(inpos - _in);}
+ if (chr == '\x0') {return inpos - _in;}
}
sus_unreach();
}
diff --git a/zap/src/strdup.c b/zap/src/strdup.c
index a7ab6e6..183a909 100644
--- a/zap/src/strdup.c
+++ b/zap/src/strdup.c
@@ -6,6 +6,8 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdlib.h>
char * zap_strdup(sus_attr_unused char const * const _str) {
diff --git a/zap/src/streq.c b/zap/src/streq.c
index 9221cec..1ff4420 100644
--- a/zap/src/streq.c
+++ b/zap/src/streq.c
@@ -6,10 +6,12 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdbool.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_streq\n"
diff --git a/zap/src/strfill.c b/zap/src/strfill.c
index a113094..bd0af33 100644
--- a/zap/src/strfill.c
+++ b/zap/src/strfill.c
@@ -6,6 +6,8 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdint.h>
-void zap_strfill(char * const _str,char const _chr) {zap_memfill(_str,zap_strlen(_str),(uint_least8_t)_chr);}
+void zap_strfill(char * const _str,char const _chr) {zap_memfill(_str,zap_strlen(_str),(unsigned char)_chr);}
diff --git a/zap/src/strlen.c b/zap/src/strlen.c
index eab12e6..84b7d28 100644
--- a/zap/src/strlen.c
+++ b/zap/src/strlen.c
@@ -6,9 +6,11 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_strlen\n"