summaryrefslogtreecommitdiff
path: root/zap
diff options
context:
space:
mode:
Diffstat (limited to 'zap')
-rw-r--r--zap/Makefile64
-rw-r--r--zap/include-priv/zap/priv.h11
-rw-r--r--zap/include/zap/base.h42
-rw-r--r--zap/include/zap/math.h94
-rw-r--r--zap/include/zap/mem.h59
-rw-r--r--zap/src/abs.c96
-rw-r--r--zap/src/fastimpl.c6
-rw-r--r--zap/src/fma.c171
-rw-r--r--zap/src/fndbyte.c19
-rw-r--r--zap/src/fndchr.c4
-rw-r--r--zap/src/foreach.c10
-rw-r--r--zap/src/memcmp.c8
-rw-r--r--zap/src/memcpy.c10
-rw-r--r--zap/src/memdup.c2
-rw-r--r--zap/src/memeq.c10
-rw-r--r--zap/src/memfill.c16
-rw-r--r--zap/src/strcmp.c4
-rw-r--r--zap/src/strcpy.c6
-rw-r--r--zap/src/strdup.c2
-rw-r--r--zap/src/streq.c4
-rw-r--r--zap/src/strfill.c4
-rw-r--r--zap/src/strlen.c4
22 files changed, 559 insertions, 87 deletions
diff --git a/zap/Makefile b/zap/Makefile
new file mode 100644
index 0000000..b7db3dc
--- /dev/null
+++ b/zap/Makefile
@@ -0,0 +1,64 @@
+# TOOLS
+
+#CC = clang
+#CC = gcc
+
+# TOOL FLAGS
+
+CFLAGS = \
+ -Iinclude \
+ -Iinclude-priv \
+ -O3 \
+ -fPIC \
+ -g \
+ -march=native \
+ -std=c99 \
+ -Wall \
+ -Wextra \
+ -Wpedantic
+
+# ARTIFACTS
+
+OBJS = \
+ src/abs.o \
+ src/fastimpl.o \
+ src/fma.o \
+ src/fndbyte.o \
+ src/fndchr.o \
+ src/foreach.o \
+ src/memcmp.o \
+ src/memcpy.o \
+ src/memdup.o \
+ src/memeq.o \
+ src/memfill.o \
+ src/strcmp.o \
+ src/strdup.o \
+ src/streq.o \
+ src/strfill.o \
+ src/strcpy.o \
+ src/strlen.o
+
+LIB = libzap.a
+
+# OPTIONS
+
+# Uncomment to disable assembly algorithms:
+#CFLAGS += -Dzap_priv_noasm
+
+# Uncomment to enable freestanding mode (requires no runtime):
+#CFLAGS += \
+ -Dzap_priv_nostdlib \
+ -ffreestanding
+
+# TARGETS
+
+.PHONY: clean purge
+
+$(LIB): $(OBJS)
+ ar r $@ $(OBJS)
+
+clean:
+ rm -fr $(OBJS)
+
+purge: clean
+ rm -fr $(LIB)
diff --git a/zap/include-priv/zap/priv.h b/zap/include-priv/zap/priv.h
index 24718bb..8ebbd39 100644
--- a/zap/include-priv/zap/priv.h
+++ b/zap/include-priv/zap/priv.h
@@ -4,16 +4,15 @@
If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
-#if !defined(zap_hdr_priv)
-#define zap_hdr_priv
+#if !defined(zap_priv_hdr_priv)
+#define zap_priv_hdr_priv
#include <zap/base.h>
-#include <stddef.h>
-#include <stdint.h>
-
#if (defined(sus_comp_gnu) || defined(sus_comp_llvm)) && defined(sus_os_unix) && !defined(zap_priv_noasm) && (defined(sus_arch_amd64) || defined(sus_arch_ia32))
-#define zap_priv_fastimpl
+#define zap_priv_fastimpl (0x1u)
+#else
+#define zap_priv_fastimpl (0x0u)
#endif
#endif
diff --git a/zap/include/zap/base.h b/zap/include/zap/base.h
index 4866051..a52414b 100644
--- a/zap/include/zap/base.h
+++ b/zap/include/zap/base.h
@@ -5,31 +5,12 @@
*/
#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
#include <susinfo.h>
-#if !defined(zap_hdr_base)
-#define zap_hdr_base
+#if !defined(zap_priv_hdr_base)
+#define zap_priv_hdr_base
-#define zap_ver ((uint_Least64_t)0xEu)
-
-#if defined(sus_lang_asm)
-
-.extern zap_fndbyte
-.extern zap_fndchr
-.extern zap_foreach
-.extern zap_memcmp
-.extern zap_memcpy
-.extern zap_memeq
-.extern zap_memfill
-.extern zap_strcmp
-.extern zap_strcpy
-.extern zap_streq
-.extern zap_strfill
-.extern zap_strlen
-
-#else
+#define zap_ver (0xFu)
#if defined(sus_lang_cxx)
extern "C" {
@@ -37,23 +18,6 @@ extern "C" {
extern bool const zap_fastimpl;
-/* Memory sequence functions: */
-sus_attr_hot sus_attr_nothrw sus_attr_useret size_t zap_fndbyte( void const * ptr, size_t num, unsigned char byte);
-sus_attr_hot sus_attr_nothrw sus_attr_useret size_t zap_fndchr( char const * str, char chr);
-sus_attr_hot sus_attr_nothrw void zap_foreach( void * ptr, size_t sz, size_t num, void (* fn)(void *));
-sus_attr_hot sus_attr_nothrw sus_attr_useret int_least8_t zap_memcmp( void const * lstr,size_t num, void const * rstr);
-sus_attr_hot sus_attr_nothrw void zap_memcpy( void const * in, size_t num, void * out);
-sus_attr_alloc sus_attr_allocsz(0x2) sus_attr_hot sus_attr_nothrw sus_attr_useret void * zap_memdup( void const * ptr, size_t num);
-sus_attr_hot sus_attr_nothrw sus_attr_useret bool zap_memeq( void const * lptr,size_t num, void const * rptr);
-sus_attr_hot sus_attr_nothrw void zap_memfill( void * ptr, size_t num, unsigned char val);
-sus_attr_hot sus_attr_nothrw sus_attr_useret int_least8_t zap_strcmp( char const * lstr,char const * rstr);
-sus_attr_hot sus_attr_nothrw sus_attr_useret size_t zap_strcpy( char const * in, char * out);
-sus_attr_alloc sus_attr_hot sus_attr_nothrw sus_attr_useret char * zap_strdup( char const * str);
-sus_attr_hot sus_attr_nothrw sus_attr_useret bool zap_streq( char const * lstr,char const * rstr);
-sus_attr_hot sus_attr_nothrw void zap_strfill( char * lstr,char chr);
-sus_attr_hot sus_attr_nothrw sus_attr_useret size_t zap_strlen( char const * str);
-
-#endif
#if defined(sus_lang_cxx)
}
#endif
diff --git a/zap/include/zap/math.h b/zap/include/zap/math.h
new file mode 100644
index 0000000..97afb66
--- /dev/null
+++ b/zap/include/zap/math.h
@@ -0,0 +1,94 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <zap/base.h>
+
+#include <susinfo.h>
+
+#if !defined(zap_priv_hdr_math)
+#define zap_priv_hdr_math
+
+#if !defined(sus_langfeat_c_llng) && !defined(sus_langfeat_cxx_llng)
+#error The (long long) type is required but unsupported by the implementation !
+#endif
+
+#if defined(sus_lang_cxx)
+extern "C" {
+#endif
+
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret signed char zap_abs_c( signed char val);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret int zap_abs_i( int val);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret long zap_abs_l( long val);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret long long zap_abs_ll( long long val);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret short zap_abs_s( short val);
+
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret signed char zap_fma_c( signed char a,signed char b,signed char c);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret int zap_fma_i( int a,int b,int c);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret long zap_fma_l( long a,long b,long c);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret long long zap_fma_ll( long long a,long long b,long long c);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret short zap_fma_s( short a,short b,short c);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret unsigned char zap_fma_uc( unsigned char a,unsigned char b,unsigned char c);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret unsigned int zap_fma_ui( unsigned int a,unsigned int b,unsigned int c);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret unsigned long zap_fma_ul( unsigned long a,unsigned long b,unsigned long c);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret unsigned long long zap_fma_ull(unsigned long long a,unsigned long long b,unsigned long long c);
+sus_attr_const sus_attr_hot sus_attr_nothrw sus_attr_useret unsigned short zap_fma_us( unsigned short a,unsigned short b,unsigned short c);
+
+#if defined(sus_langfeat_c_generic)
+
+#define zap_abs_tg(_val) /* Type-generic abs: picks zap_abs_<sufx> from the type of _val and calls it. */ \
+ (_Generic((_val), \
+ signed char:zap_abs_c, \
+ int: zap_abs_i, \
+ long: zap_abs_l, \
+ long long: zap_abs_ll,\
+ short: zap_abs_s /* No trailing comma: the association list grammar does not allow one. */ \
+ )((_val)))
+
+#define zap_fma_tg(_a,_b,_c) /* Type-generic fma: picks zap_fma_<sufx> from the type of _a only, then calls it with all three. */ \
+ (_Generic((_a), \
+ signed char: zap_fma_c, \
+ int: zap_fma_i, \
+ long: zap_fma_l, \
+ long long: zap_fma_ll, \
+ short: zap_fma_s, \
+ unsigned char: zap_fma_uc, \
+ unsigned int: zap_fma_ui, \
+ unsigned long: zap_fma_ul, \
+ unsigned long long:zap_fma_ull,\
+ unsigned short: zap_fma_us /* No trailing comma: the association list grammar does not allow one. */ \
+ )((_a),(_b),(_c)))
+
+#elif defined(sus_lang_cxx)
+
+extern "C++" { /* C++ overload sets: each overload forwards to the C function of the matching type. */
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline signed char zap_priv_cxxtg_abs(signed char const _val) {return ::zap_abs_c( _val);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline int zap_priv_cxxtg_abs(int const _val) {return ::zap_abs_i( _val);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline long zap_priv_cxxtg_abs(long const _val) {return ::zap_abs_l( _val);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline long long zap_priv_cxxtg_abs(long long const _val) {return ::zap_abs_ll(_val);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline short zap_priv_cxxtg_abs(short const _val) {return ::zap_abs_s( _val);}
+
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline signed char zap_priv_cxxtg_fma(signed char const _a,signed char const _b,signed char const _c) {return ::zap_fma_c( _a,_b,_c);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline int zap_priv_cxxtg_fma(int const _a,int const _b,int const _c) {return ::zap_fma_i( _a,_b,_c);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline long zap_priv_cxxtg_fma(long const _a,long const _b,long const _c) {return ::zap_fma_l( _a,_b,_c);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline long long zap_priv_cxxtg_fma(long long const _a,long long const _b,long long const _c) {return ::zap_fma_ll( _a,_b,_c);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline short zap_priv_cxxtg_fma(short const _a,short const _b,short const _c) {return ::zap_fma_s( _a,_b,_c);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline unsigned char zap_priv_cxxtg_fma(unsigned char const _a,unsigned char const _b,unsigned char const _c) {return ::zap_fma_uc( _a,_b,_c);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline unsigned int zap_priv_cxxtg_fma(unsigned int const _a,unsigned int const _b,unsigned int const _c) {return ::zap_fma_ui( _a,_b,_c);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline unsigned long zap_priv_cxxtg_fma(unsigned long const _a,unsigned long const _b,unsigned long const _c) {return ::zap_fma_ul( _a,_b,_c);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline unsigned long long zap_priv_cxxtg_fma(unsigned long long const _a,unsigned long long const _b,unsigned long long const _c) {return ::zap_fma_ull(_a,_b,_c);}
+ sus_attr_const sus_attr_inline sus_attr_useret sus_inline unsigned short zap_priv_cxxtg_fma(unsigned short const _a,unsigned short const _b,unsigned short const _c) {return ::zap_fma_us( _a,_b,_c);}
+}
+
+#define zap_abs_tg(_val) (::zap_priv_cxxtg_abs( _val))
+#define zap_fma_tg(_a, _b,_c) (::zap_priv_cxxtg_fma(_a, _b,_c))
+
+#endif
+
+#if defined(sus_lang_cxx)
+}
+#endif
+
+#endif
diff --git a/zap/include/zap/mem.h b/zap/include/zap/mem.h
new file mode 100644
index 0000000..20d7326
--- /dev/null
+++ b/zap/include/zap/mem.h
@@ -0,0 +1,59 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <zap/base.h>
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <susinfo.h>
+
+#if !defined(zap_priv_hdr_mem)
+#define zap_priv_hdr_mem
+
+#if defined(sus_lang_asm)
+
+.extern zap_fndbyte
+.extern zap_fndchr
+.extern zap_foreach
+.extern zap_memcmp
+.extern zap_memcpy
+.extern zap_memeq
+.extern zap_memfill
+.extern zap_memfill
+.extern zap_strcmp
+.extern zap_strcpy
+.extern zap_streq
+.extern zap_strfill
+.extern zap_strlen
+
+#else
+
+#if defined(sus_lang_cxx)
+extern "C" {
+#endif
+
+sus_attr_hot sus_attr_nothrw sus_attr_useret size_t zap_fndbyte( void const * ptr, size_t num, unsigned char byte);
+sus_attr_hot sus_attr_nothrw sus_attr_useret size_t zap_fndchr( char const * str, char chr);
+sus_attr_hot void zap_foreach( void * ptr, size_t sz, size_t num, void (* fn)(void *));
+sus_attr_hot sus_attr_nothrw sus_attr_useret int_least8_t zap_memcmp( void const * lstr,size_t num, void const * rstr);
+sus_attr_hot sus_attr_nothrw void zap_memcpy( void const * in, size_t num, void * out);
+sus_attr_alloc sus_attr_allocsz(0x2) sus_attr_hot sus_attr_nothrw sus_attr_useret void * zap_memdup( void const * ptr, size_t num);
+sus_attr_hot sus_attr_nothrw sus_attr_useret bool zap_memeq( void const * lptr,size_t num, void const * rptr);
+sus_attr_hot sus_attr_nothrw void zap_memfill( void * ptr, size_t num, unsigned char val);
+sus_attr_hot sus_attr_nothrw sus_attr_useret int_least8_t zap_strcmp( char const * lstr,char const * rstr);
+sus_attr_hot sus_attr_nothrw sus_attr_useret size_t zap_strcpy( char const * in, char * out);
+sus_attr_alloc sus_attr_hot sus_attr_nothrw sus_attr_useret char * zap_strdup( char const * str);
+sus_attr_hot sus_attr_nothrw sus_attr_useret bool zap_streq( char const * lstr,char const * rstr);
+sus_attr_hot sus_attr_nothrw void zap_strfill( char * lstr,char chr);
+sus_attr_hot sus_attr_nothrw sus_attr_useret size_t zap_strlen( char const * str);
+
+#endif
+#if defined(sus_lang_cxx)
+}
+#endif
+
+#endif
diff --git a/zap/src/abs.c b/zap/src/abs.c
new file mode 100644
index 0000000..8fe97e6
--- /dev/null
+++ b/zap/src/abs.c
@@ -0,0 +1,96 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <zap/priv.h>
+
+#include <zap/math.h>
+
+#include <stdint.h>
+
+#if zap_priv_fastimpl && defined(sus_arch_amd64) /* The assembly below reads its argument from (r)di, so it is amd64-only. */
+__asm__ (
+ ".globl zap_abs_c\n"
+ ".globl zap_abs_i\n"
+ ".globl zap_abs_l\n"
+ ".globl zap_abs_ll\n"
+ ".globl zap_abs_s\n"
+
+ "zap_abs_c:\n"
+ /*
+ signed char val (dil). Branch-free: mask = val >> 7 (all sign bits); result = (val ^ mask) - mask.
+ */
+#if defined(sus_arch_amd64)
+ "movb %dil,%al\n"
+ "sarb $0x7,%al\n"
+ "xorb %al,%dil\n"
+ "subb %al,%dil\n"
+ "movb %dil,%al\n"
+ "ret\n"
+#endif
+
+ "zap_abs_i:\n"
+ /*
+ int val (edi). Same scheme with a shift of 31, the sign-bit position of a 32-bit operand.
+ */
+#if defined(sus_arch_amd64)
+ "movl %edi,%eax\n"
+ "sarl $0x1F,%eax\n"
+ "xorl %eax,%edi\n"
+ "subl %eax,%edi\n"
+ "movl %edi,%eax\n"
+ "ret\n"
+#endif
+
+ "zap_abs_l:\n"
+ /*
+ long val (rdi). Same scheme with a shift of 63; treats long as a 64-bit operand.
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdi,%rax\n"
+ "sarq $0x3F,%rax\n"
+ "xorq %rax,%rdi\n"
+ "subq %rax,%rdi\n"
+ "movq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_abs_ll:\n"
+ /*
+ long long val (rdi). Identical instruction sequence to zap_abs_l.
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdi,%rax\n"
+ "sarq $0x3F,%rax\n"
+ "xorq %rax,%rdi\n"
+ "subq %rax,%rdi\n"
+ "movq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_abs_s:\n"
+ /*
+ short val (di). Same scheme with a shift of 15, the sign-bit position of a 16-bit operand.
+ */
+#if defined(sus_arch_amd64)
+ "movw %di,%ax\n"
+ "sarw $0xF,%ax\n"
+ "xorw %ax,%di\n"
+ "subw %ax,%di\n"
+ "movw %di,%ax\n"
+ "ret\n"
+#endif
+);
+#else /* Portable C fallback; also used on ia32, where the register-based assembly above does not apply. */
+#define zap_local_abs(_typ,_sufx) \
+ _typ zap_abs_ ## _sufx (_typ const _val) {return _val > (_typ)0x0 ? _val : -_val;}
+
+zap_local_abs(signed char,c)
+zap_local_abs(int,i)
+zap_local_abs(long,l)
+zap_local_abs(long long,ll)
+zap_local_abs(short,s)
+
+#endif
diff --git a/zap/src/fastimpl.c b/zap/src/fastimpl.c
index 71ded00..2541a41 100644
--- a/zap/src/fastimpl.c
+++ b/zap/src/fastimpl.c
@@ -9,8 +9,4 @@
#include <stdbool.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
-bool const zap_fastimpl = true;
-#else
-bool const zap_fastimpl = false;
-#endif
+bool const zap_fastimpl = zap_priv_fastimpl;
diff --git a/zap/src/fma.c b/zap/src/fma.c
new file mode 100644
index 0000000..b2f45ad
--- /dev/null
+++ b/zap/src/fma.c
@@ -0,0 +1,171 @@
+/*
+ Copyright 2022 Gabriel Jensen.
+ This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+ If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+*/
+
+#include <zap/priv.h>
+
+#include <zap/math.h>
+
+#include <stdint.h>
+
+#if zap_priv_fastimpl && defined(sus_arch_amd64) /* Every body below is amd64-only; without this, other fastimpl targets would get empty labels. */
+__asm__ ( /* Each routine returns a + b * c, reading a, b and c from (r)di, (r)si and (r)dx respectively. */
+ ".globl zap_fma_c\n"
+ ".globl zap_fma_i\n"
+ ".globl zap_fma_l\n"
+ ".globl zap_fma_ll\n"
+ ".globl zap_fma_s\n"
+ ".globl zap_fma_uc\n"
+ ".globl zap_fma_ui\n"
+ ".globl zap_fma_ul\n"
+ ".globl zap_fma_ull\n"
+ ".globl zap_fma_us\n"
+
+ "zap_fma_c:\n"
+ /*
+ signed char a
+ signed char b
+ signed char c
+ */
+#if defined(sus_arch_amd64)
+ "movb %sil,%al\n"
+ "imulb %dl\n"
+ "addb %dil,%al\n"
+ "ret\n"
+#endif
+
+ "zap_fma_i:\n"
+ /*
+ int a
+ int b
+ int c
+ */
+#if defined(sus_arch_amd64)
+ "movl %edx,%eax\n"
+ "imull %esi\n"
+ "addl %edi,%eax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_l:\n"
+ /*
+ long a
+ long b
+ long c
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdx,%rax\n"
+ "imulq %rsi\n"
+ "addq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_ll:\n"
+ /*
+ long long a
+ long long b
+ long long c
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdx,%rax\n"
+ "imulq %rsi\n"
+ "addq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_s:\n"
+ /*
+ short a
+ short b
+ short c
+ */
+#if defined(sus_arch_amd64)
+ "movw %dx,%ax\n"
+ "imulw %si\n"
+ "addw %di,%ax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_uc:\n"
+ /*
+ unsigned char a
+ unsigned char b
+ unsigned char c
+ */
+#if defined(sus_arch_amd64)
+ "movb %sil,%al\n" /* The byte form of mul leaves its product in ax (the wider forms use a dx:ax-style pair), so dl (c) is not overwritten. */
+ "mulb %dl\n"
+ "addb %dil,%al\n"
+ "ret\n"
+#endif
+
+ "zap_fma_ui:\n"
+ /*
+ unsigned int a
+ unsigned int b
+ unsigned int c
+ */
+#if defined(sus_arch_amd64)
+ "movl %edx,%eax\n"
+ "mull %esi\n"
+ "addl %edi,%eax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_ul:\n"
+ /*
+ unsigned long a
+ unsigned long b
+ unsigned long c
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdx,%rax\n"
+ "mulq %rsi\n"
+ "addq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_ull:\n"
+ /*
+ unsigned long long a
+ unsigned long long b
+ unsigned long long c
+ */
+#if defined(sus_arch_amd64)
+ "movq %rdx,%rax\n" /* rdx is overwritten by mulq anyway, so c is moved out of it first and used as the implicit operand (multiplication is commutative). */
+ "mulq %rsi\n"
+ "addq %rdi,%rax\n"
+ "ret\n"
+#endif
+
+ "zap_fma_us:\n"
+ /*
+ unsigned short a
+ unsigned short b
+ unsigned short c
+ */
+#if defined(sus_arch_amd64)
+ "movw %dx,%ax\n"
+ "mulw %si\n"
+ "addw %di,%ax\n"
+ "ret\n"
+#endif
+);
+#else /* Portable C fallback: one definition per type, generated by the macro below. */
+#define zap_local_fma(_typ,_sufx) \
+ _typ zap_fma_ ## _sufx (_typ const _a,_typ const _b,_typ const _c) {return _a + _b * _c;}
+
+zap_local_fma(signed char,c)
+zap_local_fma(int,i)
+zap_local_fma(long,l)
+zap_local_fma(long long,ll)
+zap_local_fma(short,s)
+zap_local_fma(unsigned char,uc)
+zap_local_fma(unsigned int,ui)
+zap_local_fma(unsigned long,ul)
+zap_local_fma(unsigned long long,ull)
+zap_local_fma(unsigned short,us)
+
+#endif
diff --git a/zap/src/fndbyte.c b/zap/src/fndbyte.c
index 3283eec..e6e6070 100644
--- a/zap/src/fndbyte.c
+++ b/zap/src/fndbyte.c
@@ -6,18 +6,20 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_fndbyte\n"
"zap_fndbyte:\n"
/*
- void const * ptr
- size_t num
- uint_least8_t byte
+ void const * pos
+ size_t num
+ unsigned char byte
*/
#if defined(sus_arch_amd64)
/* rax: Address of the current element. */
@@ -68,10 +70,11 @@ __asm__ (
#endif
);
#else
-size_t zap_fndbyte(void const * const _ptr,size_t const _num,uint_least8_t const _byte) {
- uint_least8_t const * ptr = (uint_least8_t const *)_ptr;
- uint_least8_t const * const afterbuf = ptr + _num;
- for (;ptr != afterbuf;++ptr) {sus_unlikely (*ptr == _byte) {return ptr - (uint_least8_t const *)_ptr;}}
+size_t zap_fndbyte(void const * const _ptr,size_t const _num,unsigned char const _byte) {
+ unsigned char const * startpos = _ptr;
+ unsigned char const * pos = startpos;
+ unsigned char const * const afterbuf = pos + _num;
+ for (;pos != afterbuf;++pos) {sus_unlikely (*pos == _byte) {return pos - startpos;}}
return SIZE_MAX;
}
#endif
diff --git a/zap/src/fndchr.c b/zap/src/fndchr.c
index 5cf78f8..fc4eb2b 100644
--- a/zap/src/fndchr.c
+++ b/zap/src/fndchr.c
@@ -6,10 +6,12 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_fndchr\n"
diff --git a/zap/src/foreach.c b/zap/src/foreach.c
index 54911e9..5e281d4 100644
--- a/zap/src/foreach.c
+++ b/zap/src/foreach.c
@@ -6,11 +6,13 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
void zap_foreach(void * const _ptr,size_t const _sz,size_t const _num,void (* const _fn)(void *)) {
- unsigned char * ptr = _ptr;
- size_t const numbyte = _sz * _num;
- void * const afterbuf = ptr + numbyte;
- for (;ptr != afterbuf;ptr += _sz) {_fn(ptr);}
+ unsigned char * pos = _ptr;
+ size_t const numbyte = _sz * _num;
+ unsigned char * const afterbuf = pos + numbyte;
+ for (;pos != afterbuf;pos += _sz) {_fn(pos);}
}
diff --git a/zap/src/memcmp.c b/zap/src/memcmp.c
index 31e5161..0fdf13a 100644
--- a/zap/src/memcmp.c
+++ b/zap/src/memcmp.c
@@ -6,16 +6,18 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
int_least8_t zap_memcmp(void const * const _lstr,size_t const _num,void const * const _rstr) {
- unsigned char const * lpos = (unsigned char const *)_lstr;
- unsigned char const * rpos = (unsigned char const *)_rstr;
+ unsigned char const * lpos = _lstr;
+ unsigned char const * rpos = _rstr;
unsigned char const * const afterlbuf = lpos + _num;
for (;lpos != afterlbuf;++lpos,++rpos) {
unsigned char const lbyte = *lpos;
unsigned char const rbyte = *rpos;
- sus_likely (lbyte != rbyte) {return lbyte < rbyte ? INT8_MIN : INT8_MAX;}
+ sus_likely (lbyte != rbyte) {return lbyte < rbyte ? INT_LEAST8_MIN : INT_LEAST8_MAX;}
}
return 0x0;
}
diff --git a/zap/src/memcpy.c b/zap/src/memcpy.c
index 8fa98ae..ae923c3 100644
--- a/zap/src/memcpy.c
+++ b/zap/src/memcpy.c
@@ -6,10 +6,12 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_memcpy\n"
@@ -133,9 +135,9 @@ __asm__ (
);
#else
void zap_memcpy(void const * const _in,size_t const _num,void * const _out) {
- uint_least8_t const * in = (uint_least8_t const *)_in;
- uint_least8_t * out = (uint_least8_t *)_out;
- uint_least8_t const * const afterbuf = in + _num;
+ unsigned char const * in = _in;
+ unsigned char * out = _out;
+ unsigned char const * const afterbuf = in + _num;
for (;in != afterbuf;++in,++out) {*out = *in;}
}
#endif
diff --git a/zap/src/memdup.c b/zap/src/memdup.c
index 3670eb3..9b56314 100644
--- a/zap/src/memdup.c
+++ b/zap/src/memdup.c
@@ -6,6 +6,8 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdlib.h>
void * zap_memdup(sus_attr_unused void const * const _ptr,sus_attr_unused size_t const _num) {
diff --git a/zap/src/memeq.c b/zap/src/memeq.c
index 75ecc12..7dce213 100644
--- a/zap/src/memeq.c
+++ b/zap/src/memeq.c
@@ -6,11 +6,13 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_memeq\n"
@@ -101,9 +103,9 @@ __asm__ (
);
#else
bool zap_memeq(void const * const _lptr,size_t const _num,void const * const _rptr) {
- uint_least8_t const * lpos = (uint_least8_t const *)_lptr;
- uint_least8_t const * rpos = (uint_least8_t const *)_rptr;
- uint_least8_t const * const afterbuf = lpos + _num;
+ unsigned char const * lpos = _lptr;
+ unsigned char const * rpos = _rptr;
+ unsigned char const * const afterbuf = lpos + _num;
for (;lpos != afterbuf;++lpos,++rpos) {sus_likely (*lpos != *rpos) {return false;}}
return true;
}
diff --git a/zap/src/memfill.c b/zap/src/memfill.c
index c9a9797..1aebd29 100644
--- a/zap/src/memfill.c
+++ b/zap/src/memfill.c
@@ -6,18 +6,20 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_memfill\n"
"zap_memfill:\n"
/*
- void const * ptr
- size_t num
- uint_least8_t val
+ void const * ptr
+ size_t num
+ unsigned char val
*/
#if defined(sus_arch_amd64)
/* rdi: Address of the current element. */
@@ -50,9 +52,9 @@ __asm__ (
#endif
);
#else
-void zap_memfill(void * const _ptr,size_t const _num,uint_least8_t const _byte) {
- uint_least8_t * pos = (uint_least8_t *)_ptr;
- uint_least8_t * const afterbuf = pos + _num;
+void zap_memfill(void * const _ptr,size_t const _num,unsigned char const _byte) {
+ unsigned char * pos = _ptr;
+ unsigned char * const afterbuf = pos + _num;
for (;pos != afterbuf;++pos) {*pos = _byte;}
}
#endif
diff --git a/zap/src/strcmp.c b/zap/src/strcmp.c
index 101f7dc..0ed0a59 100644
--- a/zap/src/strcmp.c
+++ b/zap/src/strcmp.c
@@ -6,6 +6,8 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdint.h>
int_least8_t zap_strcmp(char const * const _lstr,char const * const _rstr) {
@@ -14,7 +16,7 @@ int_least8_t zap_strcmp(char const * const _lstr,char const * const _rstr) {
for (;;++lpos,++rpos) {
unsigned char const lchr = *lpos;
unsigned char const rchr = *rpos;
- sus_likely (lchr != rchr) {return lchr < rchr ? INT8_MIN : INT8_MAX;}
+ sus_likely (lchr != rchr) {return lchr < rchr ? INT_LEAST8_MIN : INT_LEAST8_MAX;}
sus_unlikely (lchr == (unsigned char)0x0) {return 0x0;}
}
sus_unreach();
diff --git a/zap/src/strcpy.c b/zap/src/strcpy.c
index 943cb2c..616af7f 100644
--- a/zap/src/strcpy.c
+++ b/zap/src/strcpy.c
@@ -6,9 +6,11 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_strcpy\n"
@@ -62,7 +64,7 @@ size_t zap_strcpy(char const * const _in,char * const _out) {
for (;;++inpos,++outpos) {
char const chr = *inpos;
*outpos = chr;
- if (chr == '\x0') {return (size_t)(inpos - _in);}
+ if (chr == '\x0') {return inpos - _in;}
}
sus_unreach();
}
diff --git a/zap/src/strdup.c b/zap/src/strdup.c
index a7ab6e6..183a909 100644
--- a/zap/src/strdup.c
+++ b/zap/src/strdup.c
@@ -6,6 +6,8 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdlib.h>
char * zap_strdup(sus_attr_unused char const * const _str) {
diff --git a/zap/src/streq.c b/zap/src/streq.c
index 9221cec..1ff4420 100644
--- a/zap/src/streq.c
+++ b/zap/src/streq.c
@@ -6,10 +6,12 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdbool.h>
#include <stdint.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_streq\n"
diff --git a/zap/src/strfill.c b/zap/src/strfill.c
index a113094..bd0af33 100644
--- a/zap/src/strfill.c
+++ b/zap/src/strfill.c
@@ -6,6 +6,8 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stdint.h>
-void zap_strfill(char * const _str,char const _chr) {zap_memfill(_str,zap_strlen(_str),(uint_least8_t)_chr);}
+void zap_strfill(char * const _str,char const _chr) {zap_memfill(_str,zap_strlen(_str),(unsigned char)_chr);}
diff --git a/zap/src/strlen.c b/zap/src/strlen.c
index eab12e6..84b7d28 100644
--- a/zap/src/strlen.c
+++ b/zap/src/strlen.c
@@ -6,9 +6,11 @@
#include <zap/priv.h>
+#include <zap/mem.h>
+
#include <stddef.h>
-#if defined(zap_priv_fastimpl)
+#if zap_priv_fastimpl
__asm__ (
".globl zap_strlen\n"