summaryrefslogtreecommitdiff
path: root/zap
diff options
context:
space:
mode:
Diffstat (limited to 'zap')
-rw-r--r--zap/GNUmakefile7
-rw-r--r--zap/source/amd64/bs/trap.s10
-rw-r--r--zap/source/amd64/mem/cp.s11
-rw-r--r--zap/source/amd64/mem/fill.s51
-rw-r--r--zap/source/any/math/div0.c9
-rw-r--r--zap/source/any/math/exp.c5
-rw-r--r--zap/source/arm64/mem/cp.s4
7 files changed, 77 insertions, 20 deletions
diff --git a/zap/GNUmakefile b/zap/GNUmakefile
index c8993ff..473fb0c 100644
--- a/zap/GNUmakefile
+++ b/zap/GNUmakefile
@@ -27,8 +27,10 @@ OBJ_MEM_WIN1252DEC := source/any/mem/win1252dec.o
OBJ_MEM_WIN1252ENC := source/any/mem/win1252enc.o
ifeq "$(arch)" "amd64"
+OBJ_BS_TRAP := source/$(arch)/bs/trap.o
OBJ_MATH_ABS := source/$(arch)/math/abs.o
OBJ_MEM_CP := source/$(arch)/mem/cp.o
+OBJ_MEM_FILL := source/$(arch)/mem/fill.o
else ifeq "$(arch)" "arm64"
OBJ_MATH_ABS := source/$(arch)/math/abs
OBJ_MEM_CP := source/$(arch)/mem/cp.o
@@ -72,11 +74,10 @@ CFLAGS := \
-pipe \
-std=c99
-ifeq "$(signaldiv0)" "true"
-$(info signaldiv0 has been enabled! Remember to link with csys!)
+ifeq "$(trapdiv0)" "true"
CFLAGS := \
$(CFLAGS) \
- -Dzap_priv_signaldiv0
+ -Dzap_priv_trapdiv0
endif
.PHONY: clean install purge
diff --git a/zap/source/amd64/bs/trap.s b/zap/source/amd64/bs/trap.s
new file mode 100644
index 0000000..93138e5
--- /dev/null
+++ b/zap/source/amd64/bs/trap.s
@@ -0,0 +1,10 @@
+# Copyright 2022-2023 Gabriel Jensen.
+# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>.
+
+.intel_syntax noprefix
+
+.globl zap_priv_trap
+
+zap_priv_trap: # Entry point: unconditionally traps; takes no arguments and has no return path (no ret follows).
+ ud2 # Architecturally-defined undefined opcode: raises an invalid-opcode exception (#UD).
diff --git a/zap/source/amd64/mem/cp.s b/zap/source/amd64/mem/cp.s
index 5985f04..57e716f 100644
--- a/zap/source/amd64/mem/cp.s
+++ b/zap/source/amd64/mem/cp.s
@@ -9,21 +9,22 @@
zap_cp:
# zap_i8 val1;
# zap_i04 val8;
+ # unsigned int128_t val01;
# unsigned int256_t val02;
.big02cp:
cmp rdx,0x20
jl .big01cp # if (num < 0x20u) goto big01cp;
- vmovups ymm0,[rsi] # val01 = *src;
- vmovups [rdi],ymm0 # *dest = val01;
+ vmovups ymm0,[rsi] # val02 = *src;
+ vmovups [rdi],ymm0 # *dest = val02;
add rsi,0x20 # src += 0x20u;
add rdi,0x20 # dest += 0x20u;
sub rdx,0x20 # num -= 0x20u;
jmp .big02cp # goto big02cp;
-.big01cp:
+.big01cp: # big01cp:;
cmp rdx,0x10
jl .wrdcp # if (num < 0x10u) goto wrdcp;
@@ -35,7 +36,7 @@ zap_cp:
sub rdx,0x10 # num -= 0x10u;
jmp .big01cp # goto big01cp;
-.wrdcp:
+.wrdcp: # wrdcp:;
cmp rdx,0x8
jl .bytecp # if (num < 0x8u) goto bytecp;
@@ -47,7 +48,7 @@ zap_cp:
sub rdx,0x8 # num -= 0x8u;
jmp .wrdcp # goto wrdcp
-.bytecp:
+.bytecp: # bytecp:;
test rdx,rdx # if (rem == 0x0)
jz .done # goto done
diff --git a/zap/source/amd64/mem/fill.s b/zap/source/amd64/mem/fill.s
new file mode 100644
index 0000000..0abd2be
--- /dev/null
+++ b/zap/source/amd64/mem/fill.s
@@ -0,0 +1,51 @@
+# Copyright 2022-2023 Gabriel Jensen.
+# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>.
+
+.intel_syntax noprefix
+
+.globl zap_fill
+
+# zap_fill: stores the byte `val` into each of the `num` bytes starting at `dest`.
+#
+# Register use, as read by the code below (presumably the System V AMD64
+# argument order - confirm against the C prototype):
+#   rdi = dest  (advanced as bytes are written)
+#   sil = val   (low byte of rsi; a value, never an address)
+#   rdx = num   (bytes remaining; an unsigned count)
+#   rax = val replicated into all eight bytes (still holds that pattern at ret)
+# Only rax, rdx, rdi and the flags are modified; the stack is not touched.
+zap_fill:
+ # zap_i8 val1;
+ # zap_i04 val8;
+
+ # Broadcast val into every byte of rax: each shl frees the low byte,
+ # which the following mov refills with val.
+ movzx rax,sil # val8 = val;
+ shl rax,0x8 # val8 <<= 0x8u;
+ mov al,sil # val8 |= val;
+ shl rax,0x8 # val8 <<= 0x8u;
+ mov al,sil # val8 |= val;
+ shl rax,0x8 # val8 <<= 0x8u;
+ mov al,sil # val8 |= val;
+ shl rax,0x8 # val8 <<= 0x8u;
+ mov al,sil # val8 |= val;
+ shl rax,0x8 # val8 <<= 0x8u;
+ mov al,sil # val8 |= val;
+ shl rax,0x8 # val8 <<= 0x8u;
+ mov al,sil # val8 |= val;
+ shl rax,0x8 # val8 <<= 0x8u;
+ mov al,sil # val8 |= val;
+ # val1 = val8; (al is the low byte of rax)
+
+.wrdfill: # wrdfill:;
+ cmp rdx,0x8
+ jb .bytefill # if (num < 0x8u) goto bytefill; (unsigned: num is a byte count)
+
+ mov [rdi],rax # *(zap_i04 *)dest = val8;
+
+ add rdi,0x8 # dest += 0x8u;
+ sub rdx,0x8 # num -= 0x8u;
+ jmp .wrdfill # goto wrdfill;
+
+.bytefill: # bytefill:;
+ test rdx,rdx # if (num == 0x0u)
+ jz .done # goto done;
+
+ mov [rdi],al # *dest = val1; (store through dest; rsi holds the fill value, not a pointer)
+
+ inc rdi # ++dest;
+ dec rdx # --num;
+ jmp .bytefill # goto bytefill;
+
+.done:
+ ret # return;
diff --git a/zap/source/any/math/div0.c b/zap/source/any/math/div0.c
index 87fcfa0..07c15a4 100644
--- a/zap/source/any/math/div0.c
+++ b/zap/source/any/math/div0.c
@@ -8,15 +8,6 @@
#include <csys.h>
-#if csys_os_linux
-#include <asm/unistd.h>
-#endif
-
zap_i04 zap_priv_div0(void) {
-#if defined(zap_priv_signaldiv0)
-#if csys_os_linux
- csys_syscall(__NR_kill,csys_syscall(__NR_getpid),0x8u);
-#endif
-#endif
return (zap_i04)-0x1;
}
diff --git a/zap/source/any/math/exp.c b/zap/source/any/math/exp.c
index 6c16213..bf0869d 100644
--- a/zap/source/any/math/exp.c
+++ b/zap/source/any/math/exp.c
@@ -8,9 +8,12 @@
#define zap_priv_exp(_wdth) \
zap_i##_wdth zap_exp##_wdth(zap_i##_wdth const _val,zap_i##_wdth const _n) { \
- if (_val == 0x0) { \
+ if (__builtin_expect(_n == 0x0u,0x0)) { \
return 0x1u; \
} \
+ if (__builtin_expect(_val == 0x0u,0x0)) { \
+ return 0x0u; \
+ } \
zap_i##_wdth val = _val; \
for (zap_i##_wdth i = 0x1u;i < _n;++i) { \
val *= _val; \
diff --git a/zap/source/arm64/mem/cp.s b/zap/source/arm64/mem/cp.s
index b581487..1eb1569 100644
--- a/zap/source/arm64/mem/cp.s
+++ b/zap/source/arm64/mem/cp.s
@@ -14,8 +14,8 @@ zap_cp:
cmp x2,0x8
blt .bytecp // if (num < 0x8u) goto bytecp;
- ldr x3,[x1] // tmp8 = *(zap_i02 *)src;
- str x3,[x0] // *(zap_i02 *)dest = tmp8;
+ ldr x3,[x1] // tmp8 = *(zap_i04 *)src;
+ str x3,[x0] // *(zap_i04 *)dest = tmp8;
add x0,x0,0x8 // dest += 0x8u;
add x1,x1,0x8 // src += 0x8u;