diff options
Diffstat (limited to 'zap')
-rw-r--r-- | zap/GNUmakefile | 7 | ||||
-rw-r--r-- | zap/source/amd64/bs/trap.s | 10 | ||||
-rw-r--r-- | zap/source/amd64/mem/cp.s | 11 | ||||
-rw-r--r-- | zap/source/amd64/mem/fill.s | 51 | ||||
-rw-r--r-- | zap/source/any/math/div0.c | 9 | ||||
-rw-r--r-- | zap/source/any/math/exp.c | 5 | ||||
-rw-r--r-- | zap/source/arm64/mem/cp.s | 4 |
7 files changed, 77 insertions, 20 deletions
diff --git a/zap/GNUmakefile b/zap/GNUmakefile index c8993ff..473fb0c 100644 --- a/zap/GNUmakefile +++ b/zap/GNUmakefile @@ -27,8 +27,10 @@ OBJ_MEM_WIN1252DEC := source/any/mem/win1252dec.o OBJ_MEM_WIN1252ENC := source/any/mem/win1252enc.o ifeq "$(arch)" "amd64" +OBJ_BS_TRAP := source/$(arch)/bs/trap.o OBJ_MATH_ABS := source/$(arch)/math/abs.o OBJ_MEM_CP := source/$(arch)/mem/cp.o +OBJ_MEM_FILL := source/$(arch)/mem/fill.o else ifeq "$(arch)" "arm64" OBJ_MATH_ABS := source/$(arch)/math/abs OBJ_MEM_CP := source/$(arch)/mem/cp.o @@ -72,11 +74,10 @@ CFLAGS := \ -pipe \ -std=c99 -ifeq "$(signaldiv0)" "true" -$(info signaldiv0 has been enabled! Remember to link with csys!) +ifeq "$(trapdiv0)" "true" CFLAGS := \ $(CFLAGS) \ - -Dzap_priv_signaldiv0 + -Dzap_priv_trapdiv0 endif .PHONY: clean install purge diff --git a/zap/source/amd64/bs/trap.s b/zap/source/amd64/bs/trap.s new file mode 100644 index 0000000..93138e5 --- /dev/null +++ b/zap/source/amd64/bs/trap.s @@ -0,0 +1,10 @@ +# Copyright 2022-2023 Gabriel Jensen. +# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. +# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>. 
+ +.intel_syntax noprefix + +.globl zap_priv_trap + +zap_priv_trap: + ud2 diff --git a/zap/source/amd64/mem/cp.s b/zap/source/amd64/mem/cp.s index 5985f04..57e716f 100644 --- a/zap/source/amd64/mem/cp.s +++ b/zap/source/amd64/mem/cp.s @@ -9,21 +9,22 @@ zap_cp: # zap_i8 val1; # zap_i04 val8; + # unsigned int128_t val01; # unsigned int256_t val02; .big02cp: cmp rdx,0x20 jl .big01cp # if (num < 0x20u) goto big01cp; - vmovups ymm0,[rsi] # val01 = *src; - vmovups [rdi],ymm0 # *dest = val01; + vmovups ymm0,[rsi] # val02 = *src; + vmovups [rdi],ymm0 # *dest = val02; add rsi,0x20 # dest += 0x20u; add rdi,0x20 # src += 0x20u; sub rdx,0x20 # num -= 0x20u; jmp .big02cp # goto big02cp; -.big01cp: +.big01cp: # big01cp:; cmp rdx,0x10 jl .wrdcp # if (num < 0x10u) goto wrdcp; @@ -35,7 +36,7 @@ zap_cp: sub rdx,0x10 # num -= 0x10u; jmp .big01cp # goto big01cp; -.wrdcp: +.wrdcp: # wrdcp:; cmp rdx,0x8 jl .bytecp # if (num < 0x8u) goto bytecp; @@ -47,7 +48,7 @@ zap_cp: sub rdx,0x8 # num -= 0x8u; jmp .wrdcp # goto wrdcp -.bytecp: +.bytecp: # bytecp:; test rdx,rdx # if (rem == 0x0) jz .done # goto done diff --git a/zap/source/amd64/mem/fill.s b/zap/source/amd64/mem/fill.s new file mode 100644 index 0000000..0abd2be --- /dev/null +++ b/zap/source/amd64/mem/fill.s @@ -0,0 +1,51 @@ +# Copyright 2022-2023 Gabriel Jensen. +# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. +# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>. 
+
+.intel_syntax noprefix
+
+.globl zap_fill
+
+zap_fill:                       # in: rdi = dest, sil = val, rdx = num
+	# zap_i8  val1;
+	# zap_i04 val8;
+
+	movzx rax,sil           # val8 = val;
+	shl   rax,0x8           # val8 <<= 0x8u;
+	mov   al,sil            # val8 |= val;
+	shl   rax,0x8           # val8 <<= 0x8u;
+	mov   al,sil            # val8 |= val;
+	shl   rax,0x8           # val8 <<= 0x8u;
+	mov   al,sil            # val8 |= val;
+	shl   rax,0x8           # val8 <<= 0x8u;
+	mov   al,sil            # val8 |= val;
+	shl   rax,0x8           # val8 <<= 0x8u;
+	mov   al,sil            # val8 |= val;
+	shl   rax,0x8           # val8 <<= 0x8u;
+	mov   al,sil            # val8 |= val;
+	shl   rax,0x8           # val8 <<= 0x8u;
+	mov   al,sil            # val8 |= val;
+	# val1 = val8;          (al is the low byte of rax; every byte of rax now equals val)
+
+.wrdfill:                       # wrdfill:;
+	cmp   rdx,0x8
+	jb    .bytefill         # if (num < 0x8u) goto bytefill;   (unsigned compare: num is a byte count)
+
+	mov   [rdi],rax         # *(zap_i04 *)dest = val8;
+
+	add   rdi,0x8           # dest += 0x8u;
+	sub   rdx,0x8           # num -= 0x8u;
+	jmp   .wrdfill          # goto wrdfill;
+
+.bytefill:                      # bytefill:;
+	test  rdx,rdx           # if (num == 0x0u)
+	jz    .done             #     goto done;
+
+	mov   [rdi],al          # *dest = val1;   (store through dest in rdi; rsi only carries the fill value)
+
+	inc   rdi               # ++dest;
+	dec   rdx               # --num;
+	jmp   .bytefill         # goto bytefill;
+
+.done:                          # done:;
+	ret                     # return;
diff --git a/zap/source/any/math/div0.c b/zap/source/any/math/div0.c index 87fcfa0..07c15a4 100644 --- a/zap/source/any/math/div0.c +++ b/zap/source/any/math/div0.c @@ -8,15 +8,6 @@ #include <csys.h> -#if csys_os_linux -#include <asm/unistd.h> -#endif - zap_i04 zap_priv_div0(void) { -#if defined(zap_priv_signaldiv0) -#if csys_os_linux - csys_syscall(__NR_kill,csys_syscall(__NR_getpid),0x8u); -#endif -#endif return (zap_i04)-0x1; } diff --git a/zap/source/any/math/exp.c b/zap/source/any/math/exp.c index 6c16213..bf0869d 100644 --- a/zap/source/any/math/exp.c +++ b/zap/source/any/math/exp.c @@ -8,9 +8,12 @@ #define zap_priv_exp(_wdth) \ zap_i##_wdth zap_exp##_wdth(zap_i##_wdth const _val,zap_i##_wdth const _n) { \ - if (_val == 0x0) { \ + if (__builtin_expect(_n == 0x0u,0x0)) { \ return 0x1u; \ } \ + if (__builtin_expect(_val == 0x0u,0x0)) { \ + return 0x0u; \ + } \ zap_i##_wdth val = _val; \ for (zap_i##_wdth i = 0x1u;i < _n;++i) { \ val *= _val; \ diff --git a/zap/source/arm64/mem/cp.s
b/zap/source/arm64/mem/cp.s index b581487..1eb1569 100644 --- a/zap/source/arm64/mem/cp.s +++ b/zap/source/arm64/mem/cp.s @@ -14,8 +14,8 @@ zap_cp: cmp x2,0x8 blt .bytecp // if (num < 0x8u) goto bytecp; - ldr x3,[x1] // tmp8 = *(zap_i02 *)src; - str x3,[x0] // *(zap_i02 *)dest = tmp8; + ldr x3,[x1] // tmp8 = *(zap_i04 *)src; + str x3,[x0] // *(zap_i04 *)dest = tmp8; add x0,x0,0x8 // dest += 0x8u; add x1,x1,0x8 // src += 0x8u; |