diff options
Diffstat (limited to 'zap')
-rw-r--r-- | zap/GNUmakefile | 16 | ||||
-rw-r--r-- | zap/include/zap/bs.h | 4 | ||||
-rw-r--r-- | zap/include/zap/mem.h | 14 | ||||
-rw-r--r-- | zap/source/amd64/math/abs.s | 34 | ||||
-rw-r--r-- | zap/source/amd64/mem/cp.s | 63 | ||||
-rw-r--r-- | zap/source/any/mth/abs.c (renamed from zap/source/any/math/abs.c) | 0 | ||||
-rw-r--r-- | zap/source/any/mth/div0.c (renamed from zap/source/any/math/div0.c) | 0 | ||||
-rw-r--r-- | zap/source/any/mth/divmod.c (renamed from zap/source/any/math/divmod.c) | 0 | ||||
-rw-r--r-- | zap/source/arm64/math/abs.s | 29 | ||||
-rw-r--r-- | zap/source/arm64/mem/cp.s | 40 |
10 files changed, 187 insertions, 13 deletions
diff --git a/zap/GNUmakefile b/zap/GNUmakefile
index ecc858b..9c85885 100644
--- a/zap/GNUmakefile
+++ b/zap/GNUmakefile
@@ -25,11 +25,16 @@ OBJ_MEM_UTF8ENCLEN := source/any/mem/utf8enclen.o
 OBJ_MEM_WIN1252DEC := source/any/mem/win1252dec.o
 OBJ_MEM_WIN1252ENC := source/any/mem/win1252enc.o

+ifeq "$(arch)" "amd64"
+OBJ_MATH_ABS := source/$(arch)/math/abs.o
+OBJ_MEM_CP := source/$(arch)/mem/cp.o
+else ifeq "$(arch)" "arm64"
+OBJ_MATH_ABS := source/$(arch)/math/abs.o
+OBJ_MEM_CP := source/$(arch)/mem/cp.o
+endif
+
 OBJS := \
 	$(OBJ_BS_TRAP) \
-	$(OBJ_MATH_ABS) \
-	$(OBJ_MATH_DIV0) \
-	$(OBJ_MATH_DIVMOD) \
 	$(OBJ_MEM_CP) \
 	$(OBJ_MEM_EQ) \
 	$(OBJ_MEM_FILL) \
@@ -39,7 +44,10 @@ OBJS := \
 	$(OBJ_MEM_UTF8ENC) \
 	$(OBJ_MEM_UTF8ENCLEN) \
 	$(OBJ_MEM_WIN1252DEC) \
-	$(OBJ_MEM_WIN1252ENC)
+	$(OBJ_MEM_WIN1252ENC) \
+	$(OBJ_MATH_ABS) \
+	$(OBJ_MATH_DIV0) \
+	$(OBJ_MATH_DIVMOD)

 LIB := libzap.a

diff --git a/zap/include/zap/bs.h b/zap/include/zap/bs.h
index 3d31a34..04a3d16 100644
--- a/zap/include/zap/bs.h
+++ b/zap/include/zap/bs.h
@@ -85,14 +85,14 @@ typedef zap_i02 zap_chr02;

 typedef zap_i8 zap_bool;

+typedef zap_i8 zap_byte;
+
 #if \
 	csys_arch_arm \
 ||	csys_arch_ia32
-typedef zap_i8 zap_byte;
 typedef zap_i02 zap_ptr;
 typedef zap_i02 zap_sz;
 #else
-typedef zap_i8 zap_byte;
 typedef zap_i04 zap_ptr;
 typedef zap_i04 zap_sz;
 #endif
diff --git a/zap/include/zap/mem.h b/zap/include/zap/mem.h
index 4085a20..c1544d0 100644
--- a/zap/include/zap/mem.h
+++ b/zap/include/zap/mem.h
@@ -12,17 +12,17 @@ zap_priv_cdecl


 void zap_cp( void * zap_priv_restr dest,void const * zap_priv_restr src, zap_sz num);
-zap_bool zap_eq( void const * lbuf,void const * rbuf,zap_sz num);
-void zap_fill(void * dest,zap_byte val, zap_sz num);
-void * zap_srch(void const * buf, zap_byte val, zap_sz num);
+zap_bool zap_eq( void const * lbuf,void const * rbuf,zap_sz num);
+void zap_fill(void * dest,zap_byte val, zap_sz num);
+void * zap_srch(void const * buf, zap_byte val, zap_sz num);

 zap_sz zap_utf8declen(zap_chr8 const * 
buf);
 zap_sz zap_utf8enclen(zap_chr02 const * buf);

-void zap_utf8dec( zap_chr02 * dest,zap_chr8 const * src);
-void zap_utf8enc( zap_chr8 * dest,zap_chr02 const * src);
-void zap_win1252dec(zap_chr02 * dest,zap_chr8 const * src);
-void zap_win1252enc(zap_chr8 * dest,zap_chr02 const * src);
+void zap_utf8dec( zap_chr02 * zap_priv_restr dest,zap_chr8 const * zap_priv_restr src);
+void zap_utf8enc( zap_chr8 * zap_priv_restr dest,zap_chr02 const * zap_priv_restr src);
+void zap_win1252dec(zap_chr02 * zap_priv_restr dest,zap_chr8 const * zap_priv_restr src);
+void zap_win1252enc(zap_chr8 * zap_priv_restr dest,zap_chr02 const * zap_priv_restr src);

 zap_priv_cdeclend

diff --git a/zap/source/amd64/math/abs.s b/zap/source/amd64/math/abs.s
new file mode 100644
index 0000000..178b5a7
--- /dev/null
+++ b/zap/source/amd64/math/abs.s
@@ -0,0 +1,34 @@
+# Copyright 2022-2023 Gabriel Jensen.
+# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>.
+
+.intel_syntax noprefix
+
+.globl zap_abs8
+.globl zap_abs01
+.globl zap_abs02
+.globl zap_abs04
+
+zap_abs8:	# 8-bit absolute value: input in dil, result in al.
+	mov al,dil
+	neg al	# zap_i8 ret = -val; // Negate a copy of the input. This also sets the sign flag from the result.
+	cmovs ax,di	# if (ret < 0x0) ret = val; // A negative result means the input was positive, so return it unmodified.
+	ret	# return ret;
+
+zap_abs01:	# 16-bit absolute value: input in di, result in ax.
+	mov ax,di
+	neg ax	# ret = -val;
+	cmovs ax,di	# if (ret < 0x0) ret = val;
+	ret	# return ret;
+
+zap_abs02:	# 32-bit absolute value: input in edi, result in eax.
+	mov eax,edi
+	neg eax	# ret = -val;
+	cmovs eax,edi	# if (ret < 0x0) ret = val;
+	ret	# return ret;
+
+zap_abs04:	# 64-bit absolute value: input in rdi, result in rax.
+	mov rax,rdi
+	neg rax	# ret = -val;
+	cmovs rax,rdi	# if (ret < 0x0) ret = val;
+	ret	# return ret;
diff --git a/zap/source/amd64/mem/cp.s b/zap/source/amd64/mem/cp.s
new file mode 100644
index 0000000..5985f04
--- /dev/null
+++ b/zap/source/amd64/mem/cp.s
@@ -0,0 +1,63 @@
+# Copyright 2022-2023 Gabriel Jensen.
+# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>.
+
+.intel_syntax noprefix
+
+.globl zap_cp
+
+zap_cp:	# Copies num (rdx) bytes from src (rsi) to dest (rdi), largest chunks first.
+	# zap_i8 val1;
+	# zap_i04 val8;
+	# unsigned int256_t val02;
+
+.big02cp:
+	cmp rdx,0x20
+	jb .big01cp	# if (num < 0x20u) goto big01cp; // num is unsigned, so compare below rather than less.
+
+	vmovups ymm0,[rsi]	# val02 = *src;
+	vmovups [rdi],ymm0	# *dest = val02;
+
+	add rsi,0x20	# src += 0x20u;
+	add rdi,0x20	# dest += 0x20u;
+	sub rdx,0x20	# num -= 0x20u;
+	jmp .big02cp	# goto big02cp;
+
+.big01cp:
+	cmp rdx,0x10
+	jb .wrdcp	# if (num < 0x10u) goto wrdcp;
+
+	movdqu xmm0,[rsi]	# val01 = *src;
+	movdqu [rdi],xmm0	# *dest = val01;
+
+	add rsi,0x10	# src += 0x10u;
+	add rdi,0x10	# dest += 0x10u;
+	sub rdx,0x10	# num -= 0x10u;
+	jmp .big01cp	# goto big01cp;
+
+.wrdcp:
+	cmp rdx,0x8
+	jb .bytecp	# if (num < 0x8u) goto bytecp;
+
+	mov rcx,[rsi]	# val8 = *src;
+	mov [rdi],rcx	# *dest = val8;
+
+	add rdi,0x8	# dest += 0x8u;
+	add rsi,0x8	# src += 0x8u;
+	sub rdx,0x8	# num -= 0x8u;
+	jmp .wrdcp	# goto wrdcp;
+
+.bytecp:
+	test rdx,rdx	# if (num == 0x0u)
+	jz .done	# goto done;
+
+	mov cl,[rsi]	# val1 = *src;
+	mov [rdi],cl	# *dest = val1; // Store to dest (rdi); storing to rsi would only rewrite the source byte.
+
+	inc rdi	# ++dest;
+	inc rsi	# ++src;
+	dec rdx	# --num;
+	jmp .bytecp	# goto bytecp;
+
+.done:	# NOTE(review): ymm0 may have been written by the 0x20-byte loop; consider a vzeroupper before returning.
+	ret	# return;
diff --git a/zap/source/any/math/abs.c b/zap/source/any/mth/abs.c
index d12b6a6..d12b6a6 100644
--- a/zap/source/any/math/abs.c
+++ b/zap/source/any/mth/abs.c
diff --git a/zap/source/any/math/div0.c b/zap/source/any/mth/div0.c
index 87fcfa0..87fcfa0 100644
--- a/zap/source/any/math/div0.c
+++ b/zap/source/any/mth/div0.c
diff --git a/zap/source/any/math/divmod.c b/zap/source/any/mth/divmod.c
index 4214651..4214651 100644
--- a/zap/source/any/math/divmod.c
+++ b/zap/source/any/mth/divmod.c
diff --git a/zap/source/arm64/math/abs.s b/zap/source/arm64/math/abs.s
new file mode 100644
index 0000000..218d4d8
--- /dev/null
+++ b/zap/source/arm64/math/abs.s
@@ -0,0 +1,29 @@
+// Copyright 2022-2023 Gabriel Jensen.
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+// If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>.
+
+.globl zap_abs8,zap_abs01,zap_abs02,zap_abs04
+
+.func
+
+zap_abs8:
+	tst w0,0x80	// Test the sign bit of the 8-bit value only; the upper bits of w0 are not relied upon.
+	cneg w0,w0,ne	// if (val < 0x0) val = -val;
+	ret	// return val;
+
+zap_abs01:
+	tst w0,0x8000	// Test the sign bit of the 16-bit value only; the upper bits of w0 are not relied upon.
+	cneg w0,w0,ne	// if (val < 0x0) val = -val;
+	ret	// return val;
+
+zap_abs02:
+	cmp w0,0x0
+	cneg w0,w0,lt	// if (val < 0x0) val = -val;
+	ret	// return val;
+
+zap_abs04:
+	cmp x0,0x0
+	cneg x0,x0,lt	// if (val < 0x0) val = -val;
+	ret	// return val;
+
+.endfunc
diff --git a/zap/source/arm64/mem/cp.s b/zap/source/arm64/mem/cp.s
new file mode 100644
index 0000000..b581487
--- /dev/null
+++ b/zap/source/arm64/mem/cp.s
@@ -0,0 +1,40 @@
+// Copyright 2022-2023 Gabriel Jensen.
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+// If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>.
+
+.globl zap_cp
+
+.func
+
+zap_cp:	// Copies num (x2) bytes from src (x1) to dest (x0), eight bytes at a time and then bytewise.
+	// zap_i8 tmp1;
+	// zap_i04 tmp8;
+
+.wrdcp:	// wrdcp:;
+	cmp x2,0x8
+	b.lo .bytecp	// if (num < 0x8u) goto bytecp; // num is unsigned, so use the unsigned condition.
+
+	ldr x3,[x1]	// tmp8 = *(zap_i04 *)src;
+	str x3,[x0]	// *(zap_i04 *)dest = tmp8;
+
+	add x0,x0,0x8	// dest += 0x8u;
+	add x1,x1,0x8	// src += 0x8u;
+	sub x2,x2,0x8	// num -= 0x8u;
+	b .wrdcp	// goto wrdcp;
+
+.bytecp:	// bytecp:;
+	cmp x2,0x1
+	b.lo .done	// if (num < 0x1u) goto done;
+
+	ldrb w3,[x1]	// tmp1 = *(zap_i8 *)src;
+	strb w3,[x0]	// *(zap_i8 *)dest = tmp1;
+
+	add x0,x0,0x1	// ++dest;
+	add x1,x1,0x1	// ++src;
+	sub x2,x2,0x1	// --num;
+	b .bytecp	// goto bytecp;
+
+.done:	// done:;
+	ret	// return;
+
+.endfunc
|