diff options
-rw-r--r-- | CHANGELOG.txt | 6 | ||||
-rw-r--r-- | test.c | 103 | ||||
-rw-r--r-- | zap/GNUmakefile | 16 | ||||
-rw-r--r-- | zap/include/zap/bs.h | 4 | ||||
-rw-r--r-- | zap/include/zap/mem.h | 14 | ||||
-rw-r--r-- | zap/source/amd64/math/abs.s | 34 | ||||
-rw-r--r-- | zap/source/amd64/mem/cp.s | 63 | ||||
-rw-r--r-- | zap/source/any/mth/abs.c (renamed from zap/source/any/math/abs.c) | 0 | ||||
-rw-r--r-- | zap/source/any/mth/div0.c (renamed from zap/source/any/math/div0.c) | 0 | ||||
-rw-r--r-- | zap/source/any/mth/divmod.c (renamed from zap/source/any/math/divmod.c) | 0 | ||||
-rw-r--r-- | zap/source/arm64/math/abs.s | 29 | ||||
-rw-r--r-- | zap/source/arm64/mem/cp.s | 40 |
12 files changed, 285 insertions, 24 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 0378912..831e339 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,9 @@ +# 16.2 + +* Implement cp and abs in AMD64; +* Fix indentations; +* Implement cp and abs in ARM64; + # 16.1 * Add install target to makefile; @@ -27,18 +27,99 @@ int main(void) { } /* mem */ { - zap_i01 const in[0x4u] = { - 0xFFFFu, - 0xFFFu, - 0xFFu, - 0xFu, + zap_i8 const src[0x3Fu] = { +0x02u, +0x03u, +0x05u, +0x07u, +0x0Bu, +0x0Du, +0x11u, +0x13u, +0x17u, +0x1Du, +0x1Fu, +0x25u, +0x29u, +0x2Bu, +0x2Fu, +0x35u, +0x3Bu, +0x3Du, +0x43u, +0x47u, +0x49u, +0x4Fu, +0x53u, +0x59u, +0x61u, +0x65u, +0x67u, +0x6Bu, +0x6Du, +0x71u, +0x7Fu, +0x83u, +0x89u, +0x8Bu, +0x95u, +0x97u, +0x9Du, +0xA3u, +0xA7u, +0xADu, +0xB3u, +0xB5u, +0xBFu, +0xC1u, +0xC5u, +0xC7u, +0xD3u, +0xDFu, +0xE3u, +0xE5u, +0xE9u, +0xEFu, +0xF1u, +0xFBu, }; - zap_i01 out[0x4u] = {0x0u}; - zap_cp(out,in,sizeof (in)); - test(out[0x0u],0xFFFFu,==) - test(out[0x1u],0xFFFu, ==) - test(out[0x2u],0xFFu, ==) - test(out[0x3u],0xFu, ==) + zap_i8 dest[sizeof (src)]; + zap_cp(dest,src,sizeof (src)); + test(dest[0x00u],src[0x00u],==) + test(dest[0x01u],src[0x01u],==) + test(dest[0x02u],src[0x02u],==) + test(dest[0x03u],src[0x03u],==) + test(dest[0x04u],src[0x04u],==) + test(dest[0x05u],src[0x05u],==) + test(dest[0x06u],src[0x06u],==) + test(dest[0x07u],src[0x07u],==) + test(dest[0x08u],src[0x08u],==) + test(dest[0x09u],src[0x09u],==) + test(dest[0x0Au],src[0x0Au],==) + test(dest[0x0Bu],src[0x0Bu],==) + test(dest[0x0Cu],src[0x0Cu],==) + test(dest[0x0Du],src[0x0Du],==) + test(dest[0x0Eu],src[0x0Eu],==) + test(dest[0x0Fu],src[0x0Fu],==) + test(dest[0x10u],src[0x10u],==) + test(dest[0x11u],src[0x11u],==) + test(dest[0x12u],src[0x12u],==) + test(dest[0x13u],src[0x13u],==) + test(dest[0x14u],src[0x14u],==) + test(dest[0x15u],src[0x15u],==) + test(dest[0x16u],src[0x16u],==) + test(dest[0x17u],src[0x17u],==) + test(dest[0x18u],src[0x18u],==) + test(dest[0x19u],src[0x19u],==) + 
test(dest[0x1Au],src[0x1Au],==) + test(dest[0x1Bu],src[0x1Bu],==) + test(dest[0x1Cu],src[0x1Cu],==) + test(dest[0x1Du],src[0x1Du],==) + test(dest[0x1Eu],src[0x1Eu],==) + test(dest[0x1Fu],src[0x1Fu],==) + test(dest[0x20u],src[0x20u],==) + test(dest[0x21u],src[0x21u],==) + test(dest[0x22u],src[0x22u],==) } { zap_i04 val = 0x0u; diff --git a/zap/GNUmakefile b/zap/GNUmakefile index ecc858b..9c85885 100644 --- a/zap/GNUmakefile +++ b/zap/GNUmakefile @@ -25,11 +25,16 @@ OBJ_MEM_UTF8ENCLEN := source/any/mem/utf8enclen.o OBJ_MEM_WIN1252DEC := source/any/mem/win1252dec.o OBJ_MEM_WIN1252ENC := source/any/mem/win1252enc.o +ifeq "$(arch)" "amd64" +OBJ_MATH_ABS := source/$(arch)/math/abs.o +OBJ_MEM_CP := source/$(arch)/mem/cp.o +else ifeq "$(arch)" "arm64" +OBJ_MATH_ABS := source/$(arch)/math/abs.o +OBJ_MEM_CP := source/$(arch)/mem/cp.o +endif + OBJS := \ $(OBJ_BS_TRAP) \ - $(OBJ_MATH_ABS) \ - $(OBJ_MATH_DIV0) \ - $(OBJ_MATH_DIVMOD) \ $(OBJ_MEM_CP) \ $(OBJ_MEM_EQ) \ $(OBJ_MEM_FILL) \ @@ -39,7 +44,10 @@ OBJS := \ $(OBJ_MEM_UTF8ENC) \ $(OBJ_MEM_UTF8ENCLEN) \ $(OBJ_MEM_WIN1252DEC) \ - $(OBJ_MEM_WIN1252ENC) + $(OBJ_MEM_WIN1252ENC) \ + $(OBJ_MATH_ABS) \ + $(OBJ_MATH_DIV0) \ + $(OBJ_MATH_DIVMOD) LIB := libzap.a diff --git a/zap/include/zap/bs.h b/zap/include/zap/bs.h index 3d31a34..04a3d16 100644 --- a/zap/include/zap/bs.h +++ b/zap/include/zap/bs.h @@ -85,14 +85,14 @@ typedef zap_i02 zap_chr02; typedef zap_i8 zap_bool; +typedef zap_i8 zap_byte; + #if \ csys_arch_arm \ || csys_arch_ia32 -typedef zap_i8 zap_byte; typedef zap_i02 zap_ptr; typedef zap_i02 zap_sz; #else -typedef zap_i8 zap_byte; typedef zap_i04 zap_ptr; typedef zap_i04 zap_sz; #endif diff --git a/zap/include/zap/mem.h b/zap/include/zap/mem.h index 4085a20..c1544d0 100644 --- a/zap/include/zap/mem.h +++ b/zap/include/zap/mem.h @@ -12,17 +12,17 @@ zap_priv_cdecl void zap_cp( void * zap_priv_restr dest,void const * zap_priv_restr src, zap_sz num); -zap_bool zap_eq( void const * lbuf,void const * rbuf,zap_sz num); -void 
zap_fill(void * dest,zap_byte val, zap_sz num); -void * zap_srch(void const * buf, zap_byte val, zap_sz num); +zap_bool zap_eq( void const * lbuf,void const * rbuf,zap_sz num); +void zap_fill(void * dest,zap_byte val, zap_sz num); +void * zap_srch(void const * buf, zap_byte val, zap_sz num); zap_sz zap_utf8declen(zap_chr8 const * buf); zap_sz zap_utf8enclen(zap_chr02 const * buf); -void zap_utf8dec( zap_chr02 * dest,zap_chr8 const * src); -void zap_utf8enc( zap_chr8 * dest,zap_chr02 const * src); -void zap_win1252dec(zap_chr02 * dest,zap_chr8 const * src); -void zap_win1252enc(zap_chr8 * dest,zap_chr02 const * src); +void zap_utf8dec( zap_chr02 * zap_priv_restr dest,zap_chr8 const * zap_priv_restr src); +void zap_utf8enc( zap_chr8 * zap_priv_restr dest,zap_chr02 const * zap_priv_restr src); +void zap_win1252dec(zap_chr02 * zap_priv_restr dest,zap_chr8 const * zap_priv_restr src); +void zap_win1252enc(zap_chr8 * zap_priv_restr dest,zap_chr02 const * zap_priv_restr src); zap_priv_cdeclend diff --git a/zap/source/amd64/math/abs.s b/zap/source/amd64/math/abs.s new file mode 100644 index 0000000..178b5a7 --- /dev/null +++ b/zap/source/amd64/math/abs.s @@ -0,0 +1,34 @@ +# Copyright 2022-2023 Gabriel Jensen. +# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. +# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>. + +.intel_syntax noprefix + +.globl zap_abs8 +.globl zap_abs01 +.globl zap_abs02 +.globl zap_abs04 + +zap_abs8: + mov al,dil + neg al # zap_i8 ret = -val; // Negate the copy of the input value. This also sets the sign flag from the negated result. + cmovs ax,di # if (ret < 0x0) ret = val; // If the negation is negative, the input was positive, so return the unmodified input. 
+ ret # return ret; + +zap_abs01: + mov ax,di + neg ax + cmovs ax,di + ret + +zap_abs02: + mov eax,edi + neg eax + cmovs eax,edi + ret + +zap_abs04: + mov rax,rdi + neg rax + cmovs rax,rdi + ret diff --git a/zap/source/amd64/mem/cp.s b/zap/source/amd64/mem/cp.s new file mode 100644 index 0000000..5985f04 --- /dev/null +++ b/zap/source/amd64/mem/cp.s @@ -0,0 +1,63 @@ +# Copyright 2022-2023 Gabriel Jensen. +# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. +# If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>. + +.intel_syntax noprefix + +.globl zap_cp + +zap_cp: + # zap_i8 val1; + # zap_i04 val8; + # unsigned int256_t val02; + +.big02cp: + cmp rdx,0x20 + jb .big01cp # if (num < 0x20u) goto big01cp; // unsigned compare: num is a size + + vmovups ymm0,[rsi] # val02 = *src; + vmovups [rdi],ymm0 # *dest = val02; + + add rsi,0x20 # src += 0x20u; + add rdi,0x20 # dest += 0x20u; + sub rdx,0x20 # num -= 0x20u; + jmp .big02cp # goto big02cp; + +.big01cp: + cmp rdx,0x10 + jb .wrdcp # if (num < 0x10u) goto wrdcp; + + movdqu xmm0,[rsi] # val01 = *src; + movdqu [rdi],xmm0 # *dest = val01; + + add rsi,0x10 # src += 0x10u; + add rdi,0x10 # dest += 0x10u; + sub rdx,0x10 # num -= 0x10u; + jmp .big01cp # goto big01cp; + +.wrdcp: + cmp rdx,0x8 + jb .bytecp # if (num < 0x8u) goto bytecp; + + mov rcx,[rsi] # val8 = *src; + mov [rdi],rcx # *dest = val8; + + add rdi,0x8 # dest += 0x8u; + add rsi,0x8 # src += 0x8u; + sub rdx,0x8 # num -= 0x8u; + jmp .wrdcp # goto wrdcp + +.bytecp: + test rdx,rdx # if (num == 0x0) + jz .done # goto done + + mov cl,[rsi] # val1 = *src; + mov [rdi],cl # *dest = val1; // store to dest (rdi), not back to src + + inc rdi # ++dest; + inc rsi # ++src; + dec rdx # --num; + jmp .bytecp # goto bytecp; + +.done: + ret # return diff --git a/zap/source/any/math/abs.c b/zap/source/any/mth/abs.c index d12b6a6..d12b6a6 100644 --- a/zap/source/any/math/abs.c +++ b/zap/source/any/mth/abs.c diff --git a/zap/source/any/math/div0.c 
b/zap/source/any/mth/div0.c index 87fcfa0..87fcfa0 100644 --- a/zap/source/any/math/div0.c +++ b/zap/source/any/mth/div0.c diff --git a/zap/source/any/math/divmod.c b/zap/source/any/mth/divmod.c index 4214651..4214651 100644 --- a/zap/source/any/math/divmod.c +++ b/zap/source/any/mth/divmod.c diff --git a/zap/source/arm64/math/abs.s b/zap/source/arm64/math/abs.s new file mode 100644 index 0000000..218d4d8 --- /dev/null +++ b/zap/source/arm64/math/abs.s @@ -0,0 +1,29 @@ +// Copyright 2022-2023 Gabriel Jensen. +// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. +// If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>. + +.globl zap_abs8, zap_abs01, zap_abs02, zap_abs04 + +.func + +zap_abs8: + cmp w0,0x0 + cneg w0,w0,lt // if (val < 0x0) val = -val; + ret // return val; + +zap_abs01: + cmp w0,0x0 + cneg w0,w0,lt // if (val < 0x0) val = -val; + ret // return val; + +zap_abs02: + cmp w0,0x0 + cneg w0,w0,lt // if (val < 0x0) val = -val; + ret // return val; + +zap_abs04: + cmp x0,0x0 + cneg x0,x0,lt // if (val < 0x0) val = -val; + ret // return val; + +.endfunc diff --git a/zap/source/arm64/mem/cp.s b/zap/source/arm64/mem/cp.s new file mode 100644 index 0000000..b581487 --- /dev/null +++ b/zap/source/arm64/mem/cp.s @@ -0,0 +1,40 @@ +// Copyright 2022-2023 Gabriel Jensen. +// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. +// If a copy of the MPL was not distributed with this file, You can obtain one at <https://mozilla.org/MPL/2.0>. 
+ +.globl zap_cp + +.func + +zap_cp: + // zap_i8 tmp1; + // zap_i04 tmp8; + +.wrdcp: // wrdcp:; + cmp x2,0x8 + blo .bytecp // if (num < 0x8u) goto bytecp; // unsigned compare: num is a size + + ldr x3,[x1] // tmp8 = *(zap_i04 *)src; + str x3,[x0] // *(zap_i04 *)dest = tmp8; + + add x0,x0,0x8 // dest += 0x8u; + add x1,x1,0x8 // src += 0x8u; + sub x2,x2,0x8 // num -= 0x8u; + b .wrdcp // goto wrdcp; + +.bytecp: // bytecp:; + cmp x2,0x1 + blo .done // if (num < 0x1u) goto done; + + ldrb w3,[x1] // tmp1 = *(zap_i8 *)src; + strb w3,[x0] // *(zap_i8 *)dest = tmp1; + + add x0,x0,0x1 // ++dest; + add x1,x1,0x1 // ++src; + sub x2,x2,0x1 // --num; + b .bytecp // goto bytecp; + +.done: // done:; + ret // return; + +.endfunc |