diff options
-rw-r--r-- | CHANGELOG.txt | 4 | ||||
-rw-r--r-- | zp/source/arm/mem/memcpy.s | 42 | ||||
-rw-r--r-- | zp/source/arm64/mem/memcpy.s | 2 |
3 files changed, 27 insertions, 21 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt index d8b90a7..e4c5386 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -64,6 +64,10 @@ * Add more run-time tests; * Rename likly and ulikly to lik and ulik; +* Fix memcpy results on ARM; +* Fix order of parameters in memcpy on ARM; +* Add more assembly comments; + # 0.0.2 * Migrate to CMake; diff --git a/zp/source/arm/mem/memcpy.s b/zp/source/arm/mem/memcpy.s index b07c1f6..3a9d0b6 100644 --- a/zp/source/arm/mem/memcpy.s +++ b/zp/source/arm/mem/memcpy.s @@ -12,39 +12,41 @@ .thumb_func zp_memcpy: - @ zp_i02 tmp4; - @ zp_i8 tmp1; + @ zp_i02 tmp4; + @ zp_i8 tmp1; -.wrdcpy: @ wrdcpy:; +.wrdcpy: @ wrdcpy:; @ Check if there are at least four bytes remaining: - cmp r1,0x4 - blt .bytcpy @ if (num < 0x4u) goto bytcpy; + cmp r3,0x4 + blt .bytcpy @ if (num < 0x4u) goto bytcpy; @ Copy one word: - ldm r0!,{r3} @ tmp4 = *(zp_i02 *)in; /* We use ldm/stm with an exclamation mark after the source/destination as this version saves the incremented address into the register, meaning we don't have to icrement it ourselves. */ - stm r2!,{r3} @ *(zp_i02 *)out = tmp4; + ldm r2!,{r4} @ tmp4 = *(zp_i02 *)src; // We use ldm/stm with an exclamation mark after the source/destination as this version saves the incremented address into the register, meaning we don't have to increment it ourselves. 
+ stm r1!,{r4} @ *(zp_i02 *)dst = tmp4; @ Continue to the next word: - subs r1,0x4 @ num -= 0x4u; - b .wrdcpy @ goto wrdcpy; + subs r3,0x4 @ num -= 0x4u; + b .wrdcpy @ goto wrdcpy; -.bytcpy: @ bytcpy:; +.bytcpy: @ bytcpy:; @ Check if we have any bytes remaining: - cmp r1,0x0 - beq .done @ if (num == 0x0u) goto done; + cmp r3,0x0 + beq .done @ if (num == 0x0u) goto done; @ Copy one byte: - ldrb r3,[r0] @ tmp1 = *(zp_i8 *)in; - strb r3,[r2] @ *(zp_i8 *)out = tmp1; + ldrb r4,[r2] @ tmp1 = *(zp_i8 *)src; + strb r4,[r1] @ *(zp_i8 *)dst = tmp1; @ Continue to the next byte: - adds r0,0x1 @ ++in; - adds r2,0x1 @ ++out; - subs r1,0x1 @ --num; - b .bytcpy @ goto bytcpy; + adds r1,0x1 @ ++dst; + adds r2,0x1 @ ++src; + subs r3,0x1 @ --num; + b .bytcpy @ goto bytcpy; -.done: @ done:; +.done: @ done:; @ Return: - bx lr @ return; + str r2,[r0] @ We place the structure members in the buffer given by the caller, the pointer to which is always passed in r0. + str r3,[r0,0x4] + bx lr @ return (zp_cpyres) {.dst = dst,.src = src}; .endfunc diff --git a/zp/source/arm64/mem/memcpy.s b/zp/source/arm64/mem/memcpy.s index 6d5631e..91fbcf7 100644 --- a/zp/source/arm64/mem/memcpy.s +++ b/zp/source/arm64/mem/memcpy.s @@ -35,6 +35,6 @@ zp_cp: b .bytcpy // goto bytcpy; .done: // done:; - ret // return; + ret // return; // The structure members are already in the appropriate registers. .endfunc |