summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.txt4
-rw-r--r--test/source/main.c3
-rw-r--r--test/source/test/utf20len.i11
-rw-r--r--zap/GNUmakefile1
-rw-r--r--zap/include/zap/bs.h2
-rw-r--r--zap/include/zap/mem.h1
-rw-r--r--zap/source/amd64/mem/strlen.S1
-rw-r--r--zap/source/amd64/mem/utf20len.S35
8 files changed, 57 insertions, 1 deletions
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index b0820e5..0c45421 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -1,3 +1,7 @@
+# 12.0
+
+* Add function for getting the length of UTF-32 strings;
+
# 11.1
* Fix install target;
diff --git a/test/source/main.c b/test/source/main.c
index 38c9f68..ce73d3a 100644
--- a/test/source/main.c
+++ b/test/source/main.c
@@ -33,6 +33,7 @@ typedef bool (* zaptest_testtyp)(void);
#include "test/streq.i"
#include "test/strfill.i"
#include "test/strlen.i"
+#include "test/utf20len.i"
#include "test/utf8dec.i"
#include "test/utf8enc.i"
#include "test/win1252dec.i"
@@ -55,6 +56,7 @@ static zaptest_testtyp zaptest_tests[] = {
zaptest_test_streq,
zaptest_test_strfill,
zaptest_test_strlen,
+ zaptest_test_utf20len,
zaptest_test_utf8dec,
zaptest_test_utf8enc,
zaptest_test_win1252dec,
@@ -78,6 +80,7 @@ static char const * zaptest_testnms[] = {
"streq",
"strfill",
"strlen",
+ "utf20len",
"utf8dec",
"utf8enc",
"win1252dec",
diff --git a/test/source/test/utf20len.i b/test/source/test/utf20len.i
new file mode 100644
index 0000000..98be658
--- /dev/null
+++ b/test/source/test/utf20len.i
@@ -0,0 +1,11 @@
+#include <stdbool.h>
+
+bool zaptest_test_utf20len(void) { /* Checks zap_utf20len against two string literals of known length. */
+ zap_chr20 const str0[] = U"Ça va?"; /* Six characters before the terminator. */
+ zap_chr20 const str1[] = U"Non, ça ne va pas."; /* Eighteen (0x12) characters before the terminator. */
+ zap_sz const len0 = zap_utf20len(str0);
+ zap_sz const len1 = zap_utf20len(str1);
+ zaptest_chk(len0,0x6u,zap_sz,"%zX") /* zaptest_chk is defined elsewhere; presumably it fails the test on a mismatch -- TODO confirm. */
+ zaptest_chk(len1,0x12u,zap_sz,"%zX")
+ return false; /* NOTE(review): false presumably signals "no failure" to the test runner -- confirm. */
+}
diff --git a/zap/GNUmakefile b/zap/GNUmakefile
index a50f8f6..d3d83ef 100644
--- a/zap/GNUmakefile
+++ b/zap/GNUmakefile
@@ -48,6 +48,7 @@ OBJS = \
source/$(arch)/mem/strfill.o \
source/$(arch)/mem/strcp.o \
source/$(arch)/mem/strlen.o \
+ source/$(arch)/mem/utf20len.o \
source/$(arch)/mem/utf8dec.o \
source/$(arch)/mem/utf8declen.o \
source/$(arch)/mem/utf8enc.o \
diff --git a/zap/include/zap/bs.h b/zap/include/zap/bs.h
index 4d8d004..e3d4c3d 100644
--- a/zap/include/zap/bs.h
+++ b/zap/include/zap/bs.h
@@ -30,7 +30,7 @@ typedef signed char zap_cmp;
typedef unsigned long zap_sz;
-#define zap_ver ((unsigned long)+0x11u)
+#define zap_ver ((unsigned long)+0x12u)
#if defined(__cplusplus)
}
diff --git a/zap/include/zap/mem.h b/zap/include/zap/mem.h
index f4877fa..d69c45f 100644
--- a/zap/include/zap/mem.h
+++ b/zap/include/zap/mem.h
@@ -26,6 +26,7 @@ __attribute__ ((hot,nothrow)) zap_sz zap_strcp( char co
__attribute__ ((hot,nothrow,warn_unused_result)) zap_bool zap_streq( char const * lstr, char const * rstr);
__attribute__ ((hot,nothrow)) zap_sz zap_strfill( char * lstr, char chr);
__attribute__ ((hot,nothrow,warn_unused_result)) zap_sz zap_strlen( char const * str);
+__attribute__ ((hot,nothrow,warn_unused_result)) zap_sz zap_utf20len( zap_chr20 const * utf20);
__attribute__ ((hot,nothrow)) void zap_utf8dec( zap_chr8 const * in, zap_chr20 * out);
__attribute__ ((hot,nothrow)) zap_sz zap_utf8declen(zap_chr8 const * utf8);
__attribute__ ((hot,nothrow)) void zap_utf8enc( zap_chr20 const * in, zap_chr8 * out);
diff --git a/zap/source/amd64/mem/strlen.S b/zap/source/amd64/mem/strlen.S
index bd83008..e8739e2 100644
--- a/zap/source/amd64/mem/strlen.S
+++ b/zap/source/amd64/mem/strlen.S
@@ -6,6 +6,7 @@
zap_strlen:
# rax: Address of the current character.
+ # rdi: Address of the first character.
# rdx: Current character.
movq %rdi,%rax
diff --git a/zap/source/amd64/mem/utf20len.S b/zap/source/amd64/mem/utf20len.S
new file mode 100644
index 0000000..5af352b
--- /dev/null
+++ b/zap/source/amd64/mem/utf20len.S
@@ -0,0 +1,35 @@
+# Copyright 2022 Gabriel Jensen.
+# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
+# If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+.globl zap_utf20len
+
+zap_utf20len:
+ # Counts the 32-bit characters that precede the first zero character of the string.
+ # rdi: Address of the first character (never written by this routine).
+ # rax: Address of the current character; holds the resulting length on return.
+ # edx: Current character.
+ movq %rdi,%rax # Start the scan at the first character.
+
+ # Walk the string one 32-bit character at a time:
+.loop:
+
+ # Load the current character (four bytes) into a register:
+ movl (%rax),%edx
+
+ # A zero character is the null-terminator and ends the scan:
+ testl %edx,%edx
+ jz .done # rax is left pointing at the terminator, so it is not counted.
+
+ # Advance by four bytes to the next character:
+ addq $0x4,%rax
+ jmp .loop
+
+ # The terminator has been found:
+.done:
+
+ # Byte distance from the first character to the terminator:
+ subq %rdi,%rax
+ shrq $0x2,%rax # Each character is four bytes, so shifting right by two turns the byte distance into a character count.
+
+ ret