diff options
105 files changed, 4842 insertions, 1855 deletions
@@ -1,6 +1,4 @@ -*.gz *.o *.so /test -/txttolit vgcore.* @@ -1,26 +1,41 @@ -CC = gcc -CFLAGS = -std=c2x -Wall -Wextra -Wmissing-prototypes -pedantic-errors -Iinclude -fPIC -ifneq ($(thrdsafe),0) +.DEFAULT_GOAL = $(LIB) +CC = gcc +CFLAGS = -std=c2x -Wall -Wextra -Wmissing-prototypes -pedantic-errors -Iinclude -fPIC +ifeq ($(thrdsafe),1) CFLAGS += -Du8c_bethrdsafe endif ifeq ($(debug),1) -CFLAGS += -g +CFLAGS += -O0 -g else -CFLAGS += -O3 -DNDEBUG +CFLAGS += -Os -DNDEBUG +endif +LDFLAGS = -shared +ifeq ($(thrdsafe),1) +LDFLAGS += -lpthread endif -LDFLAGS = -shared -lpthread HDRS = \ + include/u8c/SIZE_C.h \ + include/u8c/chk.h \ include/u8c/err.h \ include/u8c/fmt.h \ - include/u8c/is.h \ include/u8c/main.h \ - include/u8c/SIZE_C.h \ + include/u8c/str.h \ include/u8c/u16.h \ - include/u8c/u32.h \ include/u8c/u8.h HDRS_PRIV = \ include/u8c/intern.h SRCS = \ + src/u8c/chk.h.d/isalnum.c \ + src/u8c/chk.h.d/isalpha.c \ + src/u8c/chk.h.d/iscntrl.c \ + src/u8c/chk.h.d/isdigit.c \ + src/u8c/chk.h.d/islower.c \ + src/u8c/chk.h.d/ispunct.c \ + src/u8c/chk.h.d/isspace.c \ + src/u8c/chk.h.d/isspace.c \ + src/u8c/chk.h.d/issurro.c \ + src/u8c/chk.h.d/isupper.c \ + src/u8c/chk.h.d/isxdigit.c \ src/u8c/err.h.d/geterr.c \ src/u8c/err.h.d/regerrhandl.c \ src/u8c/err.h.d/seterr.c \ @@ -31,97 +46,41 @@ SRCS = \ src/u8c/fmt.h.d/vfmt.c \ src/u8c/fmt.h.d/vprint.c \ src/u8c/intern.h.d/dat.c \ - src/u8c/is.h.d/isalnum.c \ - src/u8c/is.h.d/isalpha.c \ - src/u8c/is.h.d/iscntrl.c \ - src/u8c/is.h.d/isdigit.c \ - src/u8c/is.h.d/ispunct.c \ - src/u8c/is.h.d/isspace.c \ - src/u8c/is.h.d/isxdigit.c \ src/u8c/main.h.d/abrtfn.c \ src/u8c/main.h.d/debug.c \ src/u8c/main.h.d/end.c \ src/u8c/main.h.d/init.c \ src/u8c/main.h.d/thrdsafe.c \ + src/u8c/main.h.d/uniblk.c \ + src/u8c/main.h.d/uninm.c \ + src/u8c/str.h.d/stralloc.c \ + src/u8c/str.h.d/strcat.c \ + src/u8c/str.h.d/strcmp.c \ + src/u8c/str.h.d/strcp.c \ + src/u8c/str.h.d/strfndchr.c \ + src/u8c/str.h.d/strfndpat.c \ + src/u8c/str.h.d/strfree.c 
\ + src/u8c/str.h.d/strins.c \ + src/u8c/str.h.d/strsubstr.c \ + src/u8c/str.h.d/strsz.c \ src/u8c/u8.h.d/u8alloc.c \ src/u8c/u8.h.d/u8dec.c \ src/u8c/u8.h.d/u8enc.c \ src/u8c/u8.h.d/u8free.c \ src/u8c/u16.h.d/u16alloc.c \ - src/u8c/u16.h.d/u16free.c \ - src/u8c/u32.h.d/u32alloc.c \ - src/u8c/u32.h.d/u32cat.c \ - src/u8c/u32.h.d/u32cmp.c \ - src/u8c/u32.h.d/u32cp.c \ - src/u8c/u32.h.d/u32fndchr.c \ - src/u8c/u32.h.d/u32fndpat.c \ - src/u8c/u32.h.d/u32free.c \ - src/u8c/u32.h.d/u32ins.c \ - src/u8c/u32.h.d/u32substr.c \ - src/u8c/u32.h.d/u32sz.c + src/u8c/u16.h.d/u16free.c OBJS = $(SRCS:.c=.o) LIB = libu8c.so $(LIB): $(OBJS) $(CC) $(LDFLAGS) $^ -o $@ $(OBJS): $(HDRS) $(HDRS_PRIV) -DOCS = \ - docs/u8c_abrt.3.gz \ - docs/u8c_abrtfn.3.gz \ - docs/u8c_col.3.gz \ - docs/u8c_dbg.3.gz \ - docs/u8c_dbgprint.3.gz \ - docs/u8c_end.3.gz \ - docs/u8c_errhandltyp.3.gz \ - docs/u8c_errtyp.3.gz \ - docs/u8c_fmt.3.gz \ - docs/u8c_fmttyp.3.gz \ - docs/u8c_geterr.3.gz \ - docs/u8c_init.3.gz \ - docs/u8c_isalnum.3.gz \ - docs/u8c_isalpha.3.gz \ - docs/u8c_iscntrl.3.gz \ - docs/u8c_isdigit.3.gz \ - docs/u8c_ispunct.3.gz \ - docs/u8c_isspace.3.gz \ - docs/u8c_isxdigit.3.gz \ - docs/u8c_print.3.gz \ - docs/u8c_println.3.gz \ - docs/u8c_regerrhandl.3.gz \ - docs/u8c_seterr.3.gz \ - docs/u8c_setfmt.3.gz \ - docs/u8c_thrdsafe.3.gz \ - docs/u8c_u8alloc.3.gz \ - docs/u8c_u8dec.3.gz \ - docs/u8c_u8enc.3.gz \ - docs/u8c_u8free.3.gz \ - docs/u8c_u32alloc.3.gz \ - docs/u8c_u32cat.3.gz \ - docs/u8c_u32cmp.3.gz \ - docs/u8c_u32cp.3.gz \ - docs/u8c_u32fndchr.3.gz \ - docs/u8c_u32fndpat.3.gz \ - docs/u8c_u32free.3.gz \ - docs/u8c_u32ins.3.gz \ - docs/u8c_u32substr.3.gz \ - docs/u8c_u32sz.3.gz \ - docs/u8c_u32max.3.gz \ - docs/u8c_ver.3.gz \ - docs/u8c_vfmt.3.gz \ - docs/u8c_vprint.3.gz -$(DOCS): - gzip --keep $(DOCS:.gz=) test: $(LIB) test.c $(CC) -std=c2x -Wall -Wextra -Wpedantic -Iinclude -O3 -g -L. 
-lu8c -o $@ $@.c -.PHONY: clean docs install runtest +.PHONY: clean install clean: - rm --force test $(DOCS) $(LIB) $(OBJS) -docs: $(DOCS) -install: $(LIB) $(DOCS) + rm --force test $(LIB) $(OBJS) +install: $(LIB) mkdir --parents $(DESTDIR)/include/u8c mkdir --parents $(DESTDIR)/lib - mkdir --parents $(DESTDIR)/share/man/man3 - install --mode=444 --verbose $(DOCS) $(DESTDIR)/share/man/man3 install --mode=444 --verbose $(HDRS) $(DESTDIR)/include/u8c install --mode=555 --verbose $(LIB) $(DESTDIR)/lib -runtest: test - export LD_LIBRARY_PATH=$(CURDIR) && ./$^ @@ -1,6 +1,25 @@ # u8c -[*u8c*](https://mandelbrot.dk/delta/u8c) is a free, open-source and portable library for Unicode manipulation. +[*u8c*](https://mandelbrot.dk/delta/u8c) is a free, open-source and (very) portable library for Unicode manipulation. + +*u8c* is very portable, and should work (with a trivial build system) on **any** hosted conforming C23 implementation, be it one with 64 bits a byte, 4096 byte shorts, with or without multithreading. In the event it doesn't for you, I really want you to open an issue on GitLab (<https://mandelbrot.dk>). + +## Compiling + +*u8c* can be compiled via the provided *Makefile* using the command `make`. + +By default, GCC-11 is used to build *u8c*. Clang-12 doesn't support C23 to the same extent, and may therefore require some modifications. + +Currently, the following C23 features are required: + +* Attributes. +* Binary literals. + +Yet even with only these two seemingly trivial features, the newest version of Clang (Clang-12 at this time) is unable to compile the library without the `-Wno-gnu-binary-literal` flag. + +To enable debug mode, the option `debug=1` must be passed to Make. + +To enable thread-safe operations (where logical), the option `thrdsafe=1` must be passed to Make. ## Installing @@ -8,7 +27,7 @@ If it's installed so, one must make a note of the output, as it logs what system files have changed.
-Using the PKGBUILD is as simple as `git clone https://mandelbrot.dk/pkgbuild/delta/u8c.git && cd u8c && makepkg --clean --install --syncdeps` (on Arch-based distributions). +Using the PKGBUILD is as simple as `git clone https://mandelbrot.dk/pkg/u8c.git && cd u8c && makepkg --clean --install --syncdeps` (on Arch-based distributions). ## Copyright & License diff --git a/changelog.md b/changelog.md index bad655b..bc97b12 100644 --- a/changelog.md +++ b/changelog.md @@ -1,10 +1,43 @@ +# 22 + +* Remove documentation (too high-maintenance). +* Rename `include/u8c/is.h` to `include/u8c/chk.h` +* Revert u8c-9 “Remove some optimisations, as they prevent C++ compatibility.”. +* Fix #1. +* Use binary literals for bitwise operations. +* Add more control characters to `u8c_iscntrl`. +* Change type of result of the `u8c_is`* functions from `uint_least8_t` to `bool`. +* Add more characters to `u8c_ispunct`. +* Update Makefile. +* Revert u8c-21 “Rename `u8c_unimax` to `u8c_u32max` and move it to `u8c/u32.h`.”. +* Add function for checking if a character is a surrogate point; `u8c_issurro`. +* Split `u8c_isalpha` into `u8c_islower` and `u8c_isupper`, move current mapping to `u8c_islower`. All characters that are neither upper case nor lower case must be put in `u8c_isalpha`. +* Add function for getting the name of an Unicode codepoint; `u8c_uninm` (has currently only mapped around ⅔% of all Unicode characters). +* Revert accidental changes to changelog (please be more careful and observant in the future). +* Delete `u8c_errtyp_maxerrtyp` (in favour of `u8c_errtyp_all`). +* Switch the arguments of `u8c_seterr`. +* Add function for getting the name of the block an Unicode codepoint is located in; `u8c_uniblk` (has currently only mapped around 39% of the Unicode blocks). +* Rename all instances of `u32` to `str`. +* Optimise for size (`-Os` instead of `-O3`). +* Update Readme. +* **MAJOR**: Return a tuple (structures) in all returning functions, otherwise void.
+* Add help screen to test program. +* Update Gitignore. +* Restructure test program. +* Add more characters to `u8c_islower`. +* Add more characters to `u8c_isupper`. +* Remove the `runtest` target (just use `make && export LD_LIBRARY_PATH=$PWD && ./test`, which can more easily be modified to pass arguments). +* Add more characters to `u8c_isalpha`. +* Fix incorrect error being set (somewhere, I forgot where). +* Fix `SIZE_C`. + # 21 * Update readme. * Require C23 (C2x). * Use GCC (has better C23 support). * Cleanup UTF-8 decoder and encoder (using binary literals). -* Rename `u8c_u32max` to `u8c_u32max` and move it to `u8c/u32.h`. +* Rename `u8c_unimax` to `u8c_u32max` and move it to `u8c/u32.h`. * Don't clear last error message on calls to `u8c_geterr`. * Restructure source files. * Fix makefile. @@ -50,7 +83,7 @@ * `u8c_end` * `u8c_init` * `u8c_thrdsafe` - * `u8c_u32max` + * `u8c_unimax` * `u8c_ver` * `u8c/u16.h`: * `u8c_u16alloc` @@ -133,7 +166,7 @@ * Remove `u8c_txt` in favour of Unicode string literals (much clearer code, but less portable). * Add function for getting a sub-string of an UTF-32 string; `u8c_u32substr`. * Don't count the null-terminator in string sizes. -* Add macro for maximum valid Unicode codepoint; `u8c_u32max`. +* Add macro for maximum valid Unicode codepoint; `u8c_unimax`. * Remove `txttolit`. * Add function for deallocating UTF-8 strings; `u8c_u8free`. * Turn both `u8c_dbg` and `u8c_thrdsafe` into type `bool` from `uint_least8_t`. diff --git a/docs/u8c_abrt.3 b/docs/u8c_abrt.3 deleted file mode 100644 index 7c92b28..0000000 --- a/docs/u8c_abrt.3 +++ /dev/null @@ -1,19 +0,0 @@ -.TH "u8c_abrt" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_abrt - Abort - Abort program with diagnostic information. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/main.h> */ -# include <u8c/main.h -# define u8c_abrt(why) u8c_abrtfn(__FILE__,(long)__LINE__,__func__,why) -\f[R] -.fi -.SH DESCRIPTION -.PP -The macro \f[B]u8c_abrt\f[R] expands to a valid call to \f[B]u8c_abrtfn\f[R]. -.SH VERSION -.PP -u8c 21 diff --git a/docs/u8c_abrtfn.3 b/docs/u8c_abrtfn.3 deleted file mode 100644 index 5fbba82..0000000 --- a/docs/u8c_abrtfn.3 +++ /dev/null @@ -1,27 +0,0 @@ -.TH "u8c_abrtfn" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_abrtfn - Abort functions - Abort program with diagnostic information. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/main.h> */ -# include <stdbool.h -# include <stdnoreturn.h> -# include <uchar.h> -extern noreturn bool u8c_abrtfn(char const * const fl,long const ln,char const * const fn,char const * const why); -\f[R] -.fi -.SH DESCRIPTION -.PP -The function \f[B]u8c_abrtfn\f[R] aborts the program and prints diagnostic infiormation to \f[B]stderr\f[R]. -.PP -\f[B]__FILE__\f[R] is to be passed at \f[B]fl\f[R], \f[B](long)__LINE__\f[R] at \f[B]ln\f[R], and \f[B]__func__\f[R] at \f[B]fn\f[R]. A standard string (\f[B]char const *\f[R]) must be passed at \f[B]why\f[R], which explains the reason for aborting. -.PP -All arguments are printed, in the end followed by a timestamp representing the number of seconds passed since the current epoch (as returned by \f[B]time(NULL)\f[R]). -.PP -It's recommended to use the convenience macro \f[B]u8c_abrt\f[R], as it expands to a valid call to \f[B]u8c_abrt\f[R]. -.SH VERSION -.PP -u8c 16 (as \f[B]u8c_abrt\f[R]), u8c 21 diff --git a/docs/u8c_col.3 b/docs/u8c_col.3 deleted file mode 100644 index 784a67d..0000000 --- a/docs/u8c_col.3 +++ /dev/null @@ -1,34 +0,0 @@ -.TH "u8c_col" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_col - Colour - Set of macros expanding to hexadecimal colour value expressions. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/fmt.h> */ -# include <stdint.h> -static uint_least32_t const u8c_col_azure = UINT32_C(0x3DA9E1); -static uint_least32_t const u8c_col_ash = UINT32_C(0xD2D2CC); -static uint_least32_t const u8c_col_black = UINT32_C(0x444747); -static uint_least32_t const u8c_col_blue = UINT32_C(0x3D3DE1); -static uint_least32_t const u8c_col_chartreuse = UINT32_C(0xA9E13D); -static uint_least32_t const u8c_col_cyan = UINT32_C(0x3DE1E1); -static uint_least32_t const u8c_col_green = UINT32_C(0x3ED13D); -static uint_least32_t const u8c_col_magenta = UINT32_C(0xE13DE1); -static uint_least32_t const u8c_col_mint = UINT32_C(0x3DE1A9); -static uint_least32_t const u8c_col_orange = UINT32_C(0xE1A93D); -static uint_least32_t const u8c_col_red = UINT32_C(0xE13D3D); -static uint_least32_t const u8c_col_rose = UINT32_C(0xE13DA9); -static uint_least32_t const u8c_col_silver = UINT32_C(0x9CA1A1); -static uint_least32_t const u8c_col_violet = UINT32_C(0xA93dE1); -static uint_least32_t const u8c_col_white = UINT32_C(0xF8F8F1); -static uint_least32_t const u8c_col_yellow = UINT32_C(0xE1E13D); -\f[R] -.fi -.SH DESCRIPTION -.PP -The constant set \f[B]u8c_col\f[R] contains sixteen constants with colour values. -.SH VERSION -.PP -u8c 2 diff --git a/docs/u8c_dbg.3 b/docs/u8c_dbg.3 deleted file mode 100644 index d461d4e..0000000 --- a/docs/u8c_dbg.3 +++ /dev/null @@ -1,19 +0,0 @@ -.TH "u8c_dbg" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_dbg - Debug - Whether or not the library is in debug mode. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/main.h> */ -# include <stdbool.h> -extern bool const u8c_dbg; -\f[R] -.fi -.SH DESCRIPTION -.PP -The constant \f[B]u8c_dbg\f[R] has value \f[B]false\f[R] if u8c has been compiled with debugging disabled, otherwise \f[B]true\f[R]. 
-.SH VERSION -.PP -u8c 0 (as \f[B]u8c_debug\f[R]), u8c 16 diff --git a/docs/u8c_dbgprint.3 b/docs/u8c_dbgprint.3 deleted file mode 100644 index 3d400dc..0000000 --- a/docs/u8c_dbgprint.3 +++ /dev/null @@ -1,24 +0,0 @@ -.TH "u8c_dbgprint" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_dbgprint - Debug print - Print line if debug mode is enabled. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/fmt.h> */ -# if defined(NDEBUG) -# define u8c_dbgprint(...) ((void)0x0) -# else -# include <u8c/println.h> -# include <stdio.h> -# define u8c_dbgprint(...) u8c_println(stderr,__VA_ARGS__) -# endif -\f[R] -.fi -.SH DESCRIPTION -.PP -The function-like macro \f[B]u8c_dbgprint\f[R] passes it\[cq]s input to \f[B]u8c_println\f[R] (if the \f[B]NDEBUG\f[R] macro is defined, nothing is done). -.SH VERSION -.PP -u8c 0 diff --git a/docs/u8c_end.3 b/docs/u8c_end.3 deleted file mode 100644 index 593af14..0000000 --- a/docs/u8c_end.3 +++ /dev/null @@ -1,25 +0,0 @@ -.TH "u8c_end" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_end - End - Finalise u8c and clean up. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/main.h> */ -# include <stdbool.h> -extern bool u8c_end(void); -\f[R] -.fi -.SH DESCRIPTION -.PP -The function \f[B]u8c_end\f[R] ends the current u8c session. -.PP -If \f[B]u8c_end\f[R] is called before \f[B]u8c_init\f[R], \f[B]false\f[R] is returned, and nothing has happened. -.PP -If it is called after it has already been called, unless \f[B]u8c_init\f[R] has been called in the meantime, \f[B]false\f[R] is returned, and nothing has happened. -.PP -Even if \f[B]u8c_thrdsafe\f[R] evaluates to \f[B]true\f[R], this function is never thread-safe. 
-.SH VERSION -.PP -u8c 2 diff --git a/docs/u8c_errhandltyp.3 b/docs/u8c_errhandltyp.3 deleted file mode 100644 index 0477d2c..0000000 --- a/docs/u8c_errhandltyp.3 +++ /dev/null @@ -1,21 +0,0 @@ -.TH "u8c_errhandltyp" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_errhandltyp - Error handler type - Type to be used for error handlers passerd to \f[B]u8c_regerrhandl\f[R]. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/err.h> */ -# include <u8c/errtyp.h> -typedef void (* u8c_errhandltyp)(enum u8c_errtyp); -\f[R] -.fi -.SH DESCRIPTION -.PP -The type definition \f[B]u8c_errhandltyp\f[R] is for convenience. -.PP -It is to be the type of the error handler accepted by \f[B]u8c_regerrhandl\f[R]. -.SH VERSION -.PP -u8c 19 diff --git a/docs/u8c_errtyp.3 b/docs/u8c_errtyp.3 deleted file mode 100644 index f42ac10..0000000 --- a/docs/u8c_errtyp.3 +++ /dev/null @@ -1,31 +0,0 @@ -.TH "u8c_errtyp" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_errtyp - Error type - Enumeration for specifying the type of error. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/err.h> */ -enum u8c_errtyp { - u8c_errtyp_badalloc, - u8c_errtyp_badio, - u8c_errtyp_u32oor, - u8c_errtyp_u8oor, - u8c_errtyp_deferr, - u8c_errtyp_untermin, - u8c_errtyp_maxerrtyp, - u8c_errtyp_all, -}; -\f[R] -.fi -.SH DESCRIPTION -.PP -The enumeration \f[B]u8c_errtyp\f[R] contains the various types of error to be used in u8c. -.PP -The member \f[B]u8c_errtyp_maxerrtyp\f[R] must \f[I]NEVER\f[R] be passed to any u8c function. -.PP -The member \f[B]u8c_errtyp_all\f[R] is to be passed to \f[B]u8c_regerrhandl\f[R] only. -.SH VERSION -.PP -u8c 19 diff --git a/docs/u8c_fmt.3 b/docs/u8c_fmt.3 deleted file mode 100644 index e1499fd..0000000 --- a/docs/u8c_fmt.3 +++ /dev/null @@ -1,19 +0,0 @@ -.TH "u8c_fmt" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_fmt - Format - Format UTF-32 string. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/fmt.h> */ -# include <stddef.h> -# include <uchar.h> -extern bool u8c_fmt(size_t * const outsz,char32_t const * * const out,char32_t const * const in,...); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 2 diff --git a/docs/u8c_fmttyp.3 b/docs/u8c_fmttyp.3 deleted file mode 100644 index 04c4391..0000000 --- a/docs/u8c_fmttyp.3 +++ /dev/null @@ -1,46 +0,0 @@ -.TH "u8c_fmttyp" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_fmttyp - Format type - Format specifier to be used by \f[C]u8c_fmt\f[R] or \f[C]u8c_vfmt\f[R]. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/fmt.h> */ -enum u8c_fmttyp { - u8c_fmttyp_bgcol, - u8c_fmttyp_bgcol0, - u8c_fmttyp_bool, - u8c_fmttyp_byt, - u8c_fmttyp_chr, - u8c_fmttyp_fgcol, - u8c_fmttyp_fgcol0, - u8c_fmttyp_int, - u8c_fmttyp_int16, - u8c_fmttyp_int32, - u8c_fmttyp_int64, - u8c_fmttyp_int8, - u8c_fmttyp_llong, - u8c_fmttyp_long, - u8c_fmttyp_sbyt, - u8c_fmttyp_shrt, - u8c_fmttyp_str, - u8c_fmttyp_sz, - u8c_fmttyp_tm, - u8c_fmttyp_ubyt, - u8c_fmttyp_uint, - u8c_fmttyp_uint16, - u8c_fmttyp_uint32, - u8c_fmttyp_uint64, - u8c_fmttyp_uint8, - u8c_fmttyp_ulong, - u8c_fmttyp_ullong, - u8c_fmttyp_ushrt, -}; -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 0 diff --git a/docs/u8c_geterr.3 b/docs/u8c_geterr.3 deleted file mode 100644 index e19d50d..0000000 --- a/docs/u8c_geterr.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_geterr" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_geterr - Get error - Get last error set by \f[C]u8c_seterr\f[R]. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/err.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_geterr(size_t * const sz,char32_t const * * const out); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 2 diff --git a/docs/u8c_init.3 b/docs/u8c_init.3 deleted file mode 100644 index 35f6346..0000000 --- a/docs/u8c_init.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_init" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_init - Initialise - Initialise and start an u8c session. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/main.h> */ -# include <stdbool.h> -# include <u8c/errtyp.h> -extern bool u8c_init(void); -\f[R] -.fi -.SH DESCRIPTION -.PP -Even if \f[B]u8c_thrdsafe\f[R] evaluates to \f[B]true\f[R], this function is never thread-safe. -.SH VERSION -.PP -u8c 2 diff --git a/docs/u8c_isalnum.3 b/docs/u8c_isalnum.3 deleted file mode 100644 index 273c673..0000000 --- a/docs/u8c_isalnum.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_isalnum" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_isalnum - Is alphanumeric - Check if a character is alphanumeric. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/is.h> */ -# include <stdbool.h> -# include <stdint.h> -# include <uchar.h> -extern bool u8c_isalnum(uint_least8_t * const res,char32_t const chr); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c X diff --git a/docs/u8c_isalpha.3 b/docs/u8c_isalpha.3 deleted file mode 100644 index a063974..0000000 --- a/docs/u8c_isalpha.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_isalpha" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_isalpha - Is alphabetic - Check if a character is alphabetic. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/is.h> */ -# include <stdbool.h> -# include <stdint.h> -# include <uchar.h> -extern bool u8c_isalpha(uint_least8_t * const res,char32_t const chr); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c X diff --git a/docs/u8c_iscntrl.3 b/docs/u8c_iscntrl.3 deleted file mode 100644 index b4fa794..0000000 --- a/docs/u8c_iscntrl.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_iscntrl" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_iscntrl - Is control - Check if a character is a control character. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/is.h> */ -# include <stdbool.h> -# include <stdint.h> -# include <uchar.h> -extern bool u8c_iscntrl(uint_least8_t * const res,char32_t const chr); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c X diff --git a/docs/u8c_isdigit.3 b/docs/u8c_isdigit.3 deleted file mode 100644 index 0e93230..0000000 --- a/docs/u8c_isdigit.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_isdigit" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_isdigit - Is digit - Check if a character is a dozenal digit. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/is.h> */ -# include <stdbool.h> -# include <stdint.h> -# include <uchar.h> -extern bool u8c_isdigit(uint_least8_t * const res,char32_t const chr); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c X diff --git a/docs/u8c_ispunct.3 b/docs/u8c_ispunct.3 deleted file mode 100644 index db9878e..0000000 --- a/docs/u8c_ispunct.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_ispunct" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_ispunct - Is punctuation - Check if a character is a punctuation mark. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/is.h> */ -# include <stdbool.h> -# include <stdint.h> -# include <uchar.h> -extern bool u8c_ispunct(uint_least8_t * const res,char32_t const chr); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c X diff --git a/docs/u8c_isspace.3 b/docs/u8c_isspace.3 deleted file mode 100644 index e78395d..0000000 --- a/docs/u8c_isspace.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_isspace" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_isspace - Is space - Check if a character is an whitespace. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/is.h> */ -# include <stdbool.h> -# include <stdint.h> -# include <uchar.h> -extern bool u8c_isspace(uint_least8_t * const res,char32_t const chr); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c X diff --git a/docs/u8c_isxdigit.3 b/docs/u8c_isxdigit.3 deleted file mode 100644 index 7bb7649..0000000 --- a/docs/u8c_isxdigit.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_isxdigit" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_isxdigit - Is hexadecimal digit - Check if a character is a hexadecimal digit. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/is.h> */ -# include <stdbool.h> -# include <stdint.h> -# include <uchar.h> -extern bool u8c_isxdigit(uint_least8_t * const res,char32_t const chr); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 16 diff --git a/docs/u8c_print.3 b/docs/u8c_print.3 deleted file mode 100644 index 7b5f55f..0000000 --- a/docs/u8c_print.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_print" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_print - Print - Format UTF-32 and print it to file. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/fmt.h> */ -# include <stdbool.h> -# include <stdio.h> -# include <uchar.h> -extern bool u8c_print(FILE * fp,char32_t const * const msg,...); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 2 diff --git a/docs/u8c_println.3 b/docs/u8c_println.3 deleted file mode 100644 index 14b8131..0000000 --- a/docs/u8c_println.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_println" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_println - Print line - Format UTF-32 and print it to file (followed by a new-line). -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/fmt.h> */ -# include <stdbool.h> -# include <stdio.h> -# include <uchar.h> -extern bool u8c_println(FILE * fp,char32_t const * const msg,...); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 0 (as u8c_print), u8c 2 diff --git a/docs/u8c_regerrhandl.3 b/docs/u8c_regerrhandl.3 deleted file mode 100644 index 27be625..0000000 --- a/docs/u8c_regerrhandl.3 +++ /dev/null @@ -1,23 +0,0 @@ -.TH "u8c_regerrhandl" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_regerrhandl - Register error handler - Register error handler function to be called by \f[B]u8c_seterr\f[R]. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/err.h> */ -# include <stdbool.h> -# include <u8c/errhandltyp.h> -# include <u8c/errtyp.h> -extern bool u8c_regerrhandl(enum u8c_errtyp typ,u8c_errhandltyp errhandl); -\f[R] -.fi -.SH DESCRIPTION -.PP -Registers an error handler to be called when an error of type \f[B]typ\f[R] is detected. -.PP -If \f[B]u8c_errtyp_all\f[R] is passed at \f[B]typ\f[R], the error handler is registered for all error types. -.SH VERSION -.PP -u8c 19 diff --git a/docs/u8c_seterr.3 b/docs/u8c_seterr.3 deleted file mode 100644 index be9ed98..0000000 --- a/docs/u8c_seterr.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_seterr" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_seterr - Set error - Set error and call error handler. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/err.h> */ -# include <stdbool.h> -# include <u8c/errtyp.h> -# include <uchar.h> -extern bool u8c_seterr(char32_t const * const msg,enum u8c_errtyp _typ); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 10 diff --git a/docs/u8c_setfmt.3 b/docs/u8c_setfmt.3 deleted file mode 100644 index 65d7646..0000000 --- a/docs/u8c_setfmt.3 +++ /dev/null @@ -1,19 +0,0 @@ -.TH "u8c_setfmt" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_setfmt - Set format - Set format to be used by \f[B]u8c_fmt\f[R] and company. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/fmt.h> */ -# include <stdbool.h> -# include <stdint.h> -extern bool u8c_setfmt(uint_least8_t const base,uint_least8_t const endian); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 16 diff --git a/docs/u8c_thrdsafe.3 b/docs/u8c_thrdsafe.3 deleted file mode 100644 index 09a0e3f..0000000 --- a/docs/u8c_thrdsafe.3 +++ /dev/null @@ -1,23 +0,0 @@ -.TH "u8c_thrdsafe" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_thrdsafe - Thread-safe - Whether or not u8c is thread-safe. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/main.h> */ -# include <stdbool.h> -extern bool const u8c_thrdsafe; -\f[R] -.fi -.SH DESCRIPTION -.PP -The constant \f[B]u8c_thrdsafe\f[R] evaluates to \f[B]true\f[R] if u8c is thread-safe, that is, the following functions (that othwerise wouldn't be thread-safe) may be called from multiple threads: \f[B]u8c_geterr\f[R], \f[B]u8c_regerrhandl\f[R], \f[B]u8c_seterr\f[R], and \f[B]u8c_setfmt\f[R]. -.PP -If it evaluates to \f[B]false\f[R], the functions listed may only be called from one thread. -.PP -The functions \f[B]u8c_end\f[R] and \f[B]u8c_init\f[R] may never be called from more than one thread. 
-.SH VERSION -.PP -u8c 0 diff --git a/docs/u8c_u16alloc.3 b/docs/u8c_u16alloc.3 deleted file mode 100644 index 96a59a6..0000000 --- a/docs/u8c_u16alloc.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_u16alloc" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u16alloc - UTF-16 allocate - Allocate UTF-16 string. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u16.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u16alloc(char16_t * * const u16,size_t const sz); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 20 diff --git a/docs/u8c_u16free.3 b/docs/u8c_u16free.3 deleted file mode 100644 index 7674794..0000000 --- a/docs/u8c_u16free.3 +++ /dev/null @@ -1,19 +0,0 @@ -.TH "u8c_u16free" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u16free - UTF-16 free - Deallocate UTF-16 string and set it to \f[B]NULL\f[R]. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u16.h> */ -# include <stdbool.h> -# include <uchar.h> -extern bool u8c_u16free(char16_t const * * const u16); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 20 diff --git a/docs/u8c_u32alloc.3 b/docs/u8c_u32alloc.3 deleted file mode 100644 index 4ced722..0000000 --- a/docs/u8c_u32alloc.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_u32alloc" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32alloc - UTF-32 allocate - Allocate UTF-32 string. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u32alloc(char32_t * * const u32,size_t const sz); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 16 diff --git a/docs/u8c_u32cat.3 b/docs/u8c_u32cat.3 deleted file mode 100644 index 78d8a5c..0000000 --- a/docs/u8c_u32cat.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_u32cat" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32cat - UTF-32 concatenate - Concatenate two UTF-32 strings. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u32cat(size_t * const sz,char32_t const * * const out,char32_t const * const lstr,char32_t const * const rstr); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 16 diff --git a/docs/u8c_u32cmp.3 b/docs/u8c_u32cmp.3 deleted file mode 100644 index 2954a8d..0000000 --- a/docs/u8c_u32cmp.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_u32cmp" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32cmp - UTF-32 compare - Compare two UTF-32 strings. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <stdbool.h> -# include <stdint.h> -# include <uchar.h> -extern bool u8c_u32cmp(uint_least8_t * const res,char32_t const * const lstr,char32_t const * const rstr); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 9 diff --git a/docs/u8c_u32cp.3 b/docs/u8c_u32cp.3 deleted file mode 100644 index 507e36e..0000000 --- a/docs/u8c_u32cp.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_u32cp" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32cp - UTF-32 copy - Copy an UTF-32 string. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u32cp(size_t * const sz,char32_t const * * const out,char32_t const * const in); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 2 diff --git a/docs/u8c_u32fndchr.3 b/docs/u8c_u32fndchr.3 deleted file mode 100644 index 566985e..0000000 --- a/docs/u8c_u32fndchr.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_u32fndchr" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32fndchr - UTF-32 find character - Find the first occurence of an UTF-32 character in an UTF-32 string. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u32fndchr(size_t * const pos,char32_t const * const in,char32_t const chr); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 16 diff --git a/docs/u8c_u32fndpat.3 b/docs/u8c_u32fndpat.3 deleted file mode 100644 index b74da9b..0000000 --- a/docs/u8c_u32fndpat.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_u32fndpat" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32fndpat - UTF-32 find pattern - Find the first occurence of an UTF-32 pattern (string) in an UTF-32 string. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u32fndpat(size_t * const pos,char32_t const * const in,char32_t const * const pat); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 16 diff --git a/docs/u8c_u32free.3 b/docs/u8c_u32free.3 deleted file mode 100644 index 0dd58f1..0000000 --- a/docs/u8c_u32free.3 +++ /dev/null @@ -1,19 +0,0 @@ -.TH "u8c_u32free" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32free - UTF-32 free - Deallocate UTF-32 string and set it to \f[B]NULL\f[R]. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <stdbool.h> -# include <uchar.h> -extern bool u8c_u32free(char32_t const * * const u32); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 14 diff --git a/docs/u8c_u32ins.3 b/docs/u8c_u32ins.3 deleted file mode 100644 index 8dc54c7..0000000 --- a/docs/u8c_u32ins.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_u32ins" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32ins - UTF-32 insert - Insert an UTF-32 string into another UTF-32 string. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u32ins(size_t * const sz,char32_t const * * const out,size_t const pos,char32_t const * const str0,char32_t const * const str1); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 19 diff --git a/docs/u8c_u32max.3 b/docs/u8c_u32max.3 deleted file mode 100644 index fdb7270..0000000 --- a/docs/u8c_u32max.3 +++ /dev/null @@ -1,18 +0,0 @@ -.TH "u8c_u32max" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32max - UTF-32 maximum - Maximum valid UTF-32 value. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <uchar.h> -static char32_t const u8c_u32max = U'\x10FFFF'; -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 16 (as \f[B]u8c_unimax\f[R]), u8c 21 diff --git a/docs/u8c_u32substr.3 b/docs/u8c_u32substr.3 deleted file mode 100644 index 8615447..0000000 --- a/docs/u8c_u32substr.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_u32substr" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32substr - UTF-32 sub-string - Get sub-string of an UTF-32 string. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u32substr(char32_t const * * const out,size_t const start,size_t const len,char32_t const * const in); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 16 diff --git a/docs/u8c_u32sz.3 b/docs/u8c_u32sz.3 deleted file mode 100644 index 61254b7..0000000 --- a/docs/u8c_u32sz.3 +++ /dev/null @@ -1,20 +0,0 @@ -.TH "u8c_u32sz" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u32sz - UTF-32 size - Get the size of an UTF-32 string. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u32.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u32sz(size_t * sz,char32_t const * in); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 2 diff --git a/docs/u8c_u8alloc.3 b/docs/u8c_u8alloc.3 deleted file mode 100644 index ab0cf15..0000000 --- a/docs/u8c_u8alloc.3 +++ /dev/null @@ -1,19 +0,0 @@ -.TH "u8c_u8alloc" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u8alloc - UTF-8 allocate - Allocate UTF-8 string. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u8.h> */ -# include <stdbool.h> -# include <stddef.h> -extern bool u8c_u8alloc(unsigned char * * const u32,size_t const sz); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 16 diff --git a/docs/u8c_u8dec.3 b/docs/u8c_u8dec.3 deleted file mode 100644 index e4f7fc8..0000000 --- a/docs/u8c_u8dec.3 +++ /dev/null @@ -1,28 +0,0 @@ -.\" Automatically generated by Pandoc 2.14.0.2 -.\" -.TH "" "" "" "" "" -.hy -.SH NAME -.PP -u8c_u8dec - UTF-8 decode - Convert an UTF-8 string to UTF-32. -.SH DECLARATION -.IP -.nf -\f[C] -/* # include <u8c/u8.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u8dec(size_t * const sz,char32_t const * * const out,unsigned char const * const in); -\f[R] -.fi -.SH DESCRIPTION -.PP -The function \f[C]u8c_u8dec\f[R] converts the given UTF-8 string -(\f[I]in\f[R]) to UTF-32 (\f[I]out\f[R]). -.PP -The size of the output string (excluding the null-terminator) is placed -into \f[I]sz\f[R], if [\f[I]sz\f[R]] is not equal to \f[I]NULL\f[R]. -.SH VERSION -.PP -u8c 0 diff --git a/docs/u8c_u8enc.3 b/docs/u8c_u8enc.3 deleted file mode 100644 index d6fc549..0000000 --- a/docs/u8c_u8enc.3 +++ /dev/null @@ -1,28 +0,0 @@ -.\" Automatically generated by Pandoc 2.14.0.2 -.\" -.TH "" "" "" "" "" -.hy -.SH NAME -.PP -u8c_u8enc - UTF-8 encode - Convert an UTF-32 string to UTF-8. 
-.SH DECLARATION -.IP -.nf -\f[C] -/* # include <u8c/u8.h> */ -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_u8enc(size_t * const sz,unsigned char const * * const out,char32_t const * const in); -\f[R] -.fi -.SH DESCRIPTION -.PP -The function \f[C]u8c_u8enc\f[R] converts the given UTF-32 string -(\f[B]in\f[R]) to UTF-8 (\f[B]out\f[R]). -.PP -The size of the output string (excluding the null-terminator) is placed -into \f[B]sz\f[R], if [\f[B]sz\f[R]] is not equal to \f[I]NULL\f[R]. -.SH VERSION -.PP -u8c 0 diff --git a/docs/u8c_u8free.3 b/docs/u8c_u8free.3 deleted file mode 100644 index da10a20..0000000 --- a/docs/u8c_u8free.3 +++ /dev/null @@ -1,18 +0,0 @@ -.TH "u8c_u8free" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_u8free - UTF-8 free - Deallocate UTF-8 string and set it to \f[B]NULL\f[R]. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/u8.h> */ -# include <stdbool.h> -extern bool u8c_u8free(unsigned char const * * const u8); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 16 diff --git a/docs/u8c_ver.3 b/docs/u8c_ver.3 deleted file mode 100644 index c1a2dc3..0000000 --- a/docs/u8c_ver.3 +++ /dev/null @@ -1,18 +0,0 @@ -.TH "u8c_ver" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_ver - Version - Version of the u8c API. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/main.h> */ -# include <stdint.h> -static uint_least64_t const u8c_ver = /* version */; -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 0 diff --git a/docs/u8c_vfmt.3 b/docs/u8c_vfmt.3 deleted file mode 100644 index 0eff93a..0000000 --- a/docs/u8c_vfmt.3 +++ /dev/null @@ -1,21 +0,0 @@ -.TH "u8c_vfmt" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_vfmt - Variadic format - Format UTF-32 string. 
-.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/fmt.h> */ -# include <stdarg.h> -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -extern bool u8c_vfmt(size_t * const sz,char32_t const * * const out,char32_t const * const in,va_list args); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 2 diff --git a/docs/u8c_vprint.3 b/docs/u8c_vprint.3 deleted file mode 100644 index e3d58f6..0000000 --- a/docs/u8c_vprint.3 +++ /dev/null @@ -1,21 +0,0 @@ -.TH "u8c_vprint" "3" "" "u8c" "u8c API Manual" -.SH NAME -.PP -u8c_vprint - Variadic print - Format UTF-32 and print it to file. -.SH DECLARATION -.PP -.nf -\f[C] -/* # include <u8c/fmt.h> */ -# include <stdarg.h> -# include <stdbool.h> -# include <stdio.h> -# include <uchar.h> -extern bool u8c_vprint(FILE * fp,char32_t const * const msg,va_list args); -\f[R] -.fi -.SH DESCRIPTION -.PP -.SH VERSION -.PP -u8c 2 diff --git a/include/u8c/SIZE_C.h b/include/u8c/SIZE_C.h index 542ddfd..be253f3 100644 --- a/include/u8c/SIZE_C.h +++ b/include/u8c/SIZE_C.h @@ -21,7 +21,7 @@ # elif SIZE_MAX == UINT_LEAST16_MAX # define SIZE_C(val) UINT16_C(val) # elif SIZE_MAX == UINT_LEAST32_MAX -# define SIZE_C(val) UINT32s_C(val) +# define SIZE_C(val) UINT32_C(val) # elif SIZE_MAX == UINT_LEAST64_MAX # define SIZE_C(val) UINT64_C(val) # else diff --git a/include/u8c/chk.h b/include/u8c/chk.h new file mode 100644 index 0000000..03e4ff2 --- /dev/null +++ b/include/u8c/chk.h @@ -0,0 +1,76 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. +*/ +# if !defined(u8c_hdr_chk) +# define u8c_hdr_chk +# include <stdbool.h> +# include <uchar.h> +/* Enumerations: */ +/* Type definitions: */ +/* Structures: */ +struct u8c_isalnum_tuple { + bool res; + bool stat; +}; +struct u8c_isalpha_tuple { + bool res; + bool stat; +}; +struct u8c_iscntrl_tuple { + bool res; + bool stat; +}; +struct u8c_isdigit_tuple { + bool res; + bool stat; +}; +struct u8c_islower_tuple { + bool res; + bool stat; +}; +struct u8c_ispunct_tuple { + bool res; + bool stat; +}; +struct u8c_isspace_tuple { + bool res; + bool stat; +}; +struct u8c_issurro_tuple { + bool res; + bool stat; +}; +struct u8c_isupper_tuple { + bool res; + bool stat; +}; +struct u8c_isxdigit_tuple { + bool res; + bool stat; +}; +/* Functions: */ +extern struct u8c_isalnum_tuple u8c_isalnum( char32_t const chr); /* Is alphanumeric */ +extern struct u8c_isalpha_tuple u8c_isalpha( char32_t const chr); /* Is alphabetic */ +extern struct u8c_iscntrl_tuple u8c_iscntrl( char32_t const chr); /* Is control character */ +extern struct u8c_isdigit_tuple u8c_isdigit( char32_t const chr); /* Is digit */ +extern struct u8c_islower_tuple u8c_islower( char32_t const chr); /* Is lowercase */ +extern struct u8c_ispunct_tuple u8c_ispunct( char32_t const chr); /* Is punctuation */ +extern struct u8c_isspace_tuple u8c_isspace( char32_t const chr); /* Is space */ +extern struct u8c_issurro_tuple u8c_issurro( char32_t const chr); /* Is surrogate point */ +extern struct u8c_isupper_tuple u8c_isupper( char32_t const chr); /* Is uppercase */ +extern struct u8c_isxdigit_tuple u8c_isxdigit(char32_t const chr); /* Is hexadecimal digit */ +/* Constants & Variables: */ +/* Macros: */ +# endif diff --git a/include/u8c/err.h b/include/u8c/err.h index c6b0f9f..26d4b2f 100644 --- a/include/u8c/err.h 
+++ b/include/u8c/err.h @@ -20,22 +20,32 @@ # include <uchar.h> /* Enumerations: */ enum u8c_errtyp { - u8c_errtyp_badalloc, /* Bad allocation */ - u8c_errtyp_badio, /* Bad input or output */ - u8c_errtyp_u32oor, /* UTF-32 out of range */ - u8c_errtyp_u8oor, /* UTF-8 out of range */ - u8c_errtyp_deferr, /* Default error */ - u8c_errtyp_untermin, /* Unterminated input */ - u8c_errtyp_maxerrtyp, /* Maximum error type */ - u8c_errtyp_all, /* All */ + u8c_errtyp_badalloc, /* Bad allocation */ + u8c_errtyp_badio, /* Bad input or output */ + u8c_errtyp_stroor, /* UTF-32 out of range */ + u8c_errtyp_u8oor, /* UTF-8 out of range */ + u8c_errtyp_deferr, /* Default error */ + u8c_errtyp_untermin, /* Unterminated input */ + u8c_errtyp_all, /* All */ }; /* Type definitions: */ -typedef void (* u8c_errhandltyp)(enum u8c_errtyp); +typedef void (* u8c_errhandltyp)(enum u8c_errtyp); /* Error handler type */ /* Structures: */ -/* Functions */ -extern bool u8c_geterr( size_t * const sz, char32_t const * * const out); -extern bool u8c_regerrhandl(enum u8c_errtyp typ,u8c_errhandltyp errhandl); -extern bool u8c_seterr( char32_t const * const msg,enum u8c_errtyp typ); +struct u8c_geterr_tuple { + char32_t const * err; + size_t errsz; + bool stat; +}; +struct u8c_regerrhandl_tuple { + bool stat; +}; +struct u8c_seterr_tuple { + bool stat; +}; +/* Functions: */ +extern struct u8c_geterr_tuple u8c_geterr( void); /* Get error */ +extern struct u8c_regerrhandl_tuple u8c_regerrhandl(enum u8c_errtyp typ,u8c_errhandltyp errhandl); /* Register error handler */ +extern struct u8c_seterr_tuple u8c_seterr( enum u8c_errtyp typ,char32_t const * const restrict msg); /* Set error */ /* Constants & Variables: */ /* Macros: */ # endif diff --git a/include/u8c/fmt.h b/include/u8c/fmt.h index 138acd6..5ddfb02 100644 --- a/include/u8c/fmt.h +++ b/include/u8c/fmt.h @@ -54,13 +54,35 @@ enum u8c_fmttyp { }; /* Type definitions: */ /* Structures: */ -/* Functions */ -extern bool u8c_fmt( size_t * const 
outsz,char32_t const * * const out, char32_t const * const in,...); -extern bool u8c_print( FILE * fp, char32_t const * const msg, ...); -extern bool u8c_println(FILE * fp, char32_t const * const msg, ...); -extern bool u8c_setfmt( uint_least8_t const base, uint_least8_t const endian); -extern bool u8c_vfmt( size_t * const sz, char32_t const * * const out, char32_t const * const in,va_list args); -extern bool u8c_vprint( FILE * fp, char32_t const * const msg, va_list args); +struct u8c_fmt_tuple { + char32_t const * str; + size_t strsz; + bool stat; +}; +struct u8c_print_tuple { + bool stat; +}; +struct u8c_println_tuple { + bool stat; +}; +struct u8c_setfmt_tuple { + bool stat; +}; +struct u8c_vfmt_tuple { + char32_t const * str; + size_t strsz; + bool stat; +}; +struct u8c_vprint_tuple { + bool stat; +}; +/* Functions: */ +extern struct u8c_fmt_tuple u8c_fmt( char32_t const * const restrict in, ...); /* Format */ +extern struct u8c_print_tuple u8c_print( FILE * restrict fp, char32_t const * const restrict msg, ...); /* Print */ +extern struct u8c_println_tuple u8c_println(FILE * restrict fp, char32_t const * const restrict msg, ...); /* Print line */ +extern struct u8c_setfmt_tuple u8c_setfmt( uint_least8_t const base,bool const endian); /* Set format */ +extern struct u8c_vfmt_tuple u8c_vfmt( char32_t const * const restrict in, va_list args); /* Variadic format */ +extern struct u8c_vprint_tuple u8c_vprint( FILE * restrict fp, char32_t const * const restrict msg, va_list args); /* Variadic print */ /* Constants & Variables: */ static uint_least32_t const u8c_col_azure = UINT32_C(0x3DA9E1); static uint_least32_t const u8c_col_ash = UINT32_C(0xD2D2CC); @@ -80,6 +102,7 @@ static uint_least32_t const u8c_col_white = UINT32_C(0xF8F8F1); static uint_least32_t const u8c_col_yellow = UINT32_C(0xE1E13D); /* Macros: */ # if defined(NDEBUG) +/* Debug print */ # define u8c_dbgprint(...) ((void)0x0) # else # define u8c_dbgprint(...) 
u8c_println(stderr,__VA_ARGS__) diff --git a/include/u8c/intern.h b/include/u8c/intern.h index 6aeaee2..4549337 100644 --- a/include/u8c/intern.h +++ b/include/u8c/intern.h @@ -15,34 +15,35 @@ */ # if !defined(u8c_sym_dattyp) # define u8c_sym_dattyp -# include <stdalign.h> +# if defined(u8c_bethrdsafe) && defined(__STDC_NO_THREADS__) +# error u8c is set to be thread-safe, but the implementation does not support multithreading. +# endif # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/err.h> # include <uchar.h> +# include <u8c/err.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif /* Enumerations: */ /* Type definitions: */ /* Structures: */ -struct u8c_dattyp { - char32_t const * err; - u8c_errhandltyp errhandls[(size_t)u8c_errtyp_maxerrtyp]; - uint_least8_t fmtbase; - bool fmtendian; - uint_least8_t stat; +struct u8c_dattyp { /* Data type */ + char32_t const * err; /* Error */ + u8c_errhandltyp errhandls[(size_t)u8c_errtyp_all]; /* Error handlers */ + uint_least8_t fmtbase; /* Format base */ + bool fmtendian; /* Format endian */ + uint_least8_t stat; /* Status */ # if defined(u8c_bethrdsafe) - mtx_t errlock; - mtx_t errhandlslock; - mtx_t fmtlock; - mtx_t outlock; + mtx_t errhandlslock; /* Error handlers lock */ + mtx_t errlock; /* Error lock */ + mtx_t fmtlock; /* Format lock */ + mtx_t outlock; /* Output lock */ # endif }; -/* Functions */ +/* Functions: */ /* Constants & Variables: */ -extern struct u8c_dattyp u8c_dat; +extern struct u8c_dattyp u8c_dat; /* Data */ /* Macros: */ # endif diff --git a/include/u8c/is.h b/include/u8c/is.h deleted file mode 100644 index 20097cf..0000000 --- a/include/u8c/is.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. 
- - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# if !defined(u8c_hdr_is) -# define u8c_hdr_is -# include <stdbool.h> -# include <stdint.h> -# include <uchar.h> -/* Enumerations: */ -/* Type definitions: */ -/* Structures: */ -/* Functions */ -extern bool u8c_isalnum( uint_least8_t * const res,char32_t const chr); -extern bool u8c_isalpha( uint_least8_t * const res,char32_t const chr); -extern bool u8c_iscntrl( uint_least8_t * const res,char32_t const chr); -extern bool u8c_isdigit( uint_least8_t * const res,char32_t const chr); -extern bool u8c_ispunct( uint_least8_t * const res,char32_t const chr); -extern bool u8c_isspace( uint_least8_t * const res,char32_t const chr); -extern bool u8c_isxdigit(uint_least8_t * const res,char32_t const chr); -/* Constants & Variables: */ -/* Macros: */ -# endif diff --git a/include/u8c/main.h b/include/u8c/main.h index 2a13cc0..4004550 100644 --- a/include/u8c/main.h +++ b/include/u8c/main.h @@ -13,24 +13,43 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -/* Abort */ # if !defined(u8c_hdr_main) # define u8c_hdr_main # include <stdbool.h> +# include <stddef.h> # include <stdint.h> # include <stdnoreturn.h> # include <uchar.h> /* Enumerations: */ /* Type definitions: */ /* Structures: */ -/* Functions */ -extern noreturn bool u8c_abrtfn(char const * const fl,long const ln,char const * const fn,char const * const why); -extern bool u8c_end( void); -extern bool u8c_init( void); +struct u8c_end_tuple { + bool stat; +}; +struct u8c_init_tuple { + bool stat; +}; +struct u8c_uniblk_tuple { + char32_t const * blk; + size_t blksz; + bool stat; +}; +struct u8c_uninm_tuple { + char32_t const * nm; + size_t nmsz; + bool stat; +}; +/* Functions: */ +noreturn extern void u8c_abrtfn(char const * const restrict fl, long const ln,char const * const restrict fn,char const * const restrict why); /* Abort function */ +extern struct u8c_end_tuple u8c_end( void); /* End */ +extern struct u8c_init_tuple u8c_init( void); /* Initialise */ +extern struct u8c_uniblk_tuple u8c_uniblk(char32_t const chr); /* Unicode block */ +extern struct u8c_uninm_tuple u8c_uninm( char32_t const chr); /* Unicode name */ /* Constants & Variables: */ -extern bool const u8c_dbg; -extern bool const u8c_thrdsafe; -static uint_least64_t const u8c_ver = UINT64_C(0x19); +extern bool const u8c_dbg; /* Debug */ +extern bool const u8c_thrdsafe; /* Thread-safe */ +static char32_t const u8c_unimax = U'\U0010FFFF'; /* Unicode maximum */ +static uint_least64_t const u8c_ver = UINT64_C(0x1A); /* Version */ /* Macros: */ -# define u8c_abrt(why) u8c_abrtfn(__FILE__,(long)__LINE__,__func__,why) +# define u8c_abrt(why) u8c_abrtfn(__FILE__,(long)__LINE__,__func__,why) /* Abort */ # endif diff --git a/include/u8c/str.h b/include/u8c/str.h new file mode 100644 index 0000000..eecea26 --- /dev/null +++ b/include/u8c/str.h @@ -0,0 +1,81 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. 
+ + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. +*/ +# if !defined(u8c_hdr_str) +# define u8c_hdr_str +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <uchar.h> +/* Enumerations: */ +/* Type definitions: */ +/* Structures: */ +struct u8c_stralloc_tuple { + bool stat; + char32_t * str; +}; +struct u8c_strcat_tuple { + bool stat; + char32_t const * str; + size_t strsz; +}; +struct u8c_strcmp_tuple { + bool stat; + uint_least8_t res; +}; +struct u8c_strcp_tuple { + bool stat; + char32_t const * str; + size_t strsz; +}; +struct u8c_strfndchr_tuple { + size_t pos; + bool stat; +}; +struct u8c_strfndpat_tuple { + size_t pos; + bool stat; +}; +struct u8c_strfree_tuple { + bool stat; +}; +struct u8c_strins_tuple { + bool stat; + char32_t const * str; + size_t strsz; +}; +struct u8c_strsubstr_tuple { + bool stat; + char32_t const * str; + size_t strsz; +}; +struct u8c_strsz_tuple { + bool stat; + size_t sz; +}; +/* Functions: */ +extern struct u8c_stralloc_tuple u8c_stralloc( size_t const sz); /* String allocate */ +extern struct u8c_strcat_tuple u8c_strcat( char32_t const * const restrict str, char32_t const * const rstr); /* String concatenate */ +extern struct u8c_strcmp_tuple u8c_strcmp( char32_t const * const restrict lstr,char32_t const * const restrict rstr); /* String compare */ +extern struct u8c_strcp_tuple u8c_strcp( char32_t const * const restrict in); /* String copy */ 
+extern struct u8c_strfndchr_tuple u8c_strfndchr(char32_t const * const restrict in, char32_t const chr); /* String find character */ +extern struct u8c_strfndpat_tuple u8c_strfndpat(char32_t const * const restrict in, char32_t const * const restrict pat); /* String find pattern */ +extern struct u8c_strfree_tuple u8c_strfree( char32_t const * const restrict str); /* String free */ +extern struct u8c_strins_tuple u8c_strins( size_t const pos, char32_t const * const restrict str0,char32_t const * const restrict str1); /* String insert */ +extern struct u8c_strsubstr_tuple u8c_strsubstr(size_t const start,size_t const len, char32_t const * const restrict in); /* String sub-string */ +extern struct u8c_strsz_tuple u8c_strsz( char32_t const * const restrict in); /* String size */ +/* Constants & Variables: */ +/* Macros: */ +# endif diff --git a/include/u8c/u16.h b/include/u8c/u16.h index 8d81784..629d39a 100644 --- a/include/u8c/u16.h +++ b/include/u8c/u16.h @@ -21,9 +21,16 @@ /* Enumerations: */ /* Type definitions: */ /* Structures: */ -/* Functions */ -extern bool u8c_u16alloc(char16_t * * const u16,size_t const sz); -extern bool u8c_u16free( char16_t const * * const u16); +struct u8c_u16alloc_tuple { + bool stat; + char16_t * u16; +}; +struct u8c_u16free_tuple { + bool stat; +}; +/* Functions: */ +extern struct u8c_u16alloc_tuple u8c_u16alloc(size_t const sz); /* UTF-16 allocate */ +extern struct u8c_u16free_tuple u8c_u16free( char16_t const * const restrict u16); /* UTF-16 free */ /* Constants & Variables: */ /* Macros: */ # endif diff --git a/include/u8c/u32.h b/include/u8c/u32.h deleted file mode 100644 index e5567e7..0000000 --- a/include/u8c/u32.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. 
- - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# if !defined(u8c_hdr_u32) -# define u8c_hdr_u32 -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <uchar.h> -/* Enumerations: */ -/* Type definitions: */ -/* Structures: */ -/* Functions */ -extern bool u8c_u32alloc( char32_t * * const u32,size_t const sz); -extern bool u8c_u32cat( size_t * const sz, char32_t const * * const out, char32_t const * const lstr,char32_t const * const rstr); -extern bool u8c_u32cmp( uint_least8_t * const res,char32_t const * const lstr, char32_t const * const rstr); -extern bool u8c_u32cp( size_t * const sz, char32_t const * * const out, char32_t const * const in); -extern bool u8c_u32fndchr(size_t * const pos,char32_t const * const in, char32_t const chr); -extern bool u8c_u32fndpat(size_t * const pos,char32_t const * const in, char32_t const * const pat); -extern bool u8c_u32free( char32_t const * * const u32); -extern bool u8c_u32ins( size_t * const sz, char32_t const * * const out, size_t const pos, char32_t const * const str0,char32_t const * const str1); -extern bool u8c_u32substr(char32_t const * * const out,size_t const start,size_t const len, char32_t const * const in); -extern bool u8c_u32sz( size_t * sz, char32_t const * in); -/* Constants & Variables: */ -static char32_t const u8c_u32max = U'\x10FFFF'; -/* Macros: */ -# endif diff --git a/include/u8c/u8.h b/include/u8c/u8.h index 
f47666b..c7b6cc5 100644 --- a/include/u8c/u8.h +++ b/include/u8c/u8.h @@ -21,11 +21,28 @@ /* Enumerations: */ /* Type definitions: */ /* Structures: */ -/* Functions */ -extern bool u8c_u8alloc(unsigned char * * const u32,size_t const sz); -extern bool u8c_u8dec( size_t * const sz, char32_t const * * const out,unsigned char const * const in); -extern bool u8c_u8enc( size_t * const sz, unsigned char const * * const out,char32_t const * const in); -extern bool u8c_u8free( unsigned char const * * const u8); +struct u8c_u8alloc_tuple { /* Return value of u8c_u8alloc */ + bool stat; /* Status */ + unsigned char * u8; /* UTF-8 string */ +}; +struct u8c_u8dec_tuple { /* Return value of u8c_u8dec */ + bool stat; /* Status */ + char32_t const * str; /* UTF-32 string */ + size_t strsz; /* String size */ +}; +struct u8c_u8enc_tuple { /* Return value of u8c_u8enc */ + bool stat; /* Status */ + unsigned char const * u8; /* UTF-8 string */ + size_t u8sz; /* UTF-8 size */ +}; +struct u8c_u8free_tuple { /* Return value of u8c_u8free */ + bool stat; /* Status */ +}; +/* Functions: */ +extern struct u8c_u8alloc_tuple u8c_u8alloc(size_t const sz); /* UTF-8 allocate */ +extern struct u8c_u8dec_tuple u8c_u8dec( unsigned char const * const restrict u8); /* UTF-8 decode */ +extern struct u8c_u8enc_tuple u8c_u8enc( char32_t const * const restrict u8); /* UTF-8 encode; NOTE(review): the input is char32_t (UTF-32) although the parameter is named u8 - consider renaming */ +extern struct u8c_u8free_tuple u8c_u8free( unsigned char const * const restrict u8); /* UTF-8 free */ /* Constants & Variables: */ /* Macros: */ # endif diff --git a/src/u8c/is.h.d/isalnum.c b/src/u8c/chk.h.d/isalnum.c index 13834c3..2c8115b 100644 --- a/src/u8c/is.h.d/isalnum.c +++ b/src/u8c/chk.h.d/isalnum.c @@ -13,20 +13,14 @@ If not, see <https://www.gnu.org/licenses/>.
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/is.h> -bool u8c_isalnum(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); - uint_least8_t res = UINT8_C(0x0); - u8c_isalpha(&res,_chr); - if(res) { - *_res = res; - return false; - } - u8c_isdigit(&res,_chr); - *_res = res; - return false; +# include <u8c/chk.h> +struct u8c_isalnum_tuple u8c_isalnum(char32_t const _chr) { /* Is alphanumeric: res is true when _chr is alphabetic or a digit */ + struct u8c_isalnum_tuple ret = { + .stat = false, /* stat is initialised to false and never changed in this function */ + }; + ret.res = u8c_isalpha(_chr).res || u8c_isdigit(_chr).res; /* u8c_isdigit is only called when u8c_isalpha reports false (short-circuit) */ + return ret; } diff --git a/src/u8c/chk.h.d/isalpha.c b/src/u8c/chk.h.d/isalpha.c new file mode 100644 index 0000000..2631095 --- /dev/null +++ b/src/u8c/chk.h.d/isalpha.c @@ -0,0 +1,109 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>.
+*/ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <u8c/chk.h> +struct u8c_isalpha_tuple u8c_isalpha(char32_t const _chr) { /* Is alphabetic: res is true for lowercase, uppercase and the code points listed below */ + struct u8c_isalpha_tuple ret = { + .stat = false, /* stat is initialised to false and never changed in this function */ + }; + ret.res = u8c_islower(_chr).res || u8c_isupper(_chr).res; /* Anything u8c_islower or u8c_isupper accepts is alphabetic */ + if(ret.res) { + return ret; + } + switch(_chr) { /* No default label: res stays false for any code point not listed */ + case U'\u0297': /* LATIN LETTER STRETCHED C; NOTE(review): this comment previously read GLOTTAL STOP, which is U+0294 - confirm which code point is intended */ + case U'\u16A0': /* RUNIC LETTER FEHU FEOH FE F */ + case U'\u16A1': /* RUNIC LETTER V */ + case U'\u16A2': /* RUNIC LETTER URUZ UR U */ + case U'\u16A3': /* RUNIC LETTER YR */ + case U'\u16A4': /* RUNIC LETTER Y */ + case U'\u16A5': /* RUNIC LETTER W */ + case U'\u16A6': /* RUNIC LETTER THURISAZ THURS THORN */ + case U'\u16A7': /* RUNIC LETTER ETH */ + case U'\u16A8': /* RUNIC LETTER ANSUZ A */ + case U'\u16A9': /* RUNIC LETTER OS O */ + case U'\u16AA': /* RUNIC LETTER AC A */ + case U'\u16AB': /* RUNIC LETTER AESC */ + case U'\u16AC': /* RUNIC LETTER LONG-BRANCH-OSS O */ + case U'\u16AD': /* RUNIC LETTER SHORT-TWIG-OSS O */ + case U'\u16AE': /* RUNIC LETTER O */ + case U'\u16AF': /* RUNIC LETTER OE */ + case U'\u16B0': /* RUNIC LETTER ON */ + case U'\u16B1': /* RUNIC LETTER RAIDO RAD REID R */ + case U'\u16B2': /* RUNIC LETTER KAUNA */ + case U'\u16B3': /* RUNIC LETTER CEN */ + case U'\u16B4': /* RUNIC LETTER KAUN K */ + case U'\u16B5': /* RUNIC LETTER G */ + case U'\u16B6': /* RUNIC LETTER ENG */ + case U'\u16B7': /* RUNIC LETTER GEBO GYFU G */ + case U'\u16B8': /* RUNIC LETTER GAR */ + case U'\u16B9': /* RUNIC LETTER WUNJO WYNN W */ + case U'\u16BA': /* RUNIC LETTER HAGLAZ H */ + case U'\u16BB': /* RUNIC LETTER HAEGL H */ + case U'\u16BC': /* RUNIC LETTER LONG-BRANCH-HAGALL H */ + case U'\u16BD': /* RUNIC LETTER SHORT-TWIG-HAGALL H */ + case U'\u16BE': /* RUNIC LETTER NAUDIZ NYD NAUD N */ + case
U'\u16BF': /* RUNIC LETTER SHORT-TWIG-NAUD N */ + case U'\u16C0': /* RUNIC LETTER DOTTED-N */ + case U'\u16C1': /* RUNIC LETTER ISAZ IS ISS I */ + case U'\u16C2': /* RUNIC LETTER E */ + case U'\u16C3': /* RUNIC LETTER JERAN J */ + case U'\u16C4': /* RUNIC LETTER GER */ + case U'\u16C5': /* RUNIC LETTER LONG-BRANCH-AR AE */ + case U'\u16C6': /* RUNIC LETTER SHORT-TWIG-AR A */ + case U'\u16C7': /* RUNIC LETTER IWAZ EOH */ + case U'\u16C8': /* RUNIC LETTER PERTHO PEORTH P */ + case U'\u16C9': /* RUNIC LETTER ALGIZ EOLHX */ + case U'\u16CA': /* RUNIC LETTER SOWILO S */ + case U'\u16CB': /* RUNIC LETTER SIGEL LONG-BRANCH-SOL S */ + case U'\u16CC': /* RUNIC LETTER SHORT-TWIG-SOL S */ + case U'\u16CD': /* RUNIC LETTER C */ + case U'\u16CE': /* RUNIC LETTER Z */ + case U'\u16CF': /* RUNIC LETTER TIWAZ TIR TYR T */ + case U'\u16D0': /* RUNIC LETTER SHORT-TWIG-TYR T */ + case U'\u16D1': /* RUNIC LETTER D */ + case U'\u16D2': /* RUNIC LETTER BERKANAN BEORC BJARKAN B */ + case U'\u16D3': /* RUNIC LETTER SHORT-TWIG-BJARKAN B */ + case U'\u16D4': /* RUNIC LETTER DOTTED-P */ + case U'\u16D5': /* RUNIC LETTER OPEN-P */ + case U'\u16D6': /* RUNIC LETTER EHWAZ EH E */ + case U'\u16D7': /* RUNIC LETTER MANNAZ MAN M */ + case U'\u16D8': /* RUNIC LETTER LONG-BRANCH-MADR M */ + case U'\u16D9': /* RUNIC LETTER SHORT-TWIG-MADR M */ + case U'\u16DA': /* RUNIC LETTER LAUKAZ LAGU LOGR L */ + case U'\u16DB': /* RUNIC LETTER DOTTED-L */ + case U'\u16DC': /* RUNIC LETTER INGWAZ */ + case U'\u16DD': /* RUNIC LETTER ING */ + case U'\u16DE': /* RUNIC LETTER DAGAZ DAEG D */ + case U'\u16DF': /* RUNIC LETTER OTHALAN ETHEL O */ + case U'\u16E0': /* RUNIC LETTER EAR */ + case U'\u16E1': /* RUNIC LETTER IOR */ + case U'\u16E2': /* RUNIC LETTER CWEORTH */ + case U'\u16E3': /* RUNIC LETTER CALC */ + case U'\u16E4': /* RUNIC LETTER CEALC */ + case U'\u16E5': /* RUNIC
LETTER STAN */ + case U'\u16E6': /* RUNIC LETTER LONG-BRANCH-YR */ + case U'\u16E7': /* RUNIC LETTER SHORT-TWIG-YR */ + case U'\u16E8': /* RUNIC LETTER ICELANDIC-YR */ + case U'\u16E9': /* RUNIC LETTER Q */ + case U'\u16EA': /* RUNIC LETTER X */ + ret.res = true; + break; + } + return ret; +} diff --git a/src/u8c/chk.h.d/iscntrl.c b/src/u8c/chk.h.d/iscntrl.c new file mode 100644 index 0000000..c8532fc --- /dev/null +++ b/src/u8c/chk.h.d/iscntrl.c @@ -0,0 +1,104 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>.
+*/ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <u8c/chk.h> +struct u8c_iscntrl_tuple u8c_iscntrl(char32_t const _chr) { + struct u8c_iscntrl_tuple ret = { + .stat = false, + }; + switch(_chr) { + default: + ret.res = false; + break; + case U'\x0': /* NULL */ + case U'\x1': /* START OF HEADING */ + case U'\x2': /* START OF TEXT */ + case U'\x3': /* END OF TEXT */ + case U'\x4': /* END OF TRANSMISSION */ + case U'\x5': /* ENQUIRY */ + case U'\x6': /* ACKNOWLEDGE */ + case U'\a': /* BELL */ + case U'\b': /* BACKSPACE */ + case U'\t': /* HORIZONTAL TABULATION */ + case U'\n': /* NEW LINE */ + case U'\v': /* VERTICAL TABULATION */ + case U'\f': /* FORM FEED */ + case U'\r': /* CARRIAGE RETURN */ + case U'\xE': /* SHIFT OUT */ + case U'\xF': /* SHIFT IN */ + case U'\x10': /* DATA LINK ESCAPE */ + case U'\x11': /* DEVICE CONTROL ONE */ + case U'\x12': /* DEVICE CONTROL TWO */ + case U'\x13': /* DEVICE CONTROL THREE */ + case U'\x14': /* DEVICE CONTROL FOUR */ + case U'\x15': /* NEGATIVE ACKNOWLEDGE */ + case U'\x16': /* SYNCHRONOUS IDLE */ + case U'\x17': /* END OF TRANSMISSION BLOCK */ + case U'\x18': /* CANCEL */ + case U'\x19': /* END OF MEDIUM */ + case U'\x1A': /* SUBSTITUTE */ + case U'\x1B': /* ESCAPE */ + case U'\x1C': /* FILE SEPARATOR */ + case U'\x1D': /* GROUP SEPARATOR */ + case U'\x1E': /* RECORD SEPARATOR */ + case U'\x1F': /* UNIT SEPARATOR */ + case U'\x7F': /* DELETE */ + case U'\x80': /* <CONTROL> */ + case U'\x81': /* <CONTROL> */ + case U'\x82': /* BREAK PERMITTED HERE */ + case U'\x83': /* NO BREAK HERE */ + case U'\x84': /* <CONTROL> */ + case U'\x85': /* NEXT LINE */ + case U'\x86': /* START OF SELECTED AREA */ + case U'\x87': /* END OF SELECTED AREA */ + case U'\x88': /* CHARACTER TABULATION SET */ + case U'\x89': /* CHARACTER TABULATION SET WITH JUSTIFICATION */ + case U'\x8A': /* LINE TABULATION SET */ + case U'\x8B': /* PARTIAL LINE FORWARD */ + case U'\x8C': /* PARTIAL LINE BACKWARD */ + case U'\x8D': /* 
REVERSE LINE FEED */ + case U'\x8E': /* SINGLE SHIFT TWO */ + case U'\x8F': /* SINGLE SHIFT THREE */ + case U'\x90': /* DEVICE CONTROL STRING */ + case U'\x91': /* PRIVATE USE ONE */ + case U'\x92': /* PRIVATE USE TWO */ + case U'\x93': /* SET TRANSMIT STATE */ + case U'\x94': /* CANCEL CHARACTER */ + case U'\x95': /* MESSAGE WAITING */ + case U'\x96': /* START OF GUARDED AREA */ + case U'\x97': /* END OF GUARDED AREA */ + case U'\x98': /* START OF STRING */ + case U'\x99': /* <CONTROL> */ + case U'\x9A': /* SINGLE CHARACTER INTRODUCER */ + case U'\x9B': /* CONTROL SEQUENCE INTRODUCER */ + case U'\x9C': /* STRING TERMINATOR */ + case U'\x9D': /* OPERATING SYSTEM COMMAND */ + case U'\x9E': /* PRIVACY MESSAGE */ + case U'\x9F': /* APPLICATION PROGRAM COMMAND */ + case U'\xA0': /* NO-BREAK SPACE */ + case U'\u2028': /* LINE SEPARATOR */ + case U'\u2029': /* PARAGRAPH SEPARATOR */ + case U'\u202D': /* LEFT-TO-RIGHT OVERRIDE */ + case U'\u202E': /* RIGHT-TO-LEFT OVERRIDE */ + case U'\u2068': /* FIRST STRONG ISOLATE */ + case U'\u2069': /* POP DIRECTIONAL ISOLATE */ + ret.res = true; + break; + } + return ret; +} diff --git a/src/u8c/is.h.d/isdigit.c b/src/u8c/chk.h.d/isdigit.c index 8b799d9..61665cf 100644 --- a/src/u8c/is.h.d/isdigit.c +++ b/src/u8c/chk.h.d/isdigit.c @@ -13,16 +13,17 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/is.h> -bool u8c_isdigit(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); +# include <u8c/chk.h> +struct u8c_isdigit_tuple u8c_isdigit(char32_t const _chr) { + struct u8c_isdigit_tuple ret = { + .stat = false, + }; switch(_chr) { default: - *_res = UINT8_C(0x0); + ret.res = false; break; case U'0': /* DIGIT ZERO */ case U'1': /* DIGIT ONE */ @@ -36,8 +37,8 @@ bool u8c_isdigit(uint_least8_t * const _res,char32_t const _chr) { case U'9': /* DIGIT NINE */ case U'\u218A': /* TURNED DIGIT TWO */ case U'\u218B': /* TURNED DIGIT THREE */ - *_res = UINT8_C(0x1); + ret.res = true; break; } - return false; + return ret; } diff --git a/src/u8c/chk.h.d/islower.c b/src/u8c/chk.h.d/islower.c new file mode 100644 index 0000000..0cb3ea1 --- /dev/null +++ b/src/u8c/chk.h.d/islower.c @@ -0,0 +1,187 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <u8c/chk.h> +struct u8c_islower_tuple u8c_islower(char32_t const _chr) { + struct u8c_islower_tuple ret = { + .stat = false, + }; + switch(_chr) { + default: + ret.res = false; + break; + case U'a': /* LATIN SMALL LETTER A */ + case U'b': /* LATIN SMALL LETTER B */ + case U'c': /* LATIN SMALL LETTER C */ + case U'd': /* LATIN SMALL LETTER D */ + case U'e': /* LATIN SMALL LETTER E */ + case U'f': /* LATIN SMALL LETTER F */ + case U'g': /* LATIN SMALL LETTER G */ + case U'h': /* LATIN SMALL LETTER H */ + case U'i': /* LATIN SMALL LETTER I */ + case U'j': /* LATIN SMALL LETTER J */ + case U'k': /* LATIN SMALL LETTER K */ + case U'l': /* LATIN SMALL LETTER L */ + case U'm': /* LATIN SMALL LETTER M */ + case U'n': /* LATIN SMALL LETTER N */ + case U'o': /* LATIN SMALL LETTER O */ + case U'p': /* LATIN SMALL LETTER P */ + case U'q': /* LATIN SMALL LETTER Q */ + case U'r': /* LATIN SMALL LETTER R */ + case U's': /* LATIN SMALL LETTER S */ + case U't': /* LATIN SMALL LETTER T */ + case U'u': /* LATIN SMALL LETTER U */ + case U'v': /* LATIN SMALL LETTER V */ + case U'w': /* LATIN SMALL LETTER W */ + case U'x': /* LATIN SMALL LETTER X */ + case U'y': /* LATIN SMALL LETTER Y */ + case U'z': /* LATIN SMALL LETTER Z */ + case U'\u00DF': /* LATIN SMALL LETTER SHARP S */ + case U'\u00E0': /* LATIN SMALL LETTER A WITH GRAVE */ + case U'\u00E1': /* LATIN SMALL LETTER A WITH ACUTE */ + case U'\u00E2': /* LATIN SMALL LETTER A WITH CIRCUMFLEX */ + case U'\u00E3': /* LATIN SMALL LETTER A WITH TILDE */ + case U'\u00E4': /* LATIN SMALL LETTER A WITH DIAERESIS */ + case U'\u00E5': /* LATIN SMALL LETTER A WITH RING ABOVE */ + case U'\u00E6': /* LATIN SMALL LETTER AE */ + case U'\u00E7': /* LATIN SMALL LETTER C WITH CEDILLA */ + case U'\u00E8': /* LATIN SMALL LETTER E WITH GRAVE */ + case U'\u00E9': /* LATIN SMALL LETTER E WITH ACUTE */ + case U'\u00EA': /* LATIN SMALL LETTER E WITH CIRCUMFLEX */ + case 
U'\u00EB': /* LATIN SMALL LETTER E WITH DIAERESIS */ + case U'\u00EC': /* LATIN SMALL LETTER I WITH GRAVE */ + case U'\u00ED': /* LATIN SMALL LETTER I WITH ACUTE */ + case U'\u00EE': /* LATIN SMALL LETTER I WITH CIRCUMFLEX */ + case U'\u00EF': /* LATIN SMALL LETTER I WITH DIAERESIS */ + case U'\u00F0': /* LATIN SMALL LETTER ETH */ + case U'\u00F1': /* LATIN SMALL LETTER N WITH TILDE */ + case U'\u00F2': /* LATIN SMALL LETTER O WITH GRAVE */ + case U'\u00F3': /* LATIN SMALL LETTER O WITH ACUTE */ + case U'\u00F4': /* LATIN SMALL LETTER O WITH CIRCUMFLEX */ + case U'\u00F5': /* LATIN SMALL LETTER O WITH TILDE */ + case U'\u00F6': /* LATIN SMALL LETTER O WITH DIAERESIS */ + case U'\u00F8': /* LATIN SMALL LETTER O WITH STROKE */ + case U'\u00F9': /* LATIN SMALL LETTER U WITH GRAVE */ + case U'\u00FA': /* LATIN SMALL LETTER U WITH ACUTE */ + case U'\u00FB': /* LATIN SMALL LETTER U WITH CIRCUMFLEX */ + case U'\u00FC': /* LATIN SMALL LETTER U WITH DIAERESIS */ + case U'\u00FD': /* LATIN SMALL LETTER Y WITH ACUTE */ + case U'\u00FE': /* LATIN SMALL LETTER THORN */ + case U'\u00FF': /* LATIN SMALL LETTER Y WITH DIAERESIS */ + case U'\u0105': /* LATIN SMALL LETTER A WITH OGONEK */ + case U'\u0107': /* LATIN SMALL LETTER C WITH ACUTE */ + case U'\u010D': /* LATIN SMALL LETTER C WITH CARON */ + case U'\u010F': /* LATIN SMALL LETTER D WITH CARON */ + case U'\u0119': /* LATIN SMALL LETTER E WITH OGONEK */ + case U'\u011B': /* LATIN SMALL LETTER E WITH CARON */ + case U'\u011F': /* LATIN SMALL LETTER G WITH BREVE */ + case U'\u0131': /* LATIN SMALL LETTER DOTLESS I */ + case U'\u0133': /* LATIN SMALL LIGATURE IJ */ + case U'\u0138': /* LATIN SMALL LETTER KRA */ + case U'\u0142': /* LATIN SMALL LETTER L WITH STROKE */ + case U'\u0144': /* LATIN SMALL LETTER N WITH ACUTE */ + case U'\u0148': /* LATIN SMALL LETTER N WITH CARON */ + case U'\u014B': /* LATIN SMALL LETTER ENG */ + case U'\u0153': /* LATIN SMALL LIGATURE OE */ + case U'\u0159': /* LATIN SMALL LETTER R WITH CARON */ + case U'\u015B': /* 
LATIN SMALL LETTER S WITH ACUTE */ + case U'\u015F': /* LATIN SMALL LETTER S WITH CEDILLA */ + case U'\u0161': /* LATIN SMALL LETTER S WITH CARON */ + case U'\u0165': /* LATIN SMALL LETTER T WITH CARON */ + case U'\u016F': /* LATIN SMALL LETTER U WITH RING ABOVE */ + case U'\u017A': /* LATIN SMALL LETTER Z WITH ACUTE */ + case U'\u017C': /* LATIN SMALL LETTER Z WITH DOT ABOVE */ + case U'\u017E': /* LATIN SMALL LETTER Z WITH CARON */ + case U'\u01BF': /* LATIN LETTER WYNN */ + case U'\u01DD': /* LATIN SMALL LETTER TURNED E */ + case U'\u021D': /* LATIN SMALL LETTER YOGH */ + case U'\u0242': /* LATIN SMALL LETTER GLOTTAL STOP */ + case U'\u0250': /* LATIN SMALL LETTER TURNED A */ + case U'\u0251': /* LATIN SMALL LETTER ALPHA */ + case U'\u0252': /* LATIN SMALL LETTER TURNED ALPHA */ + case U'\u0253': /* LATIN SMALL LETTER B WITH HOOk */ + case U'\u0254': /* LATIN SMALL LETTER OPEN O */ + case U'\u0255': /* LATIN SMALL LETTER C WITH CURL */ + case U'\u0256': /* LATIN SMALL LETTER D WITH TAIL */ + case U'\u0257': /* LATIN SMALL LETTER D WITH HOOk */ + case U'\u0258': /* LATIN SMALL LETTER REVERSED E */ + case U'\u0259': /* LATIN SMALL LETTER SCHWA */ + case U'\u025A': /* LATIN SMALL LETTER SCHWA WITH HOOK */ + case U'\u025B': /* LATIN SMALL LETTER OPEN E */ + case U'\u025C': /* LATIN SMALL LETTER REVERSED OPEN E */ + case U'\u025D': /* LATIN SMALL LETTER REVERSED OPEN E WITH HOOK */ + case U'\u025E': /* LATIN SMALL LETTER CLOSED REVERSED OPEN E */ + case U'\u025F': /* LATIN SMALL LETTER DOTLESS J WITH STROKE */ + case U'\u0260': /* LATIN SMALL LETTER G WITH HOOK */ + case U'\u0261': /* LATIN SMALL LETTER SCRIPT G */ + case U'\u0262': /* LATIN LETTER SMALL CAPITAL G */ + case U'\u0263': /* LATIN SMALL LETTER GAMMA */ + case U'\u0264': /* LATIN SMALL LETTER RAMS HORN */ + case U'\u0265': /* LATIN SMALL LETTER TURNED H */ + case U'\u0266': /* LATIN SMALL LETTER H WITH HOOK */ + case U'\u0267': /* LATIN SMALL LETTER HENG WITH HOOK */ + case U'\u0268': /* LATIN SMALL 
LETTER I WITH STROKE */ + case U'\u0269': /* LATIN SMALL LETTER IOTA */ + case U'\u026A': /* LATIN LETTER SMALL CAPITAL I */ + case U'\u026B': /* LATIN SMALL LETTER L WITH MIDDLE TILDE */ + case U'\u026C': /* LATIN SMALL LETTER L WITH BELT */ + case U'\u026D': /* LATIN SMALL LETTER L WITH RETROFLEX HOOK */ + case U'\u026E': /* LATIN SMALL LETTER LEZH */ + case U'\u026F': /* LATIN SMALL LETTER TURNED M */ + case U'\u0270': /* LATIN SMALL LETTER TURNED M WITH LONG LEG */ + case U'\u0271': /* LATIN SMALL LETTER M WITH HOOK */ + case U'\u0272': /* LATIN SMALL LETTER N WITH LEFT HOOK */ + case U'\u0273': /* LATIN SMALL LETTER N WITH RETROFLEX HOOK */ + case U'\u0283': /* LATIN SMALL LETTER ESH */ + case U'\u028A': /* LATIN SMALL LETTER UPSILON */ + case U'\u028B': /* LATIN SMALL LETTER V WITH HOOK */ + case U'\u0292': /* LATIN SMALL LETTER EZH */ + case U'\u0294': /* LATIN SMALL LETTER GLOTTAL STOP */ + case U'\u03B1': /* GREEK SMALL LETTER ALPHA */ + case U'\u03B2': /* GREEK SMALL LETTER BETA */ + case U'\u03B3': /* GREEK SMALL LETTER GAMMA */ + case U'\u03B4': /* GREEK SMALL LETTER DELTA */ + case U'\u03B5': /* GREEK SMALL LETTER EPSILON */ + case U'\u03B6': /* GREEK SMALL LETTER ZETA */ + case U'\u03B7': /* GREEK SMALL LETTER ETA */ + case U'\u03B8': /* GREEK SMALL LETTER THETA */ + case U'\u03B9': /* GREEK SMALL LETTER IOTA */ + case U'\u03BA': /* GREEK SMALL LETTER KAPPA */ + case U'\u03BB': /* GREEK SMALL LETTER LAMBDA */ + case U'\u03BC': /* GREEK SMALL LETTER MU */ + case U'\u03BD': /* GREEK SMALL LETTER NU */ + case U'\u03BE': /* GREEK SMALL LETTER XI */ + case U'\u03BF': /* GREEK SMALL LETTER OMICRON */ + case U'\u03C0': /* GREEK SMALL LETTER PI */ + case U'\u03C1': /* GREEK SMALL LETTER RHO */ + case U'\u03C2': /* GREEK SMALL LETTER FINAL SIGMA */ + case U'\u03C3': /* GREEK SMALL LETTER SIGMA */ + case U'\u03C4': /* GREEK SMALL LETTER TAU */ + case U'\u03C5': /* GREEK SMALL LETTER UPSILON */ + case U'\u03C6': /* GREEK SMALL LETTER PHI */ + case U'\u03C7': /* 
GREEK SMALL LETTER CHI */ + case U'\u03C8': /* GREEK SMALL LETTER PSI */ + case U'\u03C9': /* GREEK SMALL LETTER OMEGA */ + case U'\u1D79': /* LATIN SMALL LETTER INSULAR G */ + case U'\uA7B7': /* LATIN SMALL LETTER OMEGA */ + case U'\uFB00': /* LATIN SMALL LIGATURE FF */ + ret.res = true; + break; + } + return ret; +} diff --git a/src/u8c/is.h.d/ispunct.c b/src/u8c/chk.h.d/ispunct.c index f6b041f..2d2a276 100644 --- a/src/u8c/is.h.d/ispunct.c +++ b/src/u8c/chk.h.d/ispunct.c @@ -13,16 +13,17 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/is.h> -bool u8c_ispunct(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); +# include <u8c/chk.h> +struct u8c_ispunct_tuple u8c_ispunct(char32_t const _chr) { + struct u8c_ispunct_tuple ret = { + .stat = false, + }; switch(_chr) { default: - *_res = UINT8_C(0x0); + ret.res = false; break; case U'!': /* EXCLAMATION MARK */ case U'\"': /* QUOTATION MARK */ @@ -83,6 +84,18 @@ bool u8c_ispunct(uint_least8_t * const _res,char32_t const _chr) { case U'\u201D': /* RIGHT DOUBLE QUOTATION MARK */ case U'\u2026': /* HORIZONTAL ELLIPSIS */ case U'\u2030': /* PER MILLE SIGN */ + case U'\u2031': /* PER TEN THOUSAND SIGN */ + case U'\u2032': /* PRIME */ + case U'\u2033': /* DOUBLE PRIME */ + case U'\u2034': /* TRIPLE PRIME */ + case U'\u2035': /* REVERSED PRIME */ + case U'\u2036': /* REVERSED DOUBLE PRIME */ + case U'\u2037': /* REVERSED TRIPLE PRIME */ + case U'\u203C': /* DOUBLE EXCLAMATION MARK */ + case U'\u203D': /* INTERROBANG */ + case U'\u2047': /* DOUBLE QUESTION MARK */ + case U'\u2048': /* QUESTION EXCLAMATION MARK */ + case U'\u2049': /* EXCLAMATION QUESTION MARK */ case U'\u20A3': /* FRENCH FRANC SIGN */ case U'\u20A4': /* LIRA SIGN */ case U'\u20A8': /* RUPEE SIGN */ @@ -145,8 +158,8 @@ bool u8c_ispunct(uint_least8_t * const _res,char32_t const _chr) { case U'\U0001F16D': /* CIRCLED CC */ case 
U'\U0001F16E': /* CIRCLED C WITH OVERLAID BACKSLASH */ case U'\U0001F16F': /* CIRCLED HUMAN FIGURE */ - *_res = UINT8_C(0x1); + ret.res = true; break; } - return false; + return ret; } diff --git a/src/u8c/is.h.d/isspace.c b/src/u8c/chk.h.d/isspace.c index 9473476..478e7a7 100644 --- a/src/u8c/is.h.d/isspace.c +++ b/src/u8c/chk.h.d/isspace.c @@ -13,16 +13,17 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/is.h> -bool u8c_isspace(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); +# include <u8c/chk.h> +struct u8c_isspace_tuple u8c_isspace(char32_t const _chr) { + struct u8c_isspace_tuple ret = { + .stat = false, + }; switch(_chr) { default: - *_res = UINT8_C(0x0); + ret.res = false; break; case U'\t': /* HORIZONTAL TABULATION */ case U'\n': /* NEW LINE */ @@ -30,8 +31,8 @@ bool u8c_isspace(uint_least8_t * const _res,char32_t const _chr) { case U'\f': /* FORM FEED */ case U'\r': /* CARRIAGE RETURN */ case U' ': /* SPACE */ - *_res = UINT8_C(0x1); + ret.res = true; break; } - return false; + return ret; } diff --git a/src/u8c/u32.h.d/u32sz.c b/src/u8c/chk.h.d/issurro.c index deb1ecd..e6873cd 100644 --- a/src/u8c/u32.h.d/u32sz.c +++ b/src/u8c/chk.h.d/issurro.c @@ -13,19 +13,18 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/u32.h> -# include <uchar.h> -bool u8c_u32sz(size_t * const _sz,char32_t const * const _in) { - assert(_sz != NULL); - size_t sz = SIZE_C(0x0); - if(u8c_u32fndchr(&sz,_in,UINT8_C(0x0))) { - return true; +# include <u8c/chk.h> +struct u8c_issurro_tuple u8c_issurro(char32_t const _chr) { + struct u8c_issurro_tuple ret = { + .stat = false, + }; + bool res = false; + if(_chr >= U'\xD800' && _chr <= U'\xDFFF') { + res = true; } - *_sz = sz; - return true; + ret.res = res; + return ret; } diff --git a/src/u8c/chk.h.d/isupper.c b/src/u8c/chk.h.d/isupper.c new file mode 100644 index 0000000..8c5e2ab --- /dev/null +++ b/src/u8c/chk.h.d/isupper.c @@ -0,0 +1,147 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <u8c/chk.h> +struct u8c_isupper_tuple u8c_isupper(char32_t const _chr) { + struct u8c_isupper_tuple ret = { + .stat = false, + }; + switch(_chr) { + default: + ret.res = false; + break; + case U'A': /* LATIN CAPITAL LETTER A */ + case U'B': /* LATIN CAPITAL LETTER B */ + case U'C': /* LATIN CAPITAL LETTER C */ + case U'D': /* LATIN CAPITAL LETTER D */ + case U'E': /* LATIN CAPITAL LETTER E */ + case U'F': /* LATIN CAPITAL LETTER F */ + case U'G': /* LATIN CAPITAL LETTER G */ + case U'H': /* LATIN CAPITAL LETTER H */ + case U'I': /* LATIN CAPITAL LETTER I */ + case U'J': /* LATIN CAPITAL LETTER J */ + case U'K': /* LATIN CAPITAL LETTER K */ + case U'L': /* LATIN CAPITAL LETTER L */ + case U'M': /* LATIN CAPITAL LETTER M */ + case U'N': /* LATIN CAPITAL LETTER N */ + case U'O': /* LATIN CAPITAL LETTER O */ + case U'P': /* LATIN CAPITAL LETTER P */ + case U'Q': /* LATIN CAPITAL LETTER Q */ + case U'R': /* LATIN CAPITAL LETTER R */ + case U'S': /* LATIN CAPITAL LETTER S */ + case U'T': /* LATIN CAPITAL LETTER T */ + case U'U': /* LATIN CAPITAL LETTER U */ + case U'V': /* LATIN CAPITAL LETTER V */ + case U'X': /* LATIN CAPITAL LETTER X */ + case U'W': /* LATIN CAPITAL LETTER W */ + case U'Y': /* LATIN CAPITAL LETTER Y */ + case U'Z': /* LATIN CAPITAL LETTER Z */ + case U'\u00C0': /* LATIN CAPITAL LETTER A WITH GRAVE */ + case U'\u00C1': /* LATIN CAPITAL LETTER A WITH ACUTE */ + case U'\u00C2': /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ + case U'\u00C3': /* LATIN CAPITAL LETTER A WITH TILDE */ + case U'\u00C4': /* LATIN CAPITAL LETTER A WITH DIAERESIS */ + case U'\u00C5': /* LATIN CAPITAL LETTER A WITH RING ABOVE */ + case U'\u00C6': /* LATIN CAPITAL LETTER AE */ + case U'\u00C7': /* LATIN CAPITAL LETTER C WITH CEDILLA */ + case U'\u00C8': /* LATIN CAPITAL LETTER E WITH GRAVE */ + case U'\u00C9': /* LATIN CAPITAL LETTER E WITH ACUTE */ + case U'\u00CA': /* LATIN CAPITAL LETTER E WITH 
CIRCUMFLEX */ + case U'\u00CB': /* LATIN CAPITAL LETTER E WITH DIAERESIS */ + case U'\u00CC': /* LATIN CAPITAL LETTER I WITH GRAVE */ + case U'\u00CD': /* LATIN CAPITAL LETTER I WITH ACUTE */ + case U'\u00CE': /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */ + case U'\u00CF': /* LATIN CAPITAL LETTER I WITH DIAERESIS */ + case U'\u00D0': /* LATIN CAPITAL LETTER ETH */ + case U'\u00D1': /* LATIN CAPITAL LETTER N WITH TILDE */ + case U'\u00D2': /* LATIN CAPITAL LETTER O WITH GRAVE */ + case U'\u00D3': /* LATIN CAPITAL LETTER O WITH ACUTE */ + case U'\u00D4': /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */ + case U'\u00D5': /* LATIN CAPITAL LETTER O WITH TILDE */ + case U'\u00D6': /* LATIN CAPITAL LETTER O WITH DIAERESIS */ + case U'\u00D8': /* LATIN CAPITAL LETTER O WITH STROKE */ + case U'\u00D9': /* LATIN CAPITAL LETTER U WITH GRAVE */ + case U'\u00DA': /* LATIN CAPITAL LETTER U WITH ACUTE */ + case U'\u00DB': /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */ + case U'\u00DC': /* LATIN CAPITAL LETTER U WITH DIAERESIS */ + case U'\u00DD': /* LATIN CAPITAL LETTER Y WITH ACUTE */ + case U'\u00DE': /* LATIN CAPITAL LETTER THORN */ + case U'\u0100': /* LATIN CAPITAL LETTER A WITH MACRON */ + case U'\u0102': /* LATIN CAPITAL LETTER A WITH BREVE */ + case U'\u0104': /* LATIN CAPITAL LETTER A WITH OGONEK */ + case U'\u0106': /* LATIN CAPITAL LETTER C WITH ACUTE */ + case U'\u0108': /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX */ + case U'\u010A': /* LATIN CAPITAL LETTER C WITH DOT ABOVE */ + case U'\u010C': /* LATIN CAPITAL LETTER C WITH CARON */ + case U'\u010E': /* LATIN CAPITAL LETTER D WITH CARON */ + case U'\u0110': /* LATIN CAPITAL LETTER D WITH STROKE */ + case U'\u0112': /* LATIN CAPITAL LETTER E WITH MACRON */ + case U'\u0114': /* LATIN CAPITAL LETTER E WITH BREVE */ + case U'\u0116': /* LATIN CAPITAL LETTER E WITH DOT ABOVE */ + case U'\u0118': /* LATIN CAPITAL LETTER E WITH OGONEK */ + case U'\u011A': /* LATIN CAPITAL LETTER E WITH CARON */ + case U'\u011C': /* LATIN CAPITAL 
LETTER G WITH CIRCUMFLEX */ + case U'\u014A': /* LATIN CAPITAL LETTER ENG */ + case U'\u0152': /* LATIN CAPITAL LIGATURE OE */ + case U'\u0186': /* LATIN CAPITAL LETTER OPEN O */ + case U'\u018E': /* LATIN CAPITAL LETTER REVERSED E */ + case U'\u018F': /* LATIN CAPITAL LETTER SCHWA */ + case U'\u0190': /* LATIN CAPITAL LETTER OPEN E */ + case U'\u0194': /* LATIN CAPITAL LETTER GAMMA */ + case U'\u0196': /* LATIN CAPITAL LETTER IOTA */ + case U'\u01A9': /* LATIN CAPITAL LETTER ESH */ + case U'\u01B1': /* LATIN CAPITAL LETTER UPSILON */ + case U'\u01B2': /* LATIN CAPITAL LETTER V WITH HOOk */ + case U'\u01B7': /* LATIN CAPITAL LETTER EZH */ + case U'\u01F7': /* LATIN CAPITAL LETTER WYNN */ + case U'\u021C': /* LATIN CAPITAL LETTER YOGH */ + case U'\u0241': /* LATIN CAPITAL LETTER GLOTTAL STOP */ + case U'\u0391': /* GREEK CAPITAL LETTER ALPHA */ + case U'\u0392': /* GREEK CAPITAL LETTER BETA */ + case U'\u0393': /* GREEK CAPITAL LETTER GAMMA */ + case U'\u0394': /* GREEK CAPITAL LETTER DELTA */ + case U'\u0395': /* GREEK CAPITAL LETTER EPSILON */ + case U'\u0396': /* GREEK CAPITAL LETTER ZETA */ + case U'\u0397': /* GREEK CAPITAL LETTER ETA */ + case U'\u0398': /* GREEK CAPITAL LETTER THETA */ + case U'\u0399': /* GREEK CAPITAL LETTER IOTA */ + case U'\u039A': /* GREEK CAPITAL LETTER KAPPA */ + case U'\u039B': /* GREEK CAPITAL LETTER LAMBDA */ + case U'\u039C': /* GREEK CAPITAL LETTER MU */ + case U'\u039D': /* GREEK CAPITAL LETTER NU */ + case U'\u039E': /* GREEK CAPITAL LETTER XI */ + case U'\u039F': /* GREEK CAPITAL LETTER OMICRON */ + case U'\u03A0': /* GREEK CAPITAL LETTER PI */ + case U'\u03A1': /* GREEK CAPITAL LETTER RHO */ + case U'\u03A3': /* GREEK CAPITAL LETTER SIGMA */ + case U'\u03A4': /* GREEK CAPITAL LETTER TAU */ + case U'\u03A5': /* GREEK CAPITAL LETTER UPSILON */ + case U'\u03A6': /* GREEK CAPITAL LETTER PHI */ + case U'\u03A7': /* GREEK CAPITAL LETTER CHI */ + case U'\u03A8': /* GREEK CAPITAL LETTER PSI */ + case U'\u03A9': /* GREEK CAPITAL LETTER 
OMEGA */ + case U'\u1E9E': /* LATIN CAPITAL LETTER SHARP S */ + case U'\u2C6D': /* LATIN CAPITAL LETTER ALPHA */ + case U'\uA77D': /* LATIN CAPITAL LETTER INSULAR G */ + case U'\uA7B4': /* LATIN CAPITAL LETTER BETA */ + case U'\uA7B6': /* LATIN CAPITAL LETTER OMEGA */ + ret.res = true; + break; + } + return ret; +} diff --git a/src/u8c/is.h.d/isxdigit.c b/src/u8c/chk.h.d/isxdigit.c index 4a59b0d..5100624 100644 --- a/src/u8c/is.h.d/isxdigit.c +++ b/src/u8c/chk.h.d/isxdigit.c @@ -13,16 +13,17 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/is.h> -bool u8c_isxdigit(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); +# include <u8c/chk.h> +struct u8c_isxdigit_tuple u8c_isxdigit(char32_t const _chr) { + struct u8c_isxdigit_tuple ret = { + .stat = false, + }; switch(_chr) { default: - *_res = UINT8_C(0x0); + ret.res = false; break; case U'0': /* DIGIT ZERO */ case U'1': /* DIGIT ONE */ @@ -40,8 +41,8 @@ bool u8c_isxdigit(uint_least8_t * const _res,char32_t const _chr) { case U'D': /* LATIN CAPITAL LETTER D */ case U'E': /* LATIN CAPITAL LETTER E */ case U'F': /* LATIN CAPITAL LETTER F */ - *_res = UINT8_C(0x1); + ret.res = true; break; } - return false; + return ret; } diff --git a/src/u8c/err.h.d/geterr.c b/src/u8c/err.h.d/geterr.c index 58f3612..5219348 100644 --- a/src/u8c/err.h.d/geterr.c +++ b/src/u8c/err.h.d/geterr.c @@ -18,14 +18,21 @@ # include <stdint.h> # include <u8c/err.h> # include <u8c/intern.h> -# include <u8c/u32.h> -bool u8c_geterr(size_t * const _sz,char32_t const * * const _out) { - # if defined(u8c_bethrdsafe) - mtx_lock(&u8c_dat.errlock); - # endif - u8c_u32cp(_sz,_out,u8c_dat.err); - # if defined(u8c_bethrdsafe) - mtx_unlock(&u8c_dat.errlock); - # endif - return false; +# include <u8c/str.h> +struct u8c_geterr_tuple u8c_geterr(void) { + struct u8c_geterr_tuple ret = { + .stat = false, + }; + { +# if defined(u8c_bethrdsafe) 
+ mtx_lock(&u8c_dat.errlock); +# endif + struct u8c_strcp_tuple const tuple = u8c_strcp(u8c_dat.err); +# if defined(u8c_bethrdsafe) + mtx_unlock(&u8c_dat.errlock); +# endif + ret.err = tuple.str; + ret.errsz = tuple.strsz; } + return ret; +} diff --git a/src/u8c/err.h.d/regerrhandl.c b/src/u8c/err.h.d/regerrhandl.c index 5e97177..5ac43a5 100644 --- a/src/u8c/err.h.d/regerrhandl.c +++ b/src/u8c/err.h.d/regerrhandl.c @@ -20,12 +20,15 @@ static void u8c_regerrhandl_seterrhandl(enum u8c_errtyp _typ,u8c_errhandltyp _errhandl) { u8c_dat.errhandls[(size_t)_typ] = _errhandl; } -bool u8c_regerrhandl(enum u8c_errtyp _typ,u8c_errhandltyp _errhandl) { +struct u8c_regerrhandl_tuple u8c_regerrhandl(enum u8c_errtyp _typ,u8c_errhandltyp _errhandl) { + struct u8c_regerrhandl_tuple ret = { + .stat = false, + }; # if defined(u8c_bethrdsafe) mtx_lock(&u8c_dat.errhandlslock); # endif if(_typ == u8c_errtyp_all) { - for(register int n = 0x0;n < (int)u8c_errtyp_maxerrtyp;n += 0x1) { + for(register int n = 0x0;n < (int)u8c_errtyp_all;n += 0x1) { u8c_regerrhandl_seterrhandl((enum u8c_errtyp)n,_errhandl); } } @@ -35,5 +38,5 @@ bool u8c_regerrhandl(enum u8c_errtyp _typ,u8c_errhandltyp _errhandl) { # if defined(u8c_bethrdsafe) mtx_unlock(&u8c_dat.errhandlslock); # endif - return false; + return ret; } diff --git a/src/u8c/err.h.d/seterr.c b/src/u8c/err.h.d/seterr.c index 017e45e..89edf19 100644 --- a/src/u8c/err.h.d/seterr.c +++ b/src/u8c/err.h.d/seterr.c @@ -13,25 +13,26 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> # include <u8c/err.h> # include <u8c/fmt.h> # include <u8c/intern.h> -# include <u8c/u32.h> +# include <u8c/str.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_seterr(char32_t const * const _msg,enum u8c_errtyp _typ) { - assert(_msg != NULL); - //u8c_dbgprint(_msg); +struct u8c_seterr_tuple u8c_seterr(enum u8c_errtyp _typ,char32_t const * const restrict _msg) { + struct u8c_seterr_tuple ret = { + .stat = false, + }; + /* u8c_dbgprint(_msg); */ # if defined(u8c_bethrdsafe) mtx_lock(&u8c_dat.errlock); # endif - u8c_u32free(&u8c_dat.err); - u8c_u32cp(NULL,&u8c_dat.err,_msg); + u8c_strfree(u8c_dat.err); + u8c_dat.err = u8c_strcp(_msg).str; # if defined(u8c_bethrdsafe) mtx_unlock(&u8c_dat.errlock); # endif @@ -44,5 +45,5 @@ bool u8c_seterr(char32_t const * const _msg,enum u8c_errtyp _typ) { # if defined(u8c_bethrdsafe) mtx_unlock(&u8c_dat.errhandlslock); # endif - return false; + return ret; } diff --git a/src/u8c/fmt.h.d/fmt.c b/src/u8c/fmt.h.d/fmt.c index 43f1ea4..59feb74 100644 --- a/src/u8c/fmt.h.d/fmt.c +++ b/src/u8c/fmt.h.d/fmt.c @@ -18,11 +18,15 @@ # include <stddef.h> # include <stdint.h> # include <u8c/fmt.h> -# include <u8c/u32.h> -bool u8c_fmt(size_t * const _outsz,char32_t const * * const _out,char32_t const * const _in,...) { +# include <u8c/str.h> +struct u8c_fmt_tuple u8c_fmt(char32_t const * const restrict _in,...) 
{ + struct u8c_fmt_tuple ret; va_list args; va_start(args,_in); - uint_least8_t val = u8c_vfmt(_outsz,_out,_in,args); + struct u8c_vfmt_tuple tuple = u8c_vfmt(_in,args); va_end(args); - return val; + ret.stat = tuple.stat; + ret.str = tuple.str; + ret.strsz = tuple.strsz; + return ret; } diff --git a/src/u8c/fmt.h.d/print.c b/src/u8c/fmt.h.d/print.c index 8785ad8..e2f4802 100644 --- a/src/u8c/fmt.h.d/print.c +++ b/src/u8c/fmt.h.d/print.c @@ -17,10 +17,12 @@ # include <stdbool.h> # include <stdint.h> # include <u8c/fmt.h> -bool u8c_print(FILE * _fp,char32_t const * const _msg,...) { +struct u8c_print_tuple u8c_print(FILE * restrict _fp,char32_t const * const restrict _msg,...) { + struct u8c_print_tuple ret; va_list args; va_start(args,_msg); - uint_least8_t val = u8c_vprint(_fp,_msg,args); + struct u8c_vprint_tuple tuple = u8c_vprint(_fp,_msg,args); va_end(args); - return val; + ret.stat = tuple.stat; + return ret; } diff --git a/src/u8c/fmt.h.d/println.c b/src/u8c/fmt.h.d/println.c index 1a924ad..d73a897 100644 --- a/src/u8c/fmt.h.d/println.c +++ b/src/u8c/fmt.h.d/println.c @@ -13,27 +13,21 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdarg.h> # include <stdbool.h> # include <stdint.h> # include <stdio.h> # include <u8c/fmt.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <uchar.h> -bool u8c_println(FILE * _fp,char32_t const * const _msg,...) { - assert(_fp != NULL); +struct u8c_println_tuple u8c_println(FILE * restrict _fp,char32_t const * const restrict _msg,...) 
{ + struct u8c_println_tuple ret; va_list args; va_start(args,_msg); - char32_t const * msg = NULL; - u8c_u32cat(NULL,&msg,_msg,U"\n"); - { - register bool const val = u8c_vprint(_fp,msg,args); - u8c_u32free(&msg); - if(val) { - return true; - } - } + char32_t const * msg = u8c_strcat(_msg,U"\n").str; + register struct u8c_vprint_tuple const tuple = u8c_vprint(_fp,msg,args); + u8c_strfree(msg); va_end(args); - return false; + ret.stat = tuple.stat; + return ret; } diff --git a/src/u8c/fmt.h.d/setfmt.c b/src/u8c/fmt.h.d/setfmt.c index 7956ea3..4c6d3a3 100644 --- a/src/u8c/fmt.h.d/setfmt.c +++ b/src/u8c/fmt.h.d/setfmt.c @@ -20,15 +20,15 @@ # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_setfmt(unsigned char const _base,unsigned char const _endian) { - uint_least8_t base = _base; - uint_least8_t endian = _endian; +struct u8c_setfmt_tuple u8c_setfmt(uint_least8_t const _base,bool const _endian) { + struct u8c_setfmt_tuple ret = { + .stat = false, + }; + register uint_least8_t base = _base; + register bool endian = _endian; if(_base > UINT8_C(0x20)) { base = UINT8_C(0xC); } - if(_endian > UINT8_C(0x1)) { - endian = UINT8_C(0x0); - } # if defined(u8c_bethrdsafe) mtx_lock(&u8c_dat.fmtlock); # endif @@ -37,5 +37,5 @@ bool u8c_setfmt(unsigned char const _base,unsigned char const _endian) { # if defined(u8c_bethrdsafe) mtx_unlock(&u8c_dat.fmtlock); # endif - return false; + return ret; } diff --git a/src/u8c/fmt.h.d/vfmt.c b/src/u8c/fmt.h.d/vfmt.c index 5b0c1e1..5148784 100644 --- a/src/u8c/fmt.h.d/vfmt.c +++ b/src/u8c/fmt.h.d/vfmt.c @@ -16,11 +16,18 @@ # include <stdarg.h> # include <stdbool.h> # include <u8c/fmt.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <uchar.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_vfmt(size_t * const _sz,char32_t const * * const _out,char32_t const * const _in,[[maybe_unused]] va_list _args) { - return u8c_u32cp(_sz,_out,_in); +struct u8c_vfmt_tuple u8c_vfmt(char32_t const * const 
restrict _in,[[maybe_unused]] va_list _args) { + struct u8c_vfmt_tuple ret = { + .stat = false, + }; + struct u8c_strcp_tuple const tuple = u8c_strcp(_in); + ret.stat = tuple.stat; + ret.str = tuple.str; + ret.strsz = tuple.strsz; + return ret; } diff --git a/src/u8c/fmt.h.d/vprint.c b/src/u8c/fmt.h.d/vprint.c index 8d824a8..cfcb850 100644 --- a/src/u8c/fmt.h.d/vprint.c +++ b/src/u8c/fmt.h.d/vprint.c @@ -13,7 +13,6 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdarg.h> # include <stdbool.h> # include <stdint.h> @@ -23,20 +22,24 @@ # include <u8c/err.h> # include <u8c/fmt.h> # include <u8c/intern.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <u8c/u8.h> # include <uchar.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_vprint(FILE * _fp,char32_t const * const _msg,va_list _args) { - assert(_msg != NULL); - char32_t const * str0 = NULL; - u8c_vfmt(NULL,&str0,_msg,_args); +struct u8c_vprint_tuple u8c_vprint(FILE * restrict _fp,char32_t const * const restrict _msg,va_list _args) { + struct u8c_vprint_tuple ret = { + .stat = false, + }; + char32_t const * str0 = u8c_vfmt(_msg,_args).str; size_t str1sz = SIZE_C(0x0); unsigned char const * str1 = NULL; - u8c_u8enc(&str1sz,&str1,str0); - assert(str1sz > SIZE_C(0x0)); + { + struct u8c_u8enc_tuple const tuple = u8c_u8enc(str0); + str1 = tuple.u8; + str1sz = tuple.u8sz; + } # if defined(u8c_bethrdsafe) mtx_lock(&u8c_dat.outlock); # endif @@ -46,11 +49,12 @@ bool u8c_vprint(FILE * _fp,char32_t const * const _msg,va_list _args) { mtx_unlock(&u8c_dat.outlock); # endif if(val < str1sz - SIZE_C(0x1)) { - u8c_seterr(U"u8c_vprint: fwrite: Unable to write to stdout.",u8c_errtyp_badio); - return true; + u8c_seterr(u8c_errtyp_badio,U"u8c_vprint: Unable to write to stdout."); + ret.stat = true; + return ret; } } - u8c_u32free(&str0); - u8c_u8free(&str1); - return false; + u8c_strfree(str0); + u8c_u8free(str1); + return ret; } diff --git a/src/u8c/is.h.d/isalpha.c 
b/src/u8c/is.h.d/isalpha.c deleted file mode 100644 index d5952dc..0000000 --- a/src/u8c/is.h.d/isalpha.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <assert.h> -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/is.h> -bool u8c_isalpha(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); - switch(_chr) { - default: - *_res = UINT8_C(0x0); - break; - case U'a': /* LATIN SMALL LETTER A */ - case U'b': /* LATIN SMALL LETTER B */ - case U'c': /* LATIN SMALL LETTER C */ - case U'd': /* LATIN SMALL LETTER D */ - case U'e': /* LATIN SMALL LETTER E */ - case U'f': /* LATIN SMALL LETTER F */ - case U'g': /* LATIN SMALL LETTER G */ - case U'h': /* LATIN SMALL LETTER H */ - case U'i': /* LATIN SMALL LETTER I */ - case U'j': /* LATIN SMALL LETTER J */ - case U'k': /* LATIN SMALL LETTER K */ - case U'l': /* LATIN SMALL LETTER L */ - case U'm': /* LATIN SMALL LETTER M */ - case U'n': /* LATIN SMALL LETTER N */ - case U'o': /* LATIN SMALL LETTER O */ - case U'p': /* LATIN SMALL LETTER P */ - case U'q': /* LATIN SMALL LETTER Q */ - case U'r': /* LATIN SMALL LETTER R */ - case U's': /* LATIN SMALL LETTER S */ - case U't': /* LATIN SMALL LETTER T */ - case U'u': /* LATIN SMALL LETTER U */ - case U'v': /* LATIN SMALL LETTER V */ - case U'w': /* LATIN 
SMALL LETTER W */ - case U'x': /* LATIN SMALL LETTER X */ - case U'y': /* LATIN SMALL LETTER Y */ - case U'z': /* LATIN SMALL LETTER Z */ - case U'\u00DF': /* LATIN SMALL LETTER SHARP S */ - case U'\u00E1': /* LATIN SMALL LETTER A WITH ACUTE */ - case U'\u00E4': /* LATIN SMALL LETTER A WITH DIAERESIS */ - case U'\u00E5': /* LATIN SMALL LETTER A WITH RING ABOVE */ - case U'\u00E6': /* LATIN SMALL LETTER AE */ - case U'\u00E7': /* LATIN SMALL LETTER C WITH CEDILLA */ - case U'\u00E9': /* LATIN SMALL LETTER E WITH ACUTE */ - case U'\u00ED': /* LATIN SMALL LETTER I WITH ACUTE */ - case U'\u00F0': /* LATIN SMALL LETTER ETH */ - case U'\u00F3': /* LATIN SMALL LETTER O WITH ACUTE */ - case U'\u00F6': /* LATIN SMALL LETTER O WITH DIAERESIS */ - case U'\u00F8': /* LATIN SMALL LETTER O WITH STROKE */ - case U'\u00FA': /* LATIN SMALL LETTER U WITH ACUTE */ - case U'\u00FC': /* U WITH TWO DOTS */ - case U'\u00FD': /* LATIN SMALL LETTER Y WITH ACUTE */ - case U'\u00FE': /* LATIN SMALL LETTER THORN */ - case U'\u0105': /* LATIN SMALL LETTER A WITH OGONEK */ - case U'\u0107': /* LATIN SMALL LETTER C WITH ACUTE */ - case U'\u010D': /* LATIN SMALL LETTER C WITH CARON */ - case U'\u010F': /* LATIN SMALL LETTER D WITH CARON */ - case U'\u0119': /* LATIN SMALL LETTER E WITH OGONEK */ - case U'\u011B': /* LATIN SMALL LETTER E WITH CARON */ - case U'\u011F': /* LATIN SMALL LETTER G WITH BREVE */ - case U'\u0131': /* LATIN SMALL LETTER DOTLESS I */ - case U'\u0133': /* LATIN SMALL LIGATURE LJ */ - case U'\u0138': /* LATIN SMALL LETTER KRA */ - case U'\u0142': /* LATIN SMALL LETTER L WITH STROKE */ - case U'\u0144': /* LATIN SMALL LETTER N WITH ACUTE */ - case U'\u0148': /* LATIN SMALL LETTER N WITH CARON */ - case U'\u014B': /* LATIN SMALL LETTER ENG */ - case U'\u0153': /* LATIN SMALL LIGATURE OE */ - case U'\u0159': /* LATIN SMALL LETTER R WITH CARON */ - case U'\u015B': /* LATIN SMALL LETTER S WITH ACUTE */ - case U'\u015F': /* LATIN SMALL LETTER S WITH CEDILLA */ - case U'\u0161': /* 
LATIN SMALL LETTER S WITH CARON */ - case U'\u0165': /* LATIN SMALL LETTER T WITH CARON */ - case U'\u016F': /* LATIN SMALL LETTER U WITH RING ABOVE */ - case U'\u017A': /* LATIN SMALL LETTER Z WITH ACUTE */ - case U'\u017C': /* LATIN SMALL LETTER Z WITH DOT ABOVE */ - case U'\u017E': /* LATIN SMALL LETTER Z WITH CARON */ - case U'\u01BF': /* LATIN LETTER WYNN */ - case U'\u01DD': /* LATIN SMALL LETTER TURNED E */ - case U'\u021D': /* LATIN SMALL LETTER YOGH */ - case U'\u0251': /* LATIN SMALL LETTER ALPHA */ - case U'\u0254': /* LATIN SMALL LETTER OPEN O */ - case U'\u0259': /* LATIN SMALL LETTER SCHWA */ - case U'\u025B': /* LATIN SMALL LETTER OPEN E */ - case U'\u0263': /* LATIN SMALL LETTER GAMMA */ - case U'\u0269': /* LATIN SMALL LETTER IOTA */ - case U'\u0283': /* LATIN SMALL LETTER ESH */ - case U'\u028A': /* LATIN SMALL LETTER UPSILON */ - case U'\u028B': /* LATIN SMALL LETTER V WITH HOOK */ - case U'\u0292': /* LATIN SMALL LETTER EZH */ - case U'\u0294': /* LATIN SMALL LETTER GLOTTAL STOP */ - case U'\u03B1': /* GREEK SMALL LETTER ALPHA */ - case U'\u03B2': /* GREEK SMALL LETTER BETA */ - case U'\u03B3': /* GREEK SMALL LETTER GAMMA */ - case U'\u03B4': /* GREEK SMALL LETTER DELTA */ - case U'\u03B5': /* GREEK SMALL LETTER EPSILON */ - case U'\u03B6': /* GREEK SMALL LETTER ZETA */ - case U'\u03B7': /* GREEK SMALL LETTER ETA */ - case U'\u03B8': /* GREEK SMALL LETTER THETA */ - case U'\u03B9': /* GREEK SMALL LETTER IOTA */ - case U'\u03BA': /* GREEK SMALL LETTER KAPPA */ - case U'\u03BB': /* GREEK SMALL LETTER LAMBDA */ - case U'\u03BC': /* GREEK SMALL LETTER MU */ - case U'\u03BD': /* GREEK SMALL LETTER NU */ - case U'\u03BE': /* GREEK SMALL LETTER XI */ - case U'\u03BF': /* GREEK SMALL LETTER OMICRON */ - case U'\u03C0': /* GREEK SMALL LETTER PI */ - case U'\u03C1': /* GREEK SMALL LETTER RHO */ - case U'\u03C2': /* GREEK SMALL LETTER FINAL SIGMA */ - case U'\u03C3': /* GREEK SMALL LETTER SIGMA */ - case U'\u03C4': /* GREEK SMALL LETTER TAU */ - case 
U'\u03C5': /* GREEK SMALL LETTER UPSILON */ - case U'\u03C6': /* GREEK SMALL LETTER PHI */ - case U'\u03C7': /* GREEK SMALL LETTER CHI */ - case U'\u03C8': /* GREEK SMALL LETTER PSI */ - case U'\u03C9': /* GREEK SMALL LETTER OMEGA */ - case U'\u1D79': /* LATIN SMALL LETTER INSULAR G */ - case U'\uA7B7': /* LATIN SMALL LETTER OMEGA */ - case U'\uFB00': /* LATIN SMALL LIGATURE FF */ - *_res = UINT8_C(0x1); - break; - } - return false; -} diff --git a/src/u8c/is.h.d/iscntrl.c b/src/u8c/is.h.d/iscntrl.c deleted file mode 100644 index 4dcf543..0000000 --- a/src/u8c/is.h.d/iscntrl.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <assert.h> -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/is.h> -bool u8c_iscntrl(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); - switch(_chr) { - default: - *_res = UINT8_C(0x0); - break; - case U'\x0': /* NULL */ - case U'\x1': /* START OF HEADING */ - case U'\x2': /* START OF TEXT */ - case U'\x3': /* END OF TEXT */ - case U'\x4': /* END OF TRANSMISSION */ - case U'\x5': /* ENQUIRY */ - case U'\x6': /* ACKNOWLEDGE */ - case U'\a': /* BELL */ - case U'\b': /* BACKSPACE */ - case U'\t': /* HORIZONTAL TABULATION */ - case U'\n': /* NEW LINE */ - case U'\v': /* VERTICAL TABULATION */ - case U'\f': /* FORM FEED */ - case U'\r': /* CARRIAGE RETURN */ - case U'\xE': /* SHIFT OUT */ - case U'\xF': /* SHIFT IN */ - case U'\x10': /* DATA LINK ESCAPE */ - case U'\x11': /* DEVICE CONTROL ONE */ - case U'\x12': /* DEVICE CONTROL TWO */ - case U'\x13': /* DEVICE CONTROL THREE */ - case U'\x14': /* DEVICE CONTROL FOUR */ - case U'\x15': /* NEGATIVE ACKNOWLEDGE */ - case U'\x16': /* SYNCHRONOUS IDLE */ - case U'\x17': /* END OF TRANSMISSION BLOCK */ - case U'\x18': /* CANCEL */ - case U'\x19': /* END OF MEDIUM */ - case U'\x1A': /* SUBSTITUTE */ - case U'\x1B': /* ESCAPE */ - case U'\x1C': /* FILE SEPERATOR */ - case U'\x1D': /* GROUP SEPERATOR */ - case U'\x1E': /* RECORD SEPERATOR */ - case U'\x1F': /* UNIT SEPERATOR */ - *_res = UINT8_C(0x1); - break; - } - return false; -} diff --git a/src/u8c/main.h.d/abrtfn.c b/src/u8c/main.h.d/abrtfn.c index 80a03e3..bab21f2 100644 --- a/src/u8c/main.h.d/abrtfn.c +++ b/src/u8c/main.h.d/abrtfn.c @@ -22,7 +22,7 @@ # include <time.h> # include <u8c/intern.h> # include <u8c/main.h> -noreturn bool u8c_abrtfn(char const * const _fl,long const _ln,char const * const _fn,char const * const _why) { +noreturn void u8c_abrtfn(char const * const restrict _fl,long const _ln,char const * const restrict _fn,char const * const restrict _why) { fprintf(stderr,"u8c: *** Aborted 
(\"%s\":%li in function \"%s\": \"%s\" @ %" PRIuMAX ") ***\nLibrary diagnostics:\n debug:%s\n status:%" PRIuLEAST8 "\n thread-safe:%s\n version:%" PRIuLEAST64 "\n",_fl,_ln,_fn,_why,(intmax_t)time(NULL),u8c_dbg ? "true" : "false",u8c_dat.stat,u8c_thrdsafe ? "true" : "false",u8c_ver); fprintf(stderr,"Trying to clean up...\n"); u8c_end(); diff --git a/src/u8c/main.h.d/end.c b/src/u8c/main.h.d/end.c index ef8114c..741a961 100644 --- a/src/u8c/main.h.d/end.c +++ b/src/u8c/main.h.d/end.c @@ -19,13 +19,16 @@ # include <u8c/SIZE_C.h> # include <u8c/intern.h> # include <u8c/main.h> -# include <u8c/u32.h> +# include <u8c/str.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_end(void) { +struct u8c_end_tuple u8c_end(void) { + struct u8c_end_tuple ret = { + .stat = false, + }; if(!u8c_dat.stat) { - return false; + return ret; } # if defined(u8c_bethrdsafe) /* Destroy mutexes: */ @@ -35,11 +38,11 @@ bool u8c_end(void) { mtx_destroy(&u8c_dat.outlock); # endif /* Free error message: */ - u8c_u32free(&u8c_dat.err); + u8c_strfree(u8c_dat.err); /* Set default formatting options: */ u8c_dat.fmtbase = UINT8_C(0xC); u8c_dat.fmtendian = UINT8_C(0x0); /* Set status: */ u8c_dat.stat = UINT8_C(0x0); - return false; + return ret; } diff --git a/src/u8c/main.h.d/init.c b/src/u8c/main.h.d/init.c index 0b34577..1762822 100644 --- a/src/u8c/main.h.d/init.c +++ b/src/u8c/main.h.d/init.c @@ -19,37 +19,45 @@ # include <stdbool.h> # include <stddef.h> # include <stdint.h> +# include <u8c/SIZE_C.h> # include <u8c/err.h> # include <u8c/intern.h> # include <u8c/main.h> -# include <u8c/u32.h> +# include <u8c/str.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_init() { +struct u8c_init_tuple u8c_init() { + struct u8c_init_tuple ret = { + .stat = false, + }; /* Initialise mutexes: */ # if defined(u8c_bethrdsafe) if(mtx_init(&u8c_dat.errhandlslock,mtx_plain) == thrd_error) { - return true; + ret.stat = true; + return ret; } 
if(mtx_init(&u8c_dat.errlock,mtx_plain) == thrd_error) { - return true; + ret.stat = true; + return ret; } if(mtx_init(&u8c_dat.fmtlock,mtx_plain) == thrd_error) { - return true; + ret.stat = true; + return ret; } if(mtx_init(&u8c_dat.outlock,mtx_plain) == thrd_error) { - return true; + ret.stat = true; + return ret; } # endif /* Set default error message: */ u8c_dat.err = NULL; - u8c_seterr(U"",u8c_errtyp_deferr); + u8c_seterr(u8c_errtyp_deferr,U""); /* Initialise error handler array: */ - for(register size_t n = SIZE_C(0x0);n < u8c_errtyp_maxerrtyp;n += SIZE_C(0x1)) { + for(register size_t n = SIZE_C(0x0);n < u8c_errtyp_all;n += SIZE_C(0x1)) { u8c_dat.errhandls[n] = NULL; } /* Set status: */ u8c_dat.stat = UINT8_C(0x1); - return false; + return ret; } diff --git a/src/u8c/main.h.d/thrdsafe.c b/src/u8c/main.h.d/thrdsafe.c index 9d8e41a..4e8c40c 100644 --- a/src/u8c/main.h.d/thrdsafe.c +++ b/src/u8c/main.h.d/thrdsafe.c @@ -22,3 +22,193 @@ bool const u8c_thrdsafe = # else false; # endif +/* +1720 173F HANUNOO +1740 175F BUHID +1760 177F TAGBANWA +1780 17FF KHMER +1800 18AF MONGOLIAN +18B0 18FF UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED +1900 194F LIMBU +1950 197F TAI LE +1980 19DF NEW TAI LUE +19E0 19FF KHMER SYMBOLS +1A00 1A1F BUGINESE +1A20 1AAF TAI THAM +1AB0 1AFF COMBINING DIACRITICAL MARKS EXTENDED +1B00 1B7F BALINESE +1B80 1BBF SUNDANESE +1BC0 1BFF BATAK +1C00 1C4F LEPCHA +1C50 1C7F OL CHIKI +1C80 1C8F CYRILLIC EXTENDED C +1C90 1CBF GEORGIAN EXTENDED +1CC0 1CCF SUNDANESE SUPPLEMENT +1CD0 1CFF VEDIC EXTENSIONS +1D00 1D7F PHONETIC EXTENSIONS +1D80 1DBF PHONETIC EXTENSIONS SUPPLEMENT +1DC0 1DFF COMBINING DIACRITICAL MARKS SUPPLEMENT +1E00 1EFF LATIN EXTENDED ADDITIONAL +1F00 1FFF GREEK EXTENDED +2000 206F GENERAL PUNCTUATION +2070 209F SUPERSCRIPTS AND SUBSCRIPTS +20A0 20CF CURRENCY SYMBOLS +20D0 20FF COMBINING DIACRITICAL MARKS FOR SYMBOLS +2100 214F LETTERLIKE SYMBOLS +2150 218F NUMBER FORMS +2190 21FF ARROWS +2200 22FF MATHEMATICAL OPERATORS +2300 23FF 
MISCELLANEOUS TECHNICAL +2400 243F CONTROL PICTURES +2440 245F OPTICAL CHARACTER RECOGNITION +2460 24FF ENCLOSED ALPHANUMERICS +2500 257F BOX DRAWING +2580 259F BLOCK ELEMENTS +25A0 25FF GEOMETRIC SHAPES +2600 26FF MISCELLANEOUS SYMBOLS +2700 27BF DINGBATS +27C0 27EF MISCELLANEOUS MATHEMATICAL SYMBOLS-A +27F0 27FF SUPPLEMENTAL ARROWS-A +2800 28FF BRAILLE PATTERNS +2900 297F SUPPLEMENTAL ARROWS-B +2980 29FF MISCELLANEOUS MATHEMATICAL SYMBOLS-B +2A00 2AFF SUPPLEMENTAL MATHEMATICAL OPERATORS +2B00 2BFF MISCELLANEOUS SYMBOLS AND ARROWS +2C00 2C5F GLAGOLITIC +2C60 2C7F LATIN EXTENDED-C +2C80 2CFF COPTIC +2D00 2D2F GEORGIAN SUPPLEMENT +2D30 2D7F TIFINAGH +2D80 2DDF ETHIOPIC EXTENDED +2DE0 2DFF CYRILLIC EXTENDED-A +2E00 2E7F SUPPLEMENTAL PUNCTUATION +2E80 2EFF CJK RADICALS SUPPLEMENT +2F00 2FDF KANGXI RADICALS +2FF0 2FFF IDEOGRAPHIC DESCRIPTION CHARACTERS +3000 303F CJK SYMBOLS AND PUNCTUATION +3040 309F HIRAGANA +30A0 30FF KATAKANA +3100 312F BOPOMOFO +3130 318F HANGUL COMPATIBILITY JAMO +3190 319F KANBUN +31A0 31BF BOPOMOFO EXTENDED +31C0 31EF CJK STROKES +31F0 31FF KATAKANA PHONETIC EXTENSIONS +3200 32FF ENCLOSED CJK LETTERS AND MONTHS +3300 33FF CJK COMPATIBILITY +3400 4DBF CJK UNIFIED IDEOGRAPHS EXTENSION A +4DC0 4DFF YIJING HEXAGRAM SYMBOLS +4E00 9FFF CJK UNIFIED IDEOGRAPHS +A000 A48F YI SYLLABLES +A490 A4CF YI RADICALS +A4D0 A4FF LISU +A500 A63F VAI +A640 A69F CYRILLIC EXTENDED-B +A6A0 A6FF BAMUM +A700 A71F MODIFIER TONE LETTERS +A720 A7FF LATIN EXTENDED-D +A800 A82F SYLOTI NAGRI +A830 A83F COMMON INDIC NUMBER FORMS +A840 A87F PHAGS-PA +A880 A8DF SAURASHTRA +A8E0 A8FF DEVANAGARI EXTENDED +A900 A92F KAYAH LI +A930 A95F REJANG +A960 A97F HANGUL JAMO EXTENDED-A +A980 A9DF JAVANESE +A9E0 A9FF MYANMAR EXTENDED-B +AA00 AA5F CHAM +AA60 AA7F MYANMAR EXTENDED-A +AA80 AADF TAI VIET +AAE0 AAFF MEETEI MAYEK EXTENSIONS +AB00 AB2F ETHIOPIC EXTENDED-A +AB30 AB6F LATIN EXTENDED-E +AB70 ABBF CHEROKEE SUPPLEMENT +ABC0 ABFF MEETEI MAYEK +AC00 D7AF HANGUL SYLLABLES +D7B0 D7FF HANGUL 
JAMO EXTENDED-B +D800 DB7F HIGH SURROGATES +DB80 DBFF HIGH PRIVATE USE SURROGATES +DC00 DFFF LOW SURROGATES +E000 F8FF PRIVATE USE AREA +F900 FAFF CJK COMPATIBILITY IDEOGRAPHS +FB00 FB4F ALPHABETIC PRESENTATION FORMS +FB50 FDFF ARABIC PRESENTATION FORMS-A +FE00 FE0F VARIATION SELECTORS +FE10 FE1F VERTICAL FORMS +FE20 FE2F COMBINING HALF MARKS +FE30 FE4F CJK COMPATIBILITY FORMS +FE50 FE6F SMALL FORM VARIANTS +FE70 FEFF ARABIC PRESENTATION FORMS-B +FF00 FFEF HALFWIDTH AND FULLWIDTH FORMS +FFF0 FFFF SPECIALS +10000 1007F LINEAR B SYLLABARY +10080 100FF LINEAR B IDEOGRAMS +10100 1013F AEGEAN NUMBERS +10140 1018F ANCIENT GREEK NUMBERS +10190 101CF ANCIENT SYMBOLS +101D0 101FF PHAISTOS DISC +10280 1029F LYCIAN +102A0 102DF CARIAN +102E0 102FF COPTIC EPACT NUMBERS +10300 1032F OLD ITALIC +10330 1034F GOTHIC +10350 1037F OLD PERMIC +10380 1039F UGARITIC +103A0 103DF OLD PERSIAN +10400 1044F DESERET +10450 1047F SHAVIAN +10480 104AF OSMANYA +104B0 104FF OSAGE +10500 1052F ELBASAN +10530 1056F CAUCASIAN ALBANIAN +10600 1077F LINEAR A +10800 1083F CYPRIOT SYLLABARY +10840 1085F IMPERIAL ARAMAIC +10860 1087F PALMYRENE +10880 108AF NABATAEAN +108E0 108FF HATRAN +10900 1091F PHOENICIAN +10920 1093F LYDIAN +10980 1099F MEROITIC HIEROGLYPHS +109A0 109FF MEROITIC CURSIVE +10A00 10A5F KHAROSHTHI +10A60 10A7F OLD SOUTH ARABIAN +10A80 10A9F OLD NORTH ARABIAN +10AC0 10AFF MANICHAEAN +10B00 10B3F AVESTAN +10B40 10B5F INSCRIPTIONAL PARTHIAN +10B60 10B7F INSCRIPTIONAL PAHLAVI +10B80 10BAF PSALTER PAHLAVI +10C00 10C4F OLD TURKIC +10C80 10CFF OLD HUNGARIAN +10D00 10D3F HANIFI ROHINGYA +10E60 10E7F RUMI NUMERAL SYMBOLS +10E80 10EBF YEZIDI +10F00 10F2F OLD SOGDIAN +10F30 10F6F SOGDIAN +10FB0 10FDF CHORASMIAN +10FE0 10FFF ELYMAIC +11000 1107F BRAHMI +11080 110CF KAITHI +110D0 110FF SORA SOMPENG +11100 1114F CHAKMA +11150 1117F MAHAJANI +11180 111DF SHARADA +111E0 111FF SINHALA ARCHAIC NUMBERS +11200 1124F KHOJKI +11280 112AF MULTANI +112B0 112FF KHUDAWADI +11300 1137F GRANTHA +11400 1147F NEWA 
+11480 114DF TIRHUTA +11580 115FF SIDDHAM +11600 1165F MODI +11660 1167F MONGOLIAN SUPPLEMENT +11680 116CF TAKRI +11700 1173F AHOM +11800 1184F DOGRA +118A0 118FF WARANG CITI +11900 1195F DIVES AKURU +119A0 119FF NANDINAGARI +*/ diff --git a/src/u8c/main.h.d/uniblk.c b/src/u8c/main.h.d/uniblk.c new file mode 100644 index 0000000..332f8aa --- /dev/null +++ b/src/u8c/main.h.d/uniblk.c @@ -0,0 +1,517 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <u8c/main.h> +# include <u8c/str.h> +# include <uchar.h> +# define u8c_uniblk_isinrng(val,start,end) (val >= start && val <= end) +struct u8c_uniblk_tuple u8c_uniblk(char32_t const _chr) { + struct u8c_uniblk_tuple ret = { + .stat = false, + }; + char32_t const * blk = U"UNDEFINED IN UNICODE"; + if(_chr <= U'\x7F') { + blk = U"BASIC LATIN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\x80',U'\u00FF')) { + blk = U"LATIN-1 SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0100',U'\u017F')) { + blk = U"LATIN EXTENDED-A"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0180',U'\u024F')) { + blk = U"LATIN EXTENDED-B"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0250',U'\u02AF')) { + blk = U"IPA EXTENSIONS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u02B0',U'\u02FF')) { + blk = U"SPACING MODIFIER LETTERS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0300',U'\u036F')) { + blk = U"COMBINING DIRACITICAL MARKS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0370',U'\u03FF')) { + blk = U"GREEK AND COPTIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0400',U'\u04FF')) { + blk = U"CYRILLIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0500',U'\u052F')) { + blk = U"CYRILLIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0530',U'\u058F')) { + blk = U"ARMENIAN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0590',U'\u05FF')) { + blk = U"HEBREW"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0600',U'\u06FF')) { + blk = U"ARABIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0700',U'\u074F')) { + blk = U"SYRIAC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0750',U'\u077F')) { + blk = U"ARABIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0780',U'\u07BF')) { + blk = U"THAANA"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u07C0',U'\u07FF')) { + blk = U"NKO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0800',U'\u083F')) { + blk = U"SAMARITAN"; + goto 
end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0840',U'\u085F')) { + blk = U"MANDAIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0860',U'\u086F')) { + blk = U"SYRIAC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u08A0',U'\u08FF')) { + blk = U"ARABIC EXTENDED-A"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0900',U'\u097F')) { + blk = U"DEVANAGARI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0980',U'\u09FF')) { + blk = U"BENGALI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0A00',U'\u0A7F')) { + blk = U"GURMUKHI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0A80',U'\u0AFF')) { + blk = U"GUJARATI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0B00',U'\u0B7F')) { + blk = U"ORIYAS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0B80',U'\u0BFF')) { + blk = U"TAMIL"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0C00',U'\u0C7F')) { + blk = U"TELUGU"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0C80',U'\u0CFF')) { + blk = U"KANNADA"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0D00',U'\u0D7F')) { + blk = U"MALAYALAM"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0D80',U'\u0DFF')) { + blk = U"SINHALA"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0E00',U'\u0E7F')) { + blk = U"THAI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0E80',U'\u0EFF')) { + blk = U"LAO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0F00',U'\u0FFF')) { + blk = U"TIBETAN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1000',U'\u109F')) { + blk = U"MYANMAR"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u10A0',U'\u10FF')) { + blk = U"GEORGIAN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1100',U'\u11FF')) { + blk = U"HANGUL JAMO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1200',U'\u137F')) { + blk = U"ETHIOPIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1380',U'\u139F')) { + blk = U"ETHIOPIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u13A0',U'\u13FF')) { + blk = U"CHEROKEE"; + goto end; + 
} + if(u8c_uniblk_isinrng(_chr,U'\u1400',U'\u167F')) { + blk = U"UNIFIED CANADIAN ABORIGINAL SYLLABICS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1680',U'\u169F')) { + blk = U"OGHAM"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u16A0',U'\u16FF')) { + blk = U"RUNIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1700',U'\u171F')) { + blk = U"TAGALOG"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011A00',U'\U00011A4F')) { + blk = U"ZANABAZAR SQUARE"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011A50',U'\U00011AAF')) { + blk = U"SOYOMBO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011AC0',U'\U00011AFF')) { + blk = U"PAU CIN HAU"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011C00',U'\U00011C6F')) { + blk = U"BHAIKSUKI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011C70',U'\U00011CBF')) { + blk = U"MARCHEN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011D00',U'\U00011D5F')) { + blk = U"MASARAM GONDI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011D60',U'\U00011DAF')) { + blk = U"GUNJALA GONDI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011EE0',U'\U00011EFF')) { + blk = U"MAKASAR"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011FB0',U'\U00011FBF')) { + blk = U"LISU SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011FC0',U'\U00011FFF')) { + blk = U"TAMIL SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00012000',U'\U000123FF')) { + blk = U"CUNEIFORM"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00012400',U'\U0001247F')) { + blk = U"CUNEIFORM NUMBERS AND PUNCTUATION"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00012480',U'\U0001254F')) { + blk = U"EARLY DYNASTIC CUNEIFORM"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00013000',U'\U0001342F')) { + blk = U"EGYPTIAN HIEROGLYPHS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00013430',U'\U0001343F')) { + blk = U"EGYPTIAN HIEROGLYPH FORMAT CONTROLS"; + goto end; + } + 
if(u8c_uniblk_isinrng(_chr,U'\U00014400',U'\U0001467F')) { + blk = U"ANATOLIAN HIEROGLYPHS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016800',U'\U00016A3F')) { + blk = U"BAMUM SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016A40',U'\U00016A6F')) { + blk = U"MRO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016AD0',U'\U00016AFF')) { + blk = U"BASSA VAH"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016B00',U'\U00016B8F')) { + blk = U"PAHAWH HMONG"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016E40',U'\U00016E9F')) { + blk = U"MEDEFAIDRIN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016F00',U'\U00016F9F')) { + blk = U"MIAO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016FE0',U'\U00016FFF')) { + blk = U"IDEOGRAPHIC SYMBOLS AND PUNCTUATION"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00017000',U'\U000187FF')) { + blk = U"TANGUT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00018800',U'\U00018AFF')) { + blk = U"TANGUT COMPONENTS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00018B00',U'\U00018CFF')) { + blk = U"KHITAN SMALL SCRIPT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00018D00',U'\U00018D8F')) { + blk = U"TANGUT SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001B000',U'\U0001B0FF')) { + blk = U"KANA SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001B100',U'\U0001B12F')) { + blk = U"KANA EXTENDED-A"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001B130',U'\U0001B16F')) { + blk = U"SMALL KANA EXTENSION"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001B170',U'\U0001B2FF')) { + blk = U"NUSHU"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001BC00',U'\U0001BC9F')) { + blk = U"DUPLOYAN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001BCA0',U'\U0001BCAF')) { + blk = U"SHORTHAND FORMAT CONTROLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D000',U'\U0001D0FF')) { + blk = U"BYZANTINE MUSICAL SYMBOLS"; + goto end; + } + 
if(u8c_uniblk_isinrng(_chr,U'\U0001D100',U'\U0001D1FF')) { + blk = U"MUSICAL SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D200',U'\U0001D24F')) { + blk = U"ANCIENT GREEK MUSICAL NOTATION"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D2E0',U'\U0001D2FF')) { + blk = U"MAYAN NUMERALS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D300',U'\U0001D35F')) { + blk = U"TAI XUAN JING SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D360',U'\U0001D37F')) { + blk = U"COUNTING ROD NUMERALS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D400',U'\U0001D7FF')) { + blk = U"MATHEMATICAL ALPHANUMERIC SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D800',U'\U0001DAAF')) { + blk = U"SUTTON SIGNWRITING"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001E000',U'\U0001E02F')) { + blk = U"GLAGOLITIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001E100',U'\U0001E14F')) { + blk = U"NYIAKENG PUACHUE HMONG"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001E2C0',U'\U0001E2FF')) { + blk = U"WANCHO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001E800',U'\U0001E8DF')) { + blk = U"MENDE KIKAKUI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001E900',U'\U0001E95F')) { + blk = U"ADLAM"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001EC70',U'\U0001ECBF')) { + blk = U"INDIC SIYAQ NUMBERS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001ED00',U'\U0001ED4F')) { + blk = U"OTTOMAN SIYAQ NUMBERS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001EE00',U'\U0001EEFF')) { + blk = U"ARABIC MATHEMATICAL ALPHABETIC SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F000',U'\U0001F02F')) { + blk = U"MAHJONG TILES"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F030',U'\U0001F09F')) { + blk = U"DOMINO TILES"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F0A0',U'\U0001F0FF')) { + blk = U"PLAYING CARDS"; + goto end; + } + 
if(u8c_uniblk_isinrng(_chr,U'\U0001F100',U'\U0001F1FF')) { + blk = U"ENCLOSED ALPHANUMERIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F200',U'\U0001F2FF')) { + blk = U"ENCLOSED IDEOGRAPHIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F300',U'\U0001F5FF')) { + blk = U"MISCELLANEOUS SYMBOLS AND PICTOGRAPHS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F600',U'\U0001F64F')) { + blk = U"EMOTICONS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F650',U'\U0001F67F')) { + blk = U"ORNAMENTAL DINGBATS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F680',U'\U0001F6FF')) { + blk = U"TRANSPORT AND MAP SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F700',U'\U0001F77F')) { + blk = U"ALCHEMICAL SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F780',U'\U0001F7FF')) { + blk = U"GEOMETRIC SHAPES EXTENDED"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F800',U'\U0001F8FF')) { + blk = U"SUPPLEMENTAL ARROWS-C"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F900',U'\U0001F9FF')) { + blk = U"SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001FA00',U'\U0001FA6F')) { + blk = U"CHESS SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001FA70',U'\U0001FAFF')) { + blk = U"SYMBOLS AND PICTOGRAPHS EXTENDED-A"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001FB00',U'\U0001FBFF')) { + blk = U"SYMBOLS FOR LEGACY COMPUTING"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00020000',U'\U0002A6DF')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION B"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0002A700',U'\U0002B73F')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION C"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0002B740',U'\U0002B81F')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION D"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0002B820',U'\U0002CEAF')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION E"; + goto end; + } + 
if(u8c_uniblk_isinrng(_chr,U'\U0002CEB0',U'\U0002EBEF')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION F"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0002F800',U'\U0002FA1F')) { + blk = U"CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00030000',U'\U0003134F')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION G"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U000E0000',U'\U000E007F')) { + blk = U"TAGS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U000E0100',U'\U000E1EFF')) { + blk = U"VARIATION SELECTORS SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U000F0000',U'\U000FFFFF')) { + blk = U"SUPPLEMENTARY PRIVATE USE AREA-A"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00100000',U'\U0010FFFF')) { + blk = U"SUPPLEMENTARY PRIVATE USE AREA-B"; + goto end; + } + if(_chr > u8c_unimax) { + ret.stat = true; + return ret; + } +end:; + { + struct u8c_strcp_tuple const tuple = u8c_strcp(blk); + ret.blk = tuple.str; + ret.blksz = tuple.strsz; + } + return ret; +} diff --git a/src/u8c/main.h.d/uninm.c b/src/u8c/main.h.d/uninm.c new file mode 100644 index 0000000..d409263 --- /dev/null +++ b/src/u8c/main.h.d/uninm.c @@ -0,0 +1,2582 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <u8c/err.h> +# include <u8c/main.h> +# include <u8c/str.h> +# include <uchar.h> +struct u8c_uninm_tuple u8c_uninm(char32_t const _chr) { + struct u8c_uninm_tuple ret; + if(_chr > u8c_unimax) { + u8c_seterr(u8c_errtyp_stroor,U"u8c_uninm: Character out of range."); + ret.stat = true; + return ret; + } + char32_t const * nm = U""; + switch(_chr) { + default: + nm = U"UNDEFINED IN UNICODE"; + break; + /* BASIC LATIN: */ + case U'\x0': + nm = U"NULL"; + break; + case U'\x1': + nm = U"START OF HEADING"; + break; + case U'\x2': + nm = U"START OF TEXT"; + break; + case U'\x3': + nm = U"END OF TEXT"; + break; + case U'\x4': + nm = U"END OF TRANSMISSION"; + break; + case U'\x5': + nm = U"ENQUIRY"; + break; + case U'\x6': + nm = U"ACKNOWLEDGE"; + break; + case U'\a': + nm = U"BELL"; + break; + case U'\b': + nm = U"BACKSPACE"; + break; + case U'\t': + nm = U"HORIZONTAL TABULATION"; + break; + case U'\n': + nm = U"NEW LINE"; + break; + case U'\v': + nm = U"VERTICAL TABULATION"; + break; + case U'\f': + nm = U"FORM FEED"; + break; + case U'\r': + nm = U"CARRIAGE RETURN"; + break; + case U'\xE': + nm = U"SHIFT OUT"; + break; + case U'\xF': + nm = U"SHIFT IN"; + break; + case U'\x10': + nm = U"DATA LINK ESCAPE"; + break; + case U'\x11': + nm = U"DEVICE CONTROL ONE"; + break; + case U'\x12': + nm = U"DEVICE CONTROL TWO"; + break; + case U'\x13': + nm = U"DEVICE CONTROL THREE"; + break; + case U'\x14': + nm = U"DEVICE CONTROL FOUR"; + break; + case U'\x15': + nm = U"NEGATIVE ACKNOWLEDGE"; + break; + case U'\x16': + nm = U"SYNCHRONOUS IDLE"; + break; + case U'\x17': + nm = U"END OF TRANSMISSION BLOCk"; + break; + case U'\x18': + nm = U"CANCEL"; + break; + case U'\x19': + nm = U"END OF MEDIUM"; + break; + case U'\x1A': + nm = U"SUBSTITUTE"; + break; + case U'\x1B': + nm = U"ESCAPE"; + break; + case U'\x1C': + nm = U"FILE SEPERATOR"; + break; + case U'\x1D': + nm = U"GROUP SEPERATOR"; + break; + case U'\x1E': + nm = U"RECORD SEPERATOR"; + break; + 
case U'\x1F': + nm = U"UNIT SEPERATOR"; + break; + case U' ': + nm = U"SPACE"; + break; + case U'!': + nm = U"EXCLAMATION MARK"; + break; + case U'\"': + nm = U"QUOTATION MARK"; + break; + case U'#': + nm = U"NUMBER SIGN"; + break; + case U'\u0024': + nm = U"DOLLAR SIGN"; + break; + case U'%': + nm = U"PERCENT SIGN"; + break; + case U'&': + nm = U"AMPERSAND"; + break; + case U'\'': + nm = U"APOSTROPHE"; + break; + case U'(': + nm = U"LEFT PARANTHESIS"; + break; + case U')': + nm = U"RIGHT PARANTHESIS"; + break; + case U'*': + nm = U"ASTERISK"; + break; + case U'+': + nm = U"PLUS SIGN"; + break; + case U',': + nm = U"COMMA"; + break; + case U'-': + nm = U"HYPHEN-MINUS"; + break; + case U'.': + nm = U"FULL STOP"; + break; + case U'/': + nm = U"SOLIDUS"; + break; + case U'0': + nm = U"DIGIT ZERO"; + break; + case U'1': + nm = U"DIGIT ONE"; + break; + case U'2': + nm = U"DIGIT TWO"; + break; + case U'3': + nm = U"DIGIT THREE"; + break; + case U'4': + nm = U"DIGIT FOUR"; + break; + case U'5': + nm = U"DIGIT FIVE"; + break; + case U'6': + nm = U"DIGIT SIX"; + break; + case U'7': + nm = U"DIGIT SEVEN"; + break; + case U'8': + nm = U"DIGIT EIGHT"; + break; + case U'9': + nm = U"DIGIT NINE"; + break; + case U':': + nm = U"COLON"; + break; + case U';': + nm = U"SEMICOLON"; + break; + case U'<': + nm = U"LESS-THAN SIGN"; + break; + case U'=': + nm = U"EQUALS SIGN"; + break; + case U'>': + nm = U"GREATER-THAN SIGN"; + break; + case U'?': + nm = U"QUESTION MARK"; + break; + case U'\u0040': + nm = U"COMMERCIAL AT"; + break; + case U'A': + nm = U"LATIN CAPITAL LETTER A"; + break; + case U'B': + nm = U"LATIN CAPITAL LETTER B"; + break; + case U'C': + nm = U"LATIN CAPITAL LETTER C"; + break; + case U'D': + nm = U"LATIN CAPITAL LETTER D"; + break; + case U'E': + nm = U"LATIN CAPITAL LETTER E"; + break; + case U'F': + nm = U"LATIN CAPITAL LETTER F"; + break; + case U'G': + nm = U"LATIN CAPITAL LETTER G"; + break; + case U'H': + nm = U"LATIN CAPITAL LETTER H"; + break; + case U'I': + 
nm = U"LATIN CAPITAL LETTER I"; + break; + case U'J': + nm = U"LATIN CAPITAL LETTER J"; + break; + case U'K': + nm = U"LATIN CAPITAL LETTER K"; + break; + case U'L': + nm = U"LATIN CAPITAL LETTER L"; + break; + case U'M': + nm = U"LATIN CAPITAL LETTER M"; + break; + case U'N': + nm = U"LATIN CAPITAL LETTER N"; + break; + case U'O': + nm = U"LATIN CAPITAL LETTER O"; + break; + case U'P': + nm = U"LATIN CAPITAL LETTER P"; + break; + case U'Q': + nm = U"LATIN CAPITAL LETTER Q"; + break; + case U'R': + nm = U"LATIN CAPITAL LETTER R"; + break; + case U'S': + nm = U"LATIN CAPITAL LETTER S"; + break; + case U'T': + nm = U"LATIN CAPITAL LETTER T"; + break; + case U'U': + nm = U"LATIN CAPITAL LETTER U"; + break; + case U'V': + nm = U"LATIN CAPITAL LETTER V"; + break; + case U'W': + nm = U"LATIN CAPITAL LETTER W"; + break; + case U'X': + nm = U"LATIN CAPITAL LETTER X"; + break; + case U'Y': + nm = U"LATIN CAPITAL LETTER Y"; + break; + case U'Z': + nm = U"LATIN CAPITAL LETTER Z"; + break; + case U'[': + nm = U"LEFT SQUARE BRACKET"; + break; + case U'\\': + nm = U"REVERSE SOLIDUS"; + break; + case U']': + nm = U"RIGHT SQUARE BRACKET"; + break; + case U'^': + nm = U"CIRCUMFLEX ACCENT"; + break; + case U'_': + nm = U"LOW LINE"; + break; + case U'\u0060': + nm = U"GRAVE ACCENT"; + break; + case U'a': + nm = U"LATIN SMALL LETTER A"; + break; + case U'b': + nm = U"LATIN SMALL LETTER B"; + break; + case U'c': + nm = U"LATIN SMALL LETTER C"; + break; + case U'd': + nm = U"LATIN SMALL LETTER D"; + break; + case U'e': + nm = U"LATIN SMALL LETTER E"; + break; + case U'f': + nm = U"LATIN SMALL LETTER F"; + break; + case U'g': + nm = U"LATIN SMALL LETTER G"; + break; + case U'h': + nm = U"LATIN SMALL LETTER H"; + break; + case U'i': + nm = U"LATIN SMALL LETTER I"; + break; + case U'j': + nm = U"LATIN SMALL LETTER J"; + break; + case U'k': + nm = U"LATIN SMALL LETTER K"; + break; + case U'l': + nm = U"LATIN SMALL LETTER L"; + break; + case U'm': + nm = U"LATIN SMALL LETTER M"; + break; + 
case U'n': + nm = U"LATIN SMALL LETTER N"; + break; + case U'o': + nm = U"LATIN SMALL LETTER O"; + break; + case U'p': + nm = U"LATIN SMALL LETTER P"; + break; + case U'q': + nm = U"LATIN SMALL LETTER Q"; + break; + case U'r': + nm = U"LATIN SMALL LETTER R"; + break; + case U's': + nm = U"LATIN SMALL LETTER S"; + break; + case U't': + nm = U"LATIN SMALL LETTER T"; + break; + case U'u': + nm = U"LATIN SMALL LETTER U"; + break; + case U'v': + nm = U"LATIN SMALL LETTER V"; + break; + case U'w': + nm = U"LATIN SMALL LETTER W"; + break; + case U'x': + nm = U"LATIN SMALL LETTER X"; + break; + case U'y': + nm = U"LATIN SMALL LETTER Y"; + break; + case U'z': + nm = U"LATIN SMALL LETTER Z"; + break; + case U'{': + nm = U"LEFT CURLY BRACKET"; + break; + case U'|': + nm = U"VERTICAL LINE"; + break; + case U'}': + nm = U"RIGHT CURLY BRACKET"; + break; + case U'~': + nm = U"TILDE"; + break; + case U'\x7F': + nm = U"DELETE"; + break; + /* LATIN-1 SUPPLEMENT: */ + case U'\x80': + nm = U"<CONTROL>"; + break; + case U'\x81': + nm = U"<CONTROL>"; + break; + case U'\x82': + nm = U"BREAK PERMITTED HERE"; + break; + case U'\x83': + nm = U"NO BREAK HERE"; + break; + case U'\x84': + nm = U"<CONTROL>"; + break; + case U'\x85': + nm = U"NEXT LINE"; + break; + case U'\x86': + nm = U"START OF SELECTED AREA"; + break; + case U'\x87': + nm = U"END OF SELECTED AREA"; + break; + case U'\x88': + nm = U"CHARACTER TABULATION SET"; + break; + case U'\x89': + nm = U"CHARACTER TABULATION WITH JUSTIFICATION"; + break; + case U'\x8A': + nm = U"LINE TABULATION SET"; + break; + case U'\x8B': + nm = U"PARTIAL LINE FORWARD"; + break; + case U'\x8C': + nm = U"PARTIAL LINE BACKWARD"; + break; + case U'\x8D': + nm = U"REVERSE LINE FEED"; + break; + case U'\x8E': + nm = U"SINGLE SHIFT TWO"; + break; + case U'\x8F': + nm = U"SINGLE SHIFT THREE"; + break; + case U'\x90': + nm = U"DEVICE CONTROL STRING"; + break; + case U'\x91': + nm = U"PRIVATE USE ONE"; + break; + case U'\x92': + nm = U"PRIVATE USE TWO"; + 
break; + case U'\x93': + nm = U"SET TRANSMIT STATE"; + break; + case U'\x94': + nm = U"CANCEL CHARACTER"; + break; + case U'\x95': + nm = U"MESSAGE WAITING"; + break; + case U'\x96': + nm = U"START OF GUARDED AREA"; + break; + case U'\x97': + nm = U"END OF GUARDED AREA"; + break; + case U'\x98': + nm = U"START OF STRING"; + break; + case U'\x99': + nm = U"<CONTROL>"; + break; + case U'\x9A': + nm = U"SINGLE CHARACTER INTRODUCER"; + break; + case U'\x9B': + nm = U"CONTROL SEQUENCE INTRODUCER"; + break; + case U'\x9C': + nm = U"STRING TERMINATOR"; + break; + case U'\x9D': + nm = U"OPERATING SYSTEM COMMAND"; + break; + case U'\x9E': + nm = U"PRIVACY MESSAGE"; + break; + case U'\x9F': + nm = U"APPLICATION PROGRAM COMMAND"; + break; + case U'\xA0': + nm = U"NO-BREAK SPACE"; + break; + case U'\u00A1': + nm = U"INVERTED EXCLAMATION MARK"; + break; + case U'\u00A2': + nm = U"CENT SIGN"; + break; + case U'\u00A3': + nm = U"POUND SIGN"; + break; + case U'\u00A4': + nm = U"CURRENCY SIGN"; + break; + case U'\u00A5': + nm = U"YEN SIGN"; + break; + case U'\u00A6': + nm = U"BROKEN BAR"; + break; + case U'\u00A7': + nm = U"SECTION SIGN"; + break; + case U'\u00A8': + nm = U"DIAERESIS"; + break; + case U'\u00A9': + nm = U"COPYRIGHT SIGN"; + break; + case U'\u00AA': + nm = U"FEMININE ORDINAL INDICATOR"; + break; + case U'\u00AB': + nm = U"LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"; + break; + case U'\u00AC': + nm = U"NOT SIGN"; + break; + case U'\u00AD': + nm = U"SOFT HYPHEN"; + break; + case U'\u00AE': + nm = U"REGISTERED SIGN"; + break; + case U'\u00AF': + nm = U"MACRON"; + break; + case U'\u00B0': + nm = U"DEGREE SIGN"; + break; + case U'\u00B1': + nm = U"PLUS MINUS SYMBOL"; + break; + case U'\u00B2': + nm = U"SUPERSCRIPT TWO"; + break; + case U'\u00B3': + nm = U"SUPERSCRIPT THREE"; + break; + case U'\u00B4': + nm = U"ACUTE ACCENT"; + break; + case U'\u00B5': + nm = U"MICRO SIGN"; + break; + case U'\u00B6': + nm = U"PILCROW SIGN"; + break; + case U'\u00B7': + nm = U"MIDDLE DOT"; + 
break; + case U'\u00B8': + nm = U"CEDILLA"; + break; + case U'\u00B9': + nm = U"SUPERSCRIPT ONE"; + break; + case U'\u00BA': + nm = U"MASCULINE ORDINAL INDICATOR"; + break; + case U'\u00BB': + nm = U"RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"; + break; + case U'\u00BC': + nm = U"VULGAR FRACTION ONE QUARTER"; + break; + case U'\u00BD': + nm = U"VULGAR FRACTION ONE HALF"; + break; + case U'\u00BE': + nm = U"VULGAR FRACTION THREE QUARTERS"; + break; + case U'\u00BF': + nm = U"INVERTED QUESTION MARK"; + break; + case U'\u00C0': + nm = U"LATIN CAPITAL LETTER A WITH GRAVE"; + break; + case U'\u00C1': + nm = U"LATIN CAPITAL LETTER A WITH ACUTE"; + break; + case U'\u00C2': + nm = U"LATIN CAPITAL LETTER A WITH CIRCUMFLEX"; + break; + case U'\u00C3': + nm = U"LATIN CAPITAL LETTER A WITH TILDE"; + break; + case U'\u00C4': + nm = U"LATIN CAPITAL LETTER A WITH DIAERESIS"; + break; + case U'\u00C5': + nm = U"LATIN CAPITAL LETTER A WITH RING ABOVE"; + break; + case U'\u00C6': + nm = U"LATIN CAPITAL LETTER AE"; + break; + case U'\u00C7': + nm = U"LATIN CAPITAL LETTER C WITH CEDILLA"; + break; + case U'\u00C8': + nm = U"LATIN CAPITAL LETTER E WITH GRAVE"; + break; + case U'\u00C9': + nm = U"LATIN CAPITAL LETTER E WITH ACUTE"; + break; + case U'\u00CA': + nm = U"LATIN CAPITAL LETTER E WITH CIRCUMFLEX"; + break; + case U'\u00CB': + nm = U"LATIN CAPITAL LETTER E WITH DIAERESIS"; + break; + case U'\u00CC': + nm = U"LATIN CAPITAL LETTER I WITH GRAVE"; + break; + case U'\u00CD': + nm = U"LATIN CAPITAL LETTER I WITH ACUTE"; + break; + case U'\u00CE': + nm = U"LATIN CAPITAL LETTER I WITH CIRCUMFLEX"; + break; + case U'\u00CF': + nm = U"LATIN CAPITAL LETTER I WITH DIAERESIS"; + break; + case U'\u00D0': + nm = U"LATIN CAPITAL LETTER ETH"; + break; + case U'\u00D1': + nm = U"LATIN CAPITAL LETTER N WITH TILDE"; + break; + case U'\u00D2': + nm = U"LATIN CAPITAL LETTER O WITH GRAVE"; + break; + case U'\u00D3': + nm = U"LATIN CAPITAL LETTER O WITH ACUTE"; + break; + case U'\u00D4': + nm = 
U"LATIN CAPITAL LETTER O WITH CIRCUMFLEX"; + break; + case U'\u00D5': + nm = U"LATIN CAPITAL LETTER O WITH TILDE"; + break; + case U'\u00D6': + nm = U"LATIN CAPITAL LETTER O WITH DIAERESIS"; + break; + case U'\u00D7': + nm = U"MULTIPLICATION SIGN"; + break; + case U'\u00D8': + nm = U"LATIN CAPITAL LETTER O WITH STROKE"; + break; + case U'\u00D9': + nm = U"LATIN CAPITAL LETTER U WITH GRAVE"; + break; + case U'\u00DA': + nm = U"LATIN CAPITAL LETTER U WITH ACUTE"; + break; + case U'\u00DB': + nm = U"LATIN CAPITAL LETTER U WITH CIRCUMFLEX"; + break; + case U'\u00DC': + nm = U"LATIN CAPITAL LETTER U WITH DIAERESIS"; + break; + case U'\u00DD': + nm = U"LATIN CAPITAL LETTER Y WITH ACUTE"; + break; + case U'\u00DE': + nm = U"LATIN CAPITAL LETTER THORN"; + break; + case U'\u00DF': + nm = U"LATIN SMALL LETTER SHARP S"; + break; + case U'\u00E0': + nm = U"LATIN SMALL LETTER A WITH GRAVE"; + break; + case U'\u00E1': + nm = U"LATIN SMALL LETTER A WITH ACUTE"; + break; + case U'\u00E2': + nm = U"LATIN SMALL LETTER A WITH CIRCUMFLEX"; + break; + case U'\u00E3': + nm = U"LATIN SMALL LETTER A WITH TILDE"; + break; + case U'\u00E4': + nm = U"LATIN SMALL LETTER A WITH DIAERESIS"; + break; + case U'\u00E5': + nm = U"LATIN SMALL LETTER A WITH RING ABOVE"; + break; + case U'\u00E6': + nm = U"LATIN SMALL LETTER AE"; + break; + case U'\u00E7': + nm = U"LATIN SMALL LETTER C WITH CEDILLA"; + break; + case U'\u00E8': + nm = U"LATIN SMALL LETTER E WITH GRAVE"; + break; + case U'\u00E9': + nm = U"LATIN SMALL LETTER E WITH ACUTE"; + break; + case U'\u00EA': + nm = U"LATIN SMALL LETTER E WITH CIRCUMFLEX"; + break; + case U'\u00EB': + nm = U"LATIN SMALL LETTER E WITH DIAERESIS"; + break; + case U'\u00EC': + nm = U"LATIN SMALL LETTER I WITH GRAVE"; + break; + case U'\u00ED': + nm = U"LATIN SMALL LETTER I WITH ACUTE"; + break; + case U'\u00EE': + nm = U"LATIN SMALL LETTER I WITH CIRCUMFLEX"; + break; + case U'\u00EF': + nm = U"LATIN SMALL LETTER I WITH DIAERESIS"; + break; + case U'\u00F0': + nm = 
U"LATIN SMALL LETTER ETH"; + break; + case U'\u00F1': + nm = U"LATIN SMALL LETTER N WITH TILDE"; + break; + case U'\u00F2': + nm = U"LATIN SMALL LETTER O WITH GRAVE"; + break; + case U'\u00F3': + nm = U"LATIN SMALL LETTER O WITH ACUTE"; + break; + case U'\u00F4': + nm = U"LATIN SMALL LETTER O WITH CIRCUMFLEX"; + break; + case U'\u00F5': + nm = U"LATIN SMALL LETTER O WITH TILDE"; + break; + case U'\u00F6': + nm = U"LATIN SMALL LETTER O WITH DIAERESIS"; + break; + case U'\u00F7': + nm = U"DIVISION SIGN"; + break; + case U'\u00F8': + nm = U"LATIN SMALL LETTER O WITH STROKE"; + break; + case U'\u00F9': + nm = U"LATIN SMALL LETTER U WITH GRAVE"; + break; + case U'\u00FA': + nm = U"LATIN SMALL LETTER U WITH ACUTE"; + break; + case U'\u00FB': + nm = U"LATIN SMALL LETTER U WITH CIRCUMFLEX"; + break; + case U'\u00FC': + nm = U"U WITH TWO DOTS"; + break; + case U'\u00FD': + nm = U"LATIN SMALL LETTER Y WITH ACUTE"; + break; + case U'\u00FE': + nm = U"LATIN SMALL LETTER THORN"; + break; + case U'\u00FF': + nm = U"LATIN SMALL LETTER Y WITH DIAERESIS"; + break; + /* LATIN EXTENDED-A: */ + case U'\u0100': + nm = U"LATIN CAPITAL LETTER A WITH MACRON"; + break; + case U'\u0101': + nm = U"LATIN SMALL LETTER A WITH MACRON"; + break; + case U'\u0102': + nm = U"LATIN CAPITAL LETTER A WITH BREVE"; + break; + case U'\u0103': + nm = U"LATIN SMALL LETTER A WITH BREVE"; + break; + case U'\u0104': + nm = U"LATIN CAPITAL LETTER A WITH OGONEK"; + break; + case U'\u0105': + nm = U"LATIN SMALL LETTER A WITH OGONEK"; + break; + case U'\u0106': + nm = U"LATIN CAPITAL LETTER C WITH ACUTE"; + break; + case U'\u0107': + nm = U"LATIN SMALL LETTER C WITH ACUTE"; + break; + case U'\u0108': + nm = U"LATIN CAPITAL LETTER C WITH CIRCUMFLEX"; + break; + case U'\u0109': + nm = U"LATIN SMALL LETTER C WITH CIRCUMFLEX"; + break; + case U'\u010A': + nm = U"LATIN CAPITAL LETTER C WITH DOT ABOVE"; + break; + case U'\u010B': + nm = U"LATIN SMALL LETTER C WITH DOT ABOVE"; + break; + case U'\u010C': + nm = U"LATIN 
CAPITAL LETTER C WITH CARON"; + break; + case U'\u010D': + nm = U"LATIN SMALL LETTER C WITH CARON"; + break; + case U'\u010E': + nm = U"LATIN CAPITAL LETTER D WITH CARON"; + break; + case U'\u010F': + nm = U"LATIN SMALL LETTER D WITH CARON"; + break; + case U'\u0110': + nm = U"LATIN CAPITAL LETTER D WITH STROKE"; + break; + case U'\u0120': + nm = U"LATIN CAPITAL LETTER G WITH DOT ABOVE"; + break; + case U'\u0130': + nm = U"LATIN CAPITAL LETTER I WITH DOT ABOVE"; + break; + case U'\u0140': + nm = U"LATIN SMALL LETTER L WITH MIDDLE DOT"; + break; + case U'\u0150': + nm = U"LATIN CAPITAL LETTER O WITH DOUBLE ACUTE"; + break; + case U'\u0160': + nm = U"LATIN CAPITAL LETTER S WITH CARON"; + break; + case U'\u0170': + nm = U"LATIN CAPITAL LETTER U WITH DOUBLE ACUTE"; + break; + /* LATIN EXTENDED-B: */ + case U'\u0180': + nm = U"LATIN SMALL LETTER B WITH STROKE"; + break; + case U'\u0190': + nm = U"LATIN CAPITAL LETTER OPEN E"; + break; + case U'\u01A0': + nm = U"LATIN CAPITAL LETTER O WITH HORN"; + break; + case U'\u01B0': + nm = U"LATIN SMALL LETTER U WITH HORN"; + break; + case U'\u01C0': + nm = U"LATIN LETTER DENTAL CLICK"; + break; + case U'\u01D0': + nm = U"LATIN SMALL LETTER I WITH CARON"; + break; + case U'\u01E0': + nm = U"LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON"; + break; + case U'\u01F0': + nm = U"LATIN SMALL LETTER J WITH CARON"; + break; + case U'\u0200': + nm = U"LATIN CAPITAL LETTER A WITH DOUBLE GRAVE"; + break; + case U'\u0210': + nm = U"LATIN CAPITAL LETTER R WITH DOUBLE GRAVE"; + break; + case U'\u0220': + nm = U"LATIN CAPITAL LETTER N WITH LONG RIGHT LEG"; + break; + case U'\u0230': + nm = U"LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON"; + break; + case U'\u0240': + nm = U"LATIN SMALL LETTER Z WITH SWASH TAIL"; + break; + /* IPA EXTENSIONS: */ + case U'\u0250': + nm = U"LATIN SMALL LETTER TURNED A"; + break; + case U'\u0251': + nm = U"LATIN SMALL LETTER ALPHA"; + break; + case U'\u0252': + nm = U"LATIN SMALL LETTER TURNED ALPHA"; + break; 
+ case U'\u0253': + nm = U"LATIN SMALL LETTER B WITH HOOK"; + break; + case U'\u0254': + nm = U"LATIN SMALL LETTER OPEN O"; + break; + case U'\u0255': + nm = U"LATIN SMALL LETTER C WITH CURL"; + break; + case U'\u0256': + nm = U"LATIN SMALL LETTER D WITH TAIL"; + break; + case U'\u0257': + nm = U"LATIN SMALL LETTER D WITH HOOK"; + break; + case U'\u0258': + nm = U"LATIN SMALL LETTER REVERSED E"; + break; + case U'\u0259': + nm = U"LATIN SMALL LETTER SCHWA"; + break; + case U'\u025A': + nm = U"LATIN SMALL LETTER SCHWA WITH HOOK"; + break; + case U'\u025B': + nm = U"LATIN SMALL LETTER OPEN E"; + break; + case U'\u025C': + nm = U"LATIN SMALL LETTER REVERSED OPEN E"; + break; + case U'\u025D': + nm = U"LATIN SMALL LETTER REVERSED OPEN E WITH HOOK"; + break; + case U'\u025E': + nm = U"LATIN SMALL LETTER CLOSED REVERSED OPEN E"; + break; + case U'\u025F': + nm = U"LATIN SMALL LETTER DOTLESS J WITH STROKE"; + break; + case U'\u0260': + nm = U"LATIN SMALL LETTER G WITH HOOK"; + break; + case U'\u0261': + nm = U"LATIN SMALL LETTER SCRIPT G"; + break; + case U'\u0262': + nm = U"LATIN LETTER SMALL CAPITAL G"; + break; + case U'\u0263': + nm = U"LATIN SMALL LETTER GAMMA"; + break; + case U'\u0264': + nm = U"LATIN SMALL LETTER RAMS HORN"; + break; + case U'\u0265': + nm = U"LATIN SMALL LETTER TURNED H"; + break; + case U'\u0266': + nm = U"LATIN SMALL LETTER H WITH HOOK"; + break; + case U'\u0267': + nm = U"LATIN SMALL LETTER HENG WITH HOOK"; + break; + case U'\u0268': + nm = U"LATIN SMALL LETTER I WITH STROKE"; + break; + case U'\u0269': + nm = U"LATIN SMALL LETTER IOTA"; + break; + case U'\u026A': + nm = U"LATIN LETTER SMALL CAPITAL I"; + break; + case U'\u026B': + nm = U"LATIN SMALL LETTER L WITH MIDDLE TILDE"; + break; + case U'\u026C': + nm = U"LATIN SMALL LETTER L WITH BELT"; + break; + case U'\u026D': + nm = U"LATIN SMALL LETTER L WITH RETROFLEX HOOK"; + break; + case U'\u026E': + nm = U"LATIN SMALL LETTER LEZH"; + break; + case U'\u026F': + nm = U"LATIN SMALL LETTER 
TURNED M"; + break; + case U'\u0270': + nm = U"LATIN SMALL LETTER TURNED M WITH LONG LEG"; + break; + case U'\u0271': + nm = U"LATIN SMALL LETTER M WITH HOOK"; + break; + case U'\u0272': + nm = U"LATIN SMALL LETTER N WITH LEFT HOOK"; + break; + case U'\u0273': + nm = U"LATIN SMALL LETTER N WITH RETROFLEX HOOK"; + break; + case U'\u0274': + nm = U"LATIN LETTER SMALL CAPITAL N"; + break; + case U'\u0275': + nm = U"LATIN SMALL LETTER BARRED O"; + break; + case U'\u0276': + nm = U"LATIN LETTER SMALL CAPITAL OE"; + break; + case U'\u0277': + nm = U"LATIN SMALL LETTER CLOSED OMEGA"; + break; + case U'\u0278': + nm = U"LATIN SMALL LETTER PHI"; + break; + case U'\u0279': + nm = U"LATIN SMALL LETTER TURNED R"; + break; + case U'\u027A': + nm = U"LATIN SMALL LETTER TURNED R WITH LONG LEG"; + break; + case U'\u027B': + nm = U"LATIN SMALL LETTER TURNED R WITH HOOK"; + break; + case U'\u027C': + nm = U"LATIN SMALL LETTER R WITH LONG LEG"; + break; + case U'\u027D': + nm = U"LATIN SMALL LETTER R WITH TAIL"; + break; + case U'\u027E': + nm = U"LATIN SMALL LETTER R WITH FISHHOOK"; + break; + case U'\u027F': + nm = U"LATIN SMALL LETTER REVERSED R WITH FISHHOOK"; + break; + case U'\u0280': + nm = U"LATIN LETTER SMALL CAPITAL R"; + break; + /* GREEK AND COPTIC: */ + case U'\u0370': + nm = U"GREEK CAPITAL LETTER HETA"; + break; + case U'\u0371': + nm = U"GREEK SMALL LETTER HETA"; + break; + case U'\u0372': + nm = U"GREEK CAPITAL LETTER ARCHAIC SAMPI"; + break; + case U'\u0373': + nm = U"GREEK SMALL LETTER ARCHAIC SAMPI"; + break; + case U'\u0374': + nm = U"GREEK NUMERAL SIGN"; + break; + case U'\u0375': + nm = U"GREEK LOWER NUMERAL SIGN"; + break; + case U'\u0376': + nm = U"GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA"; + break; + case U'\u0377': + nm = U"GREEK SMALL LETTER PAMPHYLIAN DIGAMMA"; + break; + case U'\u037A': + nm = U"GREEK YPOGEGRAMMENI"; + break; + case U'\u037B': + nm = U"GREEK SMALL REVERSED LUNATE SIGMA SYMBOL"; + break; + case U'\u037C': + nm = U"GREEK SMALL DOTTED LUNATE 
SIGMA SYMBOL"; + break; + case U'\u037D': + nm = U"GREEK SMALL REVERSED DOTTED LUNATE SIGMAL SYMBOL"; + break; + case U'\u037E': + nm = U"GREEK QUESTION MARK"; + break; + case U'\u037F': + nm = U"GREEK CAPITAL LETTER YOT"; + break; + case U'\u0384': + nm = U"GREEK TONOS"; + break; + case U'\u0385': + nm = U"GREEK DIALYTIKA TONOS"; + break; + case U'\u0386': + nm = U"GREEK CAPITAL LETTER ALPHA WITH TONOS"; + break; + case U'\u0387': + nm = U"GREEK ANO TELEIA"; + break; + case U'\u0388': + nm = U"GREEK CAPITAL LETTER EPSILON WITH TONOS"; + break; + case U'\u0389': + nm = U"GREEK CAPITAL LETTER ETA WITH TONOS"; + break; + case U'\u038A': + nm = U"GREEK CAPITAL LETTER IOTA WITH TONOS"; + break; + case U'\u038C': + nm = U"GREEK CAPITAL LETTER OMICRON WITH TONOS"; + break; + case U'\u038E': + nm = U"GREEK CAPITAL LETTER USPILON WITH TONOS"; + break; + case U'\u038F': + nm = U"GREEK CAPITAL LETTER OMEGA WITH TONOS"; + break; + case U'\u0390': + nm = U"GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"; + break; + case U'\u0391': + nm = U"GREEK CAPITAL LETTER ALPHA"; + break; + case U'\u0392': + nm = U"GREEK CAPITAL LETTER BETA"; + break; + case U'\u0393': + nm = U"GREEK CAPITAL LETTER GAMMA"; + break; + case U'\u0394': + nm = U"GREEK CAPITAL LETTER DELTA"; + break; + case U'\u0395': + nm = U"GREEK CAPITAL LETTER EPSILON"; + break; + case U'\u0396': + nm = U"GREEK CAPITAL LETTER ZETA"; + break; + case U'\u0397': + nm = U"GREEK CAPITAL LETTER ETA"; + break; + case U'\u0398': + nm = U"GREEK CAPITAL LETTER THETA"; + break; + case U'\u0399': + nm = U"GREEK CAPITAL LETTER IOTA"; + break; + case U'\u039A': + nm = U"GREEK CAPITAL LETTER KAPPA"; + break; + case U'\u039B': + nm = U"GREEK CAPITAL LETTER LAMBDA"; + break; + case U'\u039C': + nm = U"GREEK CAPITAL LETTER MU"; + break; + case U'\u039D': + nm = U"GREEK CAPITAL LETTER NU"; + break; + case U'\u039E': + nm = U"GREEK CAPITAL LETTER XI"; + break; + case U'\u039F': + nm = U"GREEK CAPITAL LETTER OMICRON"; + break; + case 
U'\u03A0': + nm = U"GREEK CAPITAL LETTER PI"; + break; + case U'\u03A1': + nm = U"GREEK CAPITAL LETTER RHO"; + break; + case U'\u03A3': + nm = U"GREEK CAPITAL LETTER SIGMA"; + break; + case U'\u03A4': + nm = U"GREEK CAPITAL LETTER TAU"; + break; + case U'\u03A5': + nm = U"GREEK CAPITAL LETTER UPSILON"; + break; + case U'\u03A6': + nm = U"GREEK CAPITAL LETTER PHI"; + break; + case U'\u03A7': + nm = U"GREEK CAPITAL LETTER CHI"; + break; + case U'\u03A8': + nm = U"GREEK CAPITAL LETTER PSI"; + break; + case U'\u03A9': + nm = U"GREEK CAPITAL LETTER OMEGA"; + break; + case U'\u03AA': + nm = U"GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"; + break; + case U'\u03AB': + nm = U"GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"; + break; + case U'\u03AC': + nm = U"GREEK SMALL LETTER ALPHA WITH TONOS"; + break; + case U'\u03AD': + nm = U"GREEK SMALL LETTER EPSILON WITH TONOS"; + break; + case U'\u03AE': + nm = U"GREEK SMALL LETTER ETA WITH TONOS"; + break; + case U'\u03AF': + nm = U"GREEK SMALL LETTER IOTA WITH TONOS"; + break; + case U'\u03B0': + nm = U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"; + break; + case U'\u03B1': + nm = U"GREEK SMALL LETTER ALPHA"; + break; + case U'\u03B2': + nm = U"GREEK SMALL LETTER BETA"; + break; + case U'\u03B3': + nm = U"GREEK SMALL LETTER GAMMA"; + break; + case U'\u03B4': + nm = U"GREEK SMALL LETTER DELTA"; + break; + case U'\u03B5': + nm = U"GREEK SMALL LETTER EPSILON"; + break; + case U'\u03B6': + nm = U"GREEK SMALL LETTER ZETA"; + break; + case U'\u03B7': + nm = U"GREEK SMALL LETTER ETA"; + break; + case U'\u03B8': + nm = U"GREEK SMALL LETTER THETA"; + break; + case U'\u03B9': + nm = U"GREEK SMALL LETTER IOTA"; + break; + case U'\u03BA': + nm = U"GREEK SMALL LETTER KAPPA"; + break; + case U'\u03BB': + nm = U"GREEK SMALL LETTER LAMBDA"; + break; + case U'\u03BC': + nm = U"GREEK SMALL LETTER MU"; + break; + case U'\u03BD': + nm = U"GREEK SMALL LETTER NU"; + break; + case U'\u03BE': + nm = U"GREEK SMALL LETTER XI"; + break; + case 
U'\u03BF': + nm = U"GREEK SMALL LETTER OMICRON"; + break; + case U'\u03C0': + nm = U"GREEK SMALL LETTER PI"; + break; + case U'\u03C1': + nm = U"GREEK SMALL LETTER RHO"; + break; + case U'\u03C2': + nm = U"GREEK SMALL LETTER FINAL SIGMA"; + break; + case U'\u03C3': + nm = U"GREEK SMALL LETTER SIGMA"; + break; + case U'\u03C4': + nm = U"GREEK SMALL LETTER TAU"; + break; + case U'\u03C5': + nm = U"GREEK SMALL LETTER UPSILON"; + break; + case U'\u03C6': + nm = U"GREEK SMALL LETTER PHI"; + break; + case U'\u03C7': + nm = U"GREEK SMALL LETTER CHI"; + break; + case U'\u03C8': + nm = U"GREEK SMALL LETTER PSI"; + break; + case U'\u03C9': + nm = U"GREEK SMALL LETTER OMEGA"; + break; + case U'\u03CA': + nm = U"GREEK SMALL LETTER IOTA WITH DIALYTIKA"; + break; + case U'\u03CB': + nm = U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA"; + break; + case U'\u03CC': + nm = U"GREEK SMALL LETTER OMICRON WITH TONOS"; + break; + case U'\u03CD': + nm = U"GREEK SMALL LETTER UPSILON WITH TONOS"; + break; + case U'\u03CE': + nm = U"GREEK SMALL LETTER OMEGA WITH TONOS"; + break; + case U'\u03CF': + nm = U"GREEK CAPITAL KAI SYMBOL"; + break; + case U'\u03D0': + nm = U"GREEK BETA SYMBOL"; + break; + case U'\u03D1': + nm = U"GREEK THETA SYMBOL"; + break; + case U'\u03D2': + nm = U"GREEK UPSILON WITH HOOK SYMBOL"; + break; + case U'\u03D3': + nm = U"GREEK UPSILON WITH ACUTE AND HOOK SYMBOL"; + break; + case U'\u03D4': + nm = U"GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL"; + break; + case U'\u03D5': + nm = U"GREEK PHI SYMBOL"; + break; + case U'\u03D6': + nm = U"GREEK PI SYMBOL"; + break; + case U'\u03D7': + nm = U"GREEK KAI SYMBOL"; + break; + case U'\u03D8': + nm = U"GREEK LETTER ARCHAIC KOPPA"; + break; + case U'\u03D9': + nm = U"GREEK SMALL LETTER ARCHAIC KOPPA"; + break; + case U'\u03DA': + nm = U"GREEK LETTER STIGMA"; + break; + case U'\u03DB': + nm = U"GREEK SMALL LETTER STIGMA"; + break; + case U'\u03DC': + nm = U"GREEK LETTER DIGAMMA"; + break; + case U'\u03DD': + nm = U"GREEK SMALL LETTER 
DIGAMMA"; + break; + case U'\u03DE': + nm = U"GREEK LETTER KOPPA"; + break; + case U'\u03DF': + nm = U"GREEK SMALL LETTER KOPPA"; + break; + case U'\u03E0': + nm = U"GREEK LETTER SAMPI"; + break; + case U'\u03F0': + nm = U"GREEK KAPPA SYMBOL"; + break; + /* HEBREW: */ + case U'\u05D0': + nm = U"HEBREW LETTER ALEF"; + break; + case U'\u05D1': + nm = U"HEBREW LETTER BET"; + break; + case U'\u05D2': + nm = U"HEBREW LETTER GIMEL"; + break; + case U'\u05D3': + nm = U"HEBREW LETTER DALET"; + break; + case U'\u05D4': + nm = U"HEBREW LETTER HE"; + break; + case U'\u05D5': + nm = U"HEBREW LETTER VAV"; + break; + case U'\u05D6': + nm = U"HEBREW LETTER ZAYIN"; + break; + case U'\u05D7': + nm = U"HEBREW LETTER HET"; + break; + case U'\u05D8': + nm = U"HEBREW LETTER TET"; + break; + case U'\u05D9': + nm = U"HEBREW LETTER YOD"; + break; + case U'\u05DA': + nm = U"HEBREW LETTER FINAL KAF"; + break; + case U'\u05DB': + nm = U"HEBREW LETTER KAF"; + break; + case U'\u05DC': + nm = U"HEBREW LETTER LAMED"; + break; + case U'\u05DD': + nm = U"HEBREW LETTER FINAL MEM"; + break; + case U'\u05DE': + nm = U"HEBREW LETTER MEM"; + break; + case U'\u05DF': + nm = U"HEBREW LETTER FINAL NUN"; + break; + case U'\u05E0': + nm = U"HEBREW LETTER NUN"; + break; + case U'\u05E1': + nm = U"HEBREW LETTER SAMEKH"; + break; + case U'\u05E2': + nm = U"HEBREW LETTER AYIN"; + break; + case U'\u05E3': + nm = U"HEBREW LETTER FINAL PE"; + break; + case U'\u05E4': + nm = U"HEBREW LETTER PE"; + break; + case U'\u05E5': + nm = U"HEBREW LETTER FINAL TSADI"; + break; + case U'\u05E6': + nm = U"HEBREW LETTER TSADI"; + break; + case U'\u05E7': + nm = U"HEBREW LETTER QOF"; + break; + case U'\u05E8': + nm = U"HEBREW LETTER RESH"; + break; + case U'\u05E9': + nm = U"HEBREW LETTER SHIN"; + break; + case U'\u05EA': + nm = U"HEBREW LETTER TAV"; + break; + case U'\u05EF': + nm = U"HEBREW YOD TRIANGLE"; + break; + /* CYRILLIC: */ + case U'\u0400': + nm = U"CYRILLIC CAPITAL LETTER LE WITH GRAVE"; + break; + case U'\u0401': + 
nm = U"CYRILLIC CAPITAL LETTER LO"; + break; + case U'\u0402': + nm = U"CYRILLIC CAPITAL LETTER DJE"; + break; + case U'\u0403': + nm = U"CYRILLIC CAPITAL LETTER GJE"; + break; + case U'\u0404': + nm = U"CYRILLIC CAPITAL LETTER UKRAINIAN LE"; + break; + case U'\u0405': + nm = U"CYRILLIC CAPITAL LETTER DZE"; + break; + case U'\u0406': + nm = U"CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I"; + break; + case U'\u0407': + nm = U"CYRILLIC CAPITAL LETTER YI"; + break; + case U'\u0408': + nm = U"CYRILLIC CAPITAL LETTER JE"; + break; + case U'\u0409': + nm = U"CYRILLIC CAPITAL LETTER LJE"; + break; + case U'\u040A': + nm = U"CYRILLIC CAPITAL LETTER NJE"; + break; + case U'\u040B': + nm = U"CYRILLIC CAPITAL LETTER TSHE"; + break; + case U'\u040C': + nm = U"CYRILLIC CAPITAL LETTER KJE"; + break; + case U'\u040D': + nm = U"CYRILLIC CAPITAL LETTER I WITH GRAVE"; + break; + case U'\u040E': + nm = U"CYRILLIC CAPITAL LETTER SHORT U"; + break; + case U'\u040F': + nm = U"CYRILLIC CAPITAL LETTER DZHE"; + break; + case U'\u0410': + nm = U"CYRILLIC CAPITAL LETTER A"; + break; + case U'\u0420': + nm = U"CYRILLIC CAPITAL LETTER ER"; + break; + case U'\u0430': + nm = U"CYRILLIC SMALL LETTER A"; + break; + case U'\u0440': + nm = U"CYRILLIC SMALL LETTER ER"; + break; + case U'\u0450': + nm = U"CYRILLIC SMALL LETTER LE WITH GRAVE"; + break; + case U'\u0460': + nm = U"CYRILLIC CAPITAL LETTER OMEGA"; + break; + case U'\u0470': + nm = U"CYRILLIC CAPITAL LETTER PSI"; + break; + case U'\u0480': + nm = U"CYRILLIC CAPITAL LETTER KOPPA"; + break; + case U'\u0490': + nm = U"CYRILLIC CAPITAL LETTER GHE WITH UPTURN"; + break; + case U'\u04A0': + nm = U"CYRILLIC CAPITAL LETTER BASHKIR KA"; + break; + case U'\u04B0': + nm = U"CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE"; + break; + case U'\u04C0': + nm = U"CYRILLIC LETTER PALOCHKA"; + break; + case U'\u04D0': + nm = U"CYRILLIC CAPITAL LETTER A WITH BREVE"; + break; + case U'\u04E0': + nm = U"CYRILLIC CAPITAL LETTER ABKHASIAN DZE"; + break; + case 
U'\u04F0': + nm = U"CYRILLIC CAPITAL LETTER U WITH DIAERESIS"; + break; + /* SYRIAC SUPPLEMENT: */ + case U'\u0860': + nm = U"SYRIAC LETTER MALAYALAM NGA"; + break; + case U'\u0861': + nm = U"SYRIAC LETTER MALAYALAM JA"; + break; + case U'\u0862': + nm = U"SYRIAC LETTER MALAYALAM NYA"; + break; + case U'\u0863': + nm = U"SYRIAC LETTER MALAYALAM TTA"; + break; + case U'\u0864': + nm = U"SYRIAC LETTER MALAYALAM NNA"; + break; + case U'\u0865': + nm = U"SYRIAC LETTER MALAYALAM NNNA"; + break; + case U'\u0866': + nm = U"SYRIAC LETTER MALAYALAM BHA"; + break; + case U'\u0867': + nm = U"SYRIAC LETTER MALAYALAM RA"; + break; + case U'\u0868': + nm = U"SYRIAC LETTER MALAYALAM LLA"; + break; + case U'\u0869': + nm = U"SYRIAC LETTER MALAYALAM LLLA"; + break; + case U'\u086A': + nm = U"SYRIAC LETTER MALAYALAM SSA"; + break; + /* RUNIC: */ + case U'\u16A0': + nm = U"RUNIC LETTER FEHU FEOH FE F"; + break; + case U'\u16A1': + nm = U"RUNIC LETTER V"; + break; + case U'\u16A2': + nm = U"RUNIC LETTER URUZ UR U"; + break; + case U'\u16A3': + nm = U"RUNIC LETTER YR"; + break; + case U'\u16A4': + nm = U"RUNIC LETTER Y"; + break; + case U'\u16A5': + nm = U"RUNIC LETTER W"; + break; + case U'\u16A6': + nm = U"RUNIC LETTER THURISAZ THURS THORN"; + break; + case U'\u16A7': + nm = U"RUNIC LETTER ETH"; + break; + case U'\u16A8': + nm = U"RUNIC LETTER ANSUZ A"; + break; + case U'\u16A9': + nm = U"RUNIC LETTER OS O"; + break; + case U'\u16AA': + nm = U"RUNIC LETTER AC A"; + break; + case U'\u16AB': + nm = U"RUNIC LETTER AESC"; + break; + case U'\u16AC': + nm = U"RUNIC LETTER LONG-BRANCHED-OSS O"; + break; + case U'\u16AD': + nm = U"RUNIC LETTER SHORT-TWIG-OSS O"; + break; + case U'\u16AE': + nm = U"RUNIC LETTER O"; + break; + case U'\u16AF': + nm = U"RUNIC LETTER OE"; + break; + case U'\u16B0': + nm = U"RUNIC LETTER ON"; + break; + case U'\u16C0': + nm = U"RUNIC LETTER DOTTED-N"; + break; + case U'\u16D0': + nm = U"RUNIC LETTER SHORT-TWIG-TYR T"; + break; + case U'\u16E0': + nm = U"RUNIC 
LETTER EAR"; + break; + case U'\u16F0': + nm = U"RUNIC BELGTHOR SYMBOL"; + break; + /* CYRILLIC EXTENDED C: */ + case U'\u1C80': + nm = U"CYRILLIC SMALL LETTER ROUNDED VE"; + break; + case U'\u1C81': + nm = U"CYRILLIC SMALL LETTER LONG-LEGGED DE"; + break; + case U'\u1C82': + nm = U"CYRILLIC SMALL LETTER NARROW O"; + break; + case U'\u1C83': + nm = U"CYRILLIC SMALL LETTER WIDE ES"; + break; + case U'\u1C84': + nm = U"CYRILLIC SMALL LETTER TALL TE"; + break; + case U'\u1C85': + nm = U"CYRILLIC SMALL LETTER THREE-LEGGED TE"; + break; + case U'\u1C86': + nm = U"CYRILLIC SMALL LETTER TALL HARD SIGN"; + break; + case U'\u1C87': + nm = U"CYRILLIC SMALL LETTER TALL YAT"; + break; + case U'\u1C88': + nm = U"CYRILLIC SMALL LETTER UNBLENDED UK"; + break; + /* GENERAL PUNCTUATION: */ + case U'\u2000': + nm = U"EN QUAD"; + break; + case U'\u2001': + nm = U"EM QUAD"; + break; + case U'\u2002': + nm = U"EN SPACE"; + break; + case U'\u2003': + nm = U"EM SPACE"; + break; + case U'\u2004': + nm = U"THREE-PER-EM SPACE"; + break; + case U'\u2005': + nm = U"FOUR-PER-EM SPACE"; + break; + case U'\u2006': + nm = U"SIX-PER-EM SPACE"; + break; + case U'\u2007': + nm = U"FIGURE SPACE"; + break; + case U'\u2008': + nm = U"PUNCTUATION SPACE"; + break; + case U'\u2009': + nm = U"THIN SPACE"; + break; + case U'\u200A': + nm = U"HAIR SPACE"; + break; + case U'\u203C': + nm = U"DOUBLE EXCLAMATION MARK"; + break; + case U'\u2047': + nm = U"DOUBLE QUOTATION MARK"; + break; + case U'\u2048': + nm = U"QUESTION EXCLAMATION MARK"; + break; + case U'\u2049': + nm = U"EXCLAMATION QUESTION MARK"; + break; + /* CURRENCY SYMBOLS: */ + case U'\u20A0': + nm = U"EURO-CURRENCY SIGN"; + break; + case U'\u20A1': + nm = U"COLON SIGN"; + break; + case U'\u20A2': + nm = U"CRUZEIRO SIGN"; + break; + case U'\u20A3': + nm = U"FRENCH FRANC SIGN"; + break; + case U'\u20A4': + nm = U"LIRA SIGN"; + break; + case U'\u20A5': + nm = U"MILL SIGN"; + break; + case U'\u20A6': + nm = U"NAIRA SIGN"; + break; + case U'\u20A7': + 
nm = U"PESETA SIGN"; + break; + case U'\u20A8': + nm = U"RUPEE SIGN"; + break; + case U'\u20A9': + nm = U"WON SIGN"; + break; + case U'\u20AA': + nm = U"NEW SHEQEL SIGN"; + break; + case U'\u20AB': + nm = U"DONG SIGN"; + break; + case U'\u20AC': + nm = U"EURO SIGN"; + break; + case U'\u20AD': + nm = U"KIP SIGN"; + break; + case U'\u20AE': + nm = U"TUGRIK SIGN"; + break; + case U'\u20AF': + nm = U"DRACHMA SIGN"; + break; + case U'\u20B0': + nm = U"GERMAN PENNY SIGN"; + break; + case U'\u20B1': + nm = U"PESO SIGN"; + break; + case U'\u20B2': + nm = U"GUARANI SIGN"; + break; + case U'\u20B3': + nm = U"AUSTRAL SIGN"; + break; + case U'\u20B4': + nm = U"HRYVNIA SIGN"; + break; + case U'\u20B5': + nm = U"CEDI SIGN"; + break; + case U'\u20B6': + nm = U"LIVRE TOURNOIS SIGN"; + break; + case U'\u20B7': + nm = U"SPESMILO SIGN"; + break; + case U'\u20B8': + nm = U"TENGE SIGN"; + break; + case U'\u20BA': + nm = U"TURKISH LIRA SIGN"; + break; + case U'\u20BB': + nm = U"NORDIC MARK SIGN"; + break; + case U'\u20BC': + nm = U"MANAT SIGN"; + break; + case U'\u20BD': + nm = U"RUBLE SYMBOL"; + break; + case U'\u20BE': + nm = U"LARI SIGN"; + break; + case U'\u20BF': + nm = U"BITCOIN SIGN"; + break; + /* LETTERLIKE SYMBOLS: */ + case U'\u2100': + nm = U"ACCOUNT OF"; + break; + case U'\u2101': + nm = U"ADRESSED TO THE SUBJECT"; + break; + case U'\u2102': + nm = U"DOUBLE-STRUCK CAPITAL C"; + break; + case U'\u2103': + nm = U"DEGREE CELSIUS"; + break; + case U'\u2104': + nm = U"CENTRE LINE SYMBOL"; + break; + case U'\u2105': + nm = U"CARE OF"; + break; + case U'\u2106': + nm = U"CADA UNA"; + break; + case U'\u2107': + nm = U"EULER CONSTANT"; + break; + case U'\u2108': + nm = U"SCRUPLE"; + break; + case U'\u2109': + nm = U"DEGREE FAHRENHEIT"; + break; + case U'\u210A': + nm = U"SCRIPT SMALL G"; + break; + case U'\u210B': + nm = U"SCRIPT CAPITAL H"; + break; + case U'\u210C': + nm = U"BLACK-LETTER CAPITAL H"; + break; + case U'\u210D': + nm = U"DOUBLE-STRUCK CAPITAL H"; + break; + case 
U'\u210E': + nm = U"PLANCK CONSTANT"; + break; + case U'\u210F': + nm = U"PLANCK CONSTANT OVER TWO PI"; + break; + case U'\u2110': + nm = U"SCRIPT CAPITAL I"; + break; + case U'\u2111': + nm = U"BLACK-LETTER CAPITAL I"; + break; + case U'\u2112': + nm = U"SCRIPT CAPITAL L"; + break; + case U'\u2113': + nm = U"SCRIPT SMALL L"; + break; + case U'\u2114': + nm = U"L B BAR SYMBOL"; + break; + case U'\u2115': + nm = U"DOUBLE-STRUCK CAPITAL N"; + break; + case U'\u2116': + nm = U"NUMERO SIGN"; + break; + case U'\u2117': + nm = U"SOUND RECORDING COPYRIGHT"; + break; + case U'\u2118': + nm = U"SCRIPT CAPITAL P"; + break; + case U'\u2119': + nm = U"DOUBLE-STRUCK CAPITAL P"; + break; + case U'\u211A': + nm = U"DOUBLE-STRUCK CAPITAL Q"; + break; + case U'\u211B': + nm = U"SCRIPT CAPITAL R"; + break; + case U'\u211C': + nm = U"BLACK-LETTER CAPITAL R"; + break; + case U'\u211D': + nm = U"DOUBLE-STRUCK CAPITAL R"; + break; + case U'\u211E': + nm = U"PRESCRIPTION TAKE"; + break; + case U'\u211F': + nm = U"RESPONSE"; + break; + case U'\u2120': + nm = U"SERVICE MARK"; + break; + case U'\u2121': + nm = U"TELEPHONE SIGN"; + break; + case U'\u2122': + nm = U"TRADE MARK SIGN"; + break; + case U'\u2123': + nm = U"VERSICLE"; + break; + case U'\u2124': + nm = U"DOUBLE-STRUCK CAPITAL Z"; + break; + case U'\u2125': + nm = U"OUNCE SIGN"; + break; + case U'\u2126': + nm = U"OHM SIGN"; + break; + case U'\u2127': + nm = U"INVERTED OHM SIGN"; + break; + case U'\u2128': + nm = U"BLACK-LETTER CAPITAL Z"; + break; + case U'\u2129': + nm = U"TURNED GREEK SMALL LETTER IOTA"; + break; + case U'\u212A': + nm = U"KELVIN SIGN"; + break; + case U'\u212B': + nm = U"ANGSTROM SIGN"; + break; + case U'\u212C': + nm = U"SCRIPT CAPITAL B"; + break; + case U'\u212D': + nm = U"BLACK-LETTER CAPITAL C"; + break; + case U'\u212E': + nm = U"ESTIMATED SYMBOL"; + break; + case U'\u212F': + nm = U"SCRIPT SMALL E"; + break; + case U'\u2130': + nm = U"SCRIPT CAPITAL E"; + break; + case U'\u2131': + nm = U"SCRIPT CAPITAL 
F"; + break; + case U'\u2132': + nm = U"TURNED CAPITAL F"; + break; + case U'\u2133': + nm = U"SCRIPT CAPITAL M"; + break; + case U'\u2134': + nm = U"SCRIPT SMALL O"; + break; + case U'\u2135': + nm = U"ALEF SYMBOL"; + break; + case U'\u2136': + nm = U"BET SYMBOL"; + break; + case U'\u2137': + nm = U"GIMEL SYMBOL"; + break; + case U'\u2138': + nm = U"DALET SYMBOL"; + break; + case U'\u2139': + nm = U"INFORMATION SOURCE"; + break; + case U'\u213A': + nm = U"ROTATED CAPITAL Q"; + break; + case U'\u213B': + nm = U"FACSIMILE SIGN"; + break; + case U'\u213C': + nm = U"DOUBLE-STRUCK SMALL PI"; + break; + case U'\u213D': + nm = U"DOUBLE-STRUCK SMALL GAMMA"; + break; + case U'\u213E': + nm = U"DOUBLE-STRUCK CAPITAL GAMMA"; + break; + case U'\u213F': + nm = U"DOUBLE-STRUCK CAPITAL PI"; + break; + case U'\u2140': + nm = U"DOUBLE-STRUCK N-ARY SUMMATION"; + break; + case U'\u2141': + nm = U"TURNED SANS-SERIF CAPITAL G"; + break; + case U'\u2142': + nm = U"TURNED SANS-SERIF CAPITAL L"; + break; + case U'\u2143': + nm = U"REVERSED SANS-SERIF CAPITAL L"; + break; + case U'\u2144': + nm = U"TURNED SANS-SERIF CAPITAL Y"; + break; + case U'\u2145': + nm = U"DOUBLE-STRUCK ITALIC CAPITAL D"; + break; + case U'\u2146': + nm = U"DOUBLE-STRUCK ITALIC SMALL D"; + break; + case U'\u2147': + nm = U"DOUBLE-STRUCK ITALIC SMALL E"; + break; + case U'\u2148': + nm = U"DOUBLE-STRUCK ITALIC SMALL I"; + break; + case U'\u2149': + nm = U"DOUBLE-STRUCK ITALIC SMALL J"; + break; + case U'\u214A': + nm = U"PROPERTY LINE"; + break; + case U'\u214B': + nm = U"TURNED AMPERSAND"; + break; + case U'\u214C': + nm = U"PER SIGN"; + break; + case U'\u214D': + nm = U"AKTIESELSKAB"; + break; + case U'\u214E': + nm = U"TURNED SMALL F"; + break; + case U'\u214F': + nm = U"SYMBOL FOR SAMARITAN SOURCE"; + break; + /* NUMBER FORMS: */ + case U'\u2150': + nm = U"VULGAR FRACTION ONE SEVENTH"; + break; + case U'\u2151': + nm = U"VULGAR FRACTION ONE NINTH"; + break; + case U'\u2152': + nm = U"VULGAR FRACTION ONE TENTH"; 
+ break; + case U'\u2153': + nm = U"VULGAR FRACTION ONE THIRD"; + break; + case U'\u2154': + nm = U"VULGAR FRACTION TWO THIRDS"; + break; + case U'\u2155': + nm = U"VULGAR FRACTION ONE FIFTH"; + break; + case U'\u2156': + nm = U"VULGAR FRACTION TWO FIFTHS"; + break; + case U'\u2157': + nm = U"VULGAR FRACTION THREE FIFTHS"; + break; + case U'\u2158': + nm = U"VULGAR FRACTION FOUR FIFTHS"; + break; + case U'\u2159': + nm = U"VULGAR FRACTION ONE SIXTH"; + break; + case U'\u215A': + nm = U"VULGAR FRACTION FIVE SIXTHS"; + break; + case U'\u215B': + nm = U"VULGAR FRACTION ONE EIGTH"; + break; + case U'\u215C': + nm = U"VULGAR FRACTION THREE EIGTHS"; + break; + case U'\u215D': + nm = U"VULGAR FRACTION FIVE EIGHTS"; + break; + case U'\u215E': + nm = U"VULGAR FRACTION SEVEN EIGTHS"; + break; + case U'\u215F': + nm = U"FRACTION NUMERATOR ONE"; + break; + case U'\u2160': + nm = U"ROMAN NUMERAL ONE"; + break; + case U'\u2161': + nm = U"ROMAN NUMERAL TWO"; + break; + case U'\u2162': + nm = U"ROMAN NUMERAL THREE"; + break; + case U'\u2163': + nm = U"ROMAN NUMERAL FOUR"; + break; + case U'\u2164': + nm = U"ROMAN NUMERAL FIVE"; + break; + case U'\u2165': + nm = U"ROMAN NUMERAL SIX"; + break; + case U'\u2166': + nm = U"ROMAN NUMERAL SEVEN"; + break; + case U'\u2167': + nm = U"ROMAN NUMERAL EIGHT"; + break; + case U'\u2168': + nm = U"ROMAN NUMERAL NINE"; + break; + case U'\u2169': + nm = U"ROMAN NUMERAL TEN"; + break; + case U'\u216A': + nm = U"ROMAN NUMERAL ELEVEN"; + break; + case U'\u216B': + nm = U"ROMAN NUMERAL TWELVE"; + break; + case U'\u216C': + nm = U"ROMAN NUMERAL FIFTY"; + break; + case U'\u216D': + nm = U"ROMAN NUMERAL ONE HUNDRED"; + break; + case U'\u216E': + nm = U"ROMAN NUMERAL FIVE HUNDRED"; + break; + case U'\u216F': + nm = U"ROMAN NUMERAL ONE THOUSAND"; + break; + case U'\u2170': + nm = U"SMALL ROMAN NUMERAL ONE"; + break; + case U'\u2171': + nm = U"SMALL ROMAN NUMERAL TWO"; + break; + case U'\u2172': + nm = U"SMALL ROMAN NUMERAL THREE"; + break; + case U'\u2173': 
+ nm = U"SMALL ROMAN NUMERAL FOUR"; + break; + case U'\u2174': + nm = U"SMALL ROMAN NUMERAL FIVE"; + break; + case U'\u2175': + nm = U"SMALL ROMAN NUMERAL SIX"; + break; + case U'\u2176': + nm = U"SMALL ROMAN NUMERAL SEVEN"; + break; + case U'\u2177': + nm = U"SMALL ROMAN NUMERAL EIGHT"; + break; + case U'\u2178': + nm = U"SMALL ROMAN NUMERAL NINE"; + break; + case U'\u2179': + nm = U"SMALL ROMAN NUMERAL TEN"; + break; + case U'\u217A': + nm = U"SMALL ROMAN NUMERAL ELEVEN"; + break; + case U'\u217B': + nm = U"SMALL ROMAN NUMERAL TWELVE"; + break; + case U'\u217C': + nm = U"SMALL ROMAN NUMERAL FIFTY"; + break; + case U'\u217D': + nm = U"SMALL ROMAN NUMERAL ONE HUNDRED"; + break; + case U'\u217E': + nm = U"SMALL ROMAN NUMERAL FIVE HUNDRED"; + break; + case U'\u217F': + nm = U"SMALL ROMAN NUMERAL ONE THOUSAND"; + break; + case U'\u2180': + nm = U"ROMAN NUMERAL ONE THOUSAND C D"; + break; + case U'\u2181': + nm = U"ROMAN NUMERAL FIVE THOUSAND"; + break; + case U'\u2182': + nm = U"ROMAN NUMERAL TEN THOUSAND"; + break; + case U'\u2183': + nm = U"ROMAN NUMERAL REVERSED ONE HUNDRED"; + break; + case U'\u2184': + nm = U"LATIN SMALL LETTER REVERSED C"; + break; + case U'\u2185': + nm = U"ROMAN NUMERAL SIX LATE FORM"; + break; + case U'\u2186': + nm = U"ROMAN NUMERAL FIFTY EARLY FORM"; + break; + case U'\u2187': + nm = U"ROMAN NUMERAL FIFTY THOUSAND"; + break; + case U'\u2188': + nm = U"ROMAN NUMERAL ONE HUNDRED THOUSAND"; + break; + case U'\u2189': + nm = U"VULGAR FRACTION ZERO THIRDS"; + break; + case U'\u218A': + nm = U"TURNED DIGIT TWO"; + break; + case U'\u218B': + nm = U"TURNED DIGIT THREE"; + break; + /* MISCELLANEOUS SYMBOLS: */ + case U'\u26B9': + nm = U"SEXTILE"; + break; + /* DINGBATS: */ + case U'\u271D': + nm = U"LATIN CROSS"; + break; + case U'\u2721': + nm = U"STAR OF DAVID"; + break; + /* SUPPLEMENTAL PUNCTUATION: */ + case U'\u2E3B': + nm = U"THREE-EM DASH"; + break; + /* ARABIC PRESENTATION FORMS-A: */ + case U'\uFDFD': + nm = U"ARABIC LIGATURE BISMILLAH 
AL-RAHMAN AR-RAHEEM"; + break; + /* ANCIENT SYMBOLS: */ + case U'\U00010190': + nm = U"ROMAN SEXTANS SIGN"; + break; + case U'\U00010191': + nm = U"ROMAN UNCIA SIGN"; + break; + case U'\U00010192': + nm = U"ROMAN SEMUNCIA SIGN"; + break; + case U'\U00010193': + nm = U"ROMAN SEXTULA SIGN"; + break; + case U'\U00010194': + nm = U"ROMAN DIMIDIA SEXTULA SIGN"; + break; + case U'\U00010195': + nm = U"ROMAN SILIQUA SIGN"; + break; + case U'\U00010196': + nm = U"ROMAN DENARIUS SIGN"; + break; + case U'\U00010197': + nm = U"ROMAN QUINARIUS SIGN"; + break; + case U'\U00010198': + nm = U"ROMAN SESTERTIUS SIGN"; + break; + case U'\U00010199': + nm = U"ROMAN DUPONDIUS SIGN"; + break; + case U'\U0001019A': + nm = U"ROMAN AS SIGN"; + break; + case U'\U0001019B': + nm = U"ROMAN CENTURIAL SIGN"; + break; + case U'\U0001019C': + nm = U"ASCIA SIGN"; + break; + /* BRAHMI: */ + case U'\U00011066': + nm = U"BRAHMI DIGIT ZERO"; + break; + case U'\U00011067': + nm = U"BRAHMI DIGIT ONE"; + break; + case U'\U00011068': + nm = U"BRAHMI DIGIT TWO"; + break; + case U'\U00011069': + nm = U"BRAHMI DIGIT THREE"; + break; + case U'\U0001106A': + nm = U"BRAHMI DIGIT FOUR"; + break; + case U'\U0001106B': + nm = U"BRAHMI DIGIT FIVE"; + break; + case U'\U0001106C': + nm = U"BRAHMI DIGIT SIX"; + break; + case U'\U0001106D': + nm = U"BRAHMI DIGIT SEVEN"; + break; + case U'\U0001106E': + nm = U"BRAHMI DIGIT EIGHT"; + break; + case U'\U0001106F': + nm = U"BRAHMI DIGIT NINE"; + break; + /* CUNEIFORM: */ + case U'\U00012031': + nm = U"CUNEIFORM SIGN AN PLUS NAGA SQUARED"; + break; + /* CUNEIFORM NUMBERS AND PUNCTUATION: */ + case U'\U0001242B': + nm = U"CUNEIFORM NUMERIC SIGN NINE SHAR2"; + break; + /* EGYPTIAN HIEROGLYPHS: */ + case U'\U000130B8': + nm = U"EGYPTIAN HIEROGLYPH D052"; + break; + /* COUNTING ROD NUMERALS: */ + case U'\U0001D372': + nm = U"IDEOGRAPHIC TALLY MARK ONE"; + break; + case U'\U0001D373': + nm = U"IDEOGRAPHIC TALLY MARK TWO"; + break; + case U'\U0001D374': + nm = U"IDEOGRAPHIC TALLY 
MARK THREE"; + break; + case U'\U0001D375': + nm = U"IDEOGRAPHIC TALLY MARK FOUR"; + break; + case U'\U0001D376': + nm = U"IDEOGRAPHIC TALLY MARK FIVE"; + break; + case U'\U0001D377': + nm = U"TALLY MARK ONE"; + break; + case U'\U0001D378': + nm = U"TALLY MARK FIVE"; + break; + /* ENCLOSED ALPHANUMERIC SUPPLEMENT: */ + case U'\U0001F10D': + nm = U"CIRCLED ZERO WITH SLASH"; + break; + case U'\U0001F10E': + nm = U"CIRCLED ANTICKLOCKWISE ARROW"; + break; + case U'\U0001F10F': + nm = U"CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH"; + break; + case U'\U0001F12F': + nm = U"COPYLEFT SYMBOL"; + break; + case U'\U0001F16D': + nm = U"CIRCLED CC"; + break; + case U'\U0001F16E': + nm = U"CIRCLED C WITH OVERLAID BACKSLASH"; + break; + case U'\U0001F16F': + nm = U"CIRCLED HUMAN FIGURE"; + break; + /* EMOTICONS: */ + case U'\U0001F600': + nm = U"GRINNING FACE"; + break; + case U'\U0001F601': + nm = U"GRINNING FACE WITH SMIRKING EYES"; + break; + case U'\U0001F602': + nm = U"FACE WITH TEARS OF JOY"; + break; + case U'\U0001F603': + nm = U"SMILING FACE WITH OPEN MOUTH"; + break; + case U'\U0001F604': + nm = U"SMILING FACE WITH OPEN MOUTH AND SMILING EYES"; + break; + case U'\U0001F605': + nm = U"SMILING FACE WITH OPEN MOUTH AND COULD SWEAT"; + break; + case U'\U0001F606': + nm = U"SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES"; + break; + case U'\U0001F607': + nm = U"SMILING FACE WITH HALO"; + break; + case U'\U0001F608': + nm = U"SMILING FACE WITH HORNS"; + break; + case U'\U0001F609': + nm = U"WINKING FACE"; + break; + case U'\U0001F60A': + nm = U"SMILING FACE WITH SMILING EYES"; + break; + case U'\U0001F60B': + nm = U"FACE SAVOURING DELICIOUS FOOD"; + break; + case U'\U0001F60C': + nm = U"RELIEVED FACE"; + break; + case U'\U0001F60D': + nm = U"SMILLING FACE HEART-SHAPED EYES"; + break; + case U'\U0001F60E': + nm = U"SMILLING FACE WITH SUNGLASSES"; + break; + case U'\U0001F60F': + nm = U"SMIRKING FACE"; + break; + case U'\U0001F610': + nm = U"NEUTRAL FACE"; + break; + case 
U'\U0001F611': + nm = U"EXPRESSIONLESS FACE"; + break; + case U'\U0001F612': + nm = U"UNAMUSED FACE"; + break; + case U'\U0001F613': + nm = U"FACE WITH COLD SWEAT"; + break; + case U'\U0001F614': + nm = U"PENSIVE FACE"; + break; + case U'\U0001F615': + nm = U"CONFUSED FACE"; + break; + case U'\U0001F616': + nm = U"CONFOUNDED FACE"; + break; + case U'\U0001F617': + nm = U"KISSING FACE"; + break; + case U'\U0001F618': + nm = U"FACE THROWING A KISS"; + break; + case U'\U0001F619': + nm = U"KISSING FACE WITH SMILLING EYES"; + break; + case U'\U0001F61A': + nm = U"KISSING FACE WITH CLOSED EYES"; + break; + case U'\U0001F61B': + nm = U"FACE WITH STUCK-OUT TONGUE"; + break; + case U'\U0001F61C': + nm = U"FACE WITH STUCK-OUT TONGUE AND WINKING EYE"; + break; + case U'\U0001F61D': + nm = U"FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES"; + break; + case U'\U0001F61E': + nm = U"DISSAPOINTED FACE"; + break; + case U'\U0001F61F': + nm = U"WORRIED FACE"; + break; + case U'\U0001F620': + nm = U"ANGRY FACE"; + break; + case U'\U0001F621': + nm = U"POUTING FACE"; + break; + case U'\U0001F622': + nm = U"CRYING FACE"; + break; + case U'\U0001F623': + nm = U"PERSEVERING FACE"; + break; + case U'\U0001F624': + nm = U"FACE WITH LOOK OF TRIUMPH"; + break; + case U'\U0001F625': + nm = U"DISSAPOINTED BUT RELIEVED FACE"; + break; + case U'\U0001F626': + nm = U"FROWNING FACE WITH OPEN MOUTH"; + break; + case U'\U0001F627': + nm = U"ANGUISHED FACE"; + break; + case U'\U0001F628': + nm = U"FEARFUL FACE"; + break; + case U'\U0001F629': + nm = U"WEARY FACE"; + break; + case U'\U0001F62A': + nm = U"SLEEPY FACE"; + break; + case U'\U0001F62B': + nm = U"TIRED FACE"; + break; + case U'\U0001F62C': + nm = U"GRIMACING FACE"; + break; + case U'\U0001F62D': + nm = U"LOUDLY CRYING FACE"; + break; + case U'\U0001F62E': + nm = U"FACE WITH OPEN MOUTH"; + break; + case U'\U0001F62F': + nm = U"HUSHED FACE"; + break; + case U'\U0001F630': + nm = U"FACE WITH OPEN MOUTH AND COLD SWEAT"; + break; + case 
U'\U0001F631': + nm = U"FACE SCREAMING IN FEAR"; + break; + case U'\U0001F632': + nm = U"ASTONISHED FACE"; + break; + case U'\U0001F633': + nm = U"FLUSHED FACE"; + break; + case U'\U0001F634': + nm = U"SLEEPING FACE"; + break; + case U'\U0001F635': + nm = U"DIZZY FACE"; + break; + case U'\U0001F636': + nm = U"FACE WITHOUT MOUTH"; + break; + case U'\U0001F637': + nm = U"FACE WITH MEDICAL MASK"; + break; + case U'\U0001F641': + nm = U"SLIGHTLY FROWNING FACE"; + break; + case U'\U0001F642': + nm = U"SLIGHTLY SMILING FACE"; + break; + case U'\U0001F643': + nm = U"UPSIDE-DOWN FACE"; + break; + case U'\U0001F644': + nm = U"FACE WITH ROLLING EYES"; + break; + /* CJK UNIFIED IDEOGRAPHS EXTENSION G: */ + case U'\U0003106C': + nm = U"CJK UNIFIED IDEOGRAPH-3106C"; + break; + } + { + struct u8c_strcp_tuple const tuple = u8c_strcp(nm); + ret.nm = tuple.str; + ret.nmsz = tuple.strsz; + } + return ret; +} diff --git a/src/u8c/u32.h.d/u32alloc.c b/src/u8c/str.h.d/stralloc.c index b64a1ee..f9addcd 100644 --- a/src/u8c/u32.h.d/u32alloc.c +++ b/src/u8c/str.h.d/stralloc.c @@ -16,14 +16,18 @@ # include <stdbool.h> # include <stdlib.h> # include <u8c/err.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <uchar.h> -bool u8c_u32alloc(char32_t * * const _u32,size_t const _sz) { +struct u8c_stralloc_tuple u8c_stralloc(size_t const _sz) { + struct u8c_stralloc_tuple ret = { + .stat = false, + }; char32_t * arr = NULL; if((arr = calloc(sizeof *arr,_sz)) == NULL) { - u8c_seterr(U"u8c_u32alloc: Unable to allocate resources (not enough memory?).",u8c_errtyp_badalloc); - return false; + u8c_seterr(u8c_errtyp_badalloc,U"u8c_stralloc: Unable to allocate resources (not enough memory?)."); + ret.stat = true; + return ret; } - *_u32 = arr; - return false; + ret.str = arr; + return ret; } diff --git a/src/u8c/u32.h.d/u32cat.c b/src/u8c/str.h.d/strcat.c index 600e0dc..5e5f693 100644 --- a/src/u8c/u32.h.d/u32cat.c +++ b/src/u8c/str.h.d/strcat.c @@ -13,29 +13,27 @@ If not, see 
<https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stdlib.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <uchar.h> -bool u8c_u32cat(size_t * const _sz,char32_t const * * const _out,char32_t const * const _lstr,char32_t const * const _rstr) { - assert(_out != NULL); - assert(_lstr != NULL); - assert(_rstr != NULL); - size_t sz = SIZE_C(0x0); - size_t lsz = SIZE_C(0x0); - size_t rsz = SIZE_C(0x0); - u8c_u32sz(&lsz,_lstr); - u8c_u32sz(&rsz,_rstr); - sz = lsz + rsz; - if(_sz != NULL) { - *_sz = sz; - } +struct u8c_strcat_tuple u8c_strcat(char32_t const * const restrict _lstr,char32_t const * const restrict _rstr) { + struct u8c_strcat_tuple ret = { + .stat = false, + }; + size_t lsz = u8c_strsz(_lstr).sz; + size_t rsz = u8c_strsz(_rstr).sz; + ret.strsz = lsz + rsz; char32_t * out = NULL; - if(u8c_u32alloc(&out,sz + SIZE_C(0x1))) { - return true; + { + struct u8c_stralloc_tuple const tuple = u8c_stralloc(ret.strsz + SIZE_C(0x1)); + if(tuple.stat) { + ret.stat = true; + return ret; + } + out = tuple.str; } for(register size_t n = SIZE_C(0x0);n < lsz;n += SIZE_C(0x1)) { out[n] = _lstr[n]; @@ -43,7 +41,6 @@ bool u8c_u32cat(size_t * const _sz,char32_t const * * const _out,char32_t const for(register size_t n = SIZE_C(0x0);n < rsz;n += SIZE_C(0x1)) { out[n + lsz] = _rstr[n]; } - u8c_u32free(_out); - *_out = out; - return false; + ret.str = out; + return ret; } diff --git a/src/u8c/u32.h.d/u32cmp.c b/src/u8c/str.h.d/strcmp.c index 8a6617d..31654d0 100644 --- a/src/u8c/u32.h.d/u32cmp.c +++ b/src/u8c/str.h.d/strcmp.c @@ -13,33 +13,33 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> -bool u8c_u32cmp(uint_least8_t * const _res,char32_t const * const _lstr,char32_t const * const _rstr) { - assert(_res != NULL); - assert(_lstr != NULL); - assert(_rstr != NULL); +# include <u8c/str.h> +struct u8c_strcmp_tuple u8c_strcmp(char32_t const * const restrict _lstr,char32_t const * const restrict _rstr) { + struct u8c_strcmp_tuple ret = { + .stat = false, + }; for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { register char32_t const lchr = _lstr[n]; register char32_t const rchr = _rstr[n]; if(lchr != rchr) { if(lchr < rchr) { - *_res = UINT8_C(0x0); - return false; + ret.res = UINT8_C(0x0); + return ret; } - *_res = UINT8_C(0x2); - return false; + ret.res = UINT8_C(0x2); + return ret; } - if(lchr == UINT32_C(0x0)) { - *_res = UINT8_C(0x1); - return false; + if(lchr == U'\x0') { + ret.res = UINT8_C(0x1); + return ret; } } - u8c_seterr(U"u8c_u32cmp: Unterminated input.",u8c_errtyp_untermin); - return true; + u8c_seterr(u8c_errtyp_untermin,U"u8c_strcmp: Unterminated input."); + ret.stat = true; + return ret; } diff --git a/src/u8c/u32.h.d/u32cp.c b/src/u8c/str.h.d/strcp.c index 95a9b35..1343bf1 100644 --- a/src/u8c/u32.h.d/u32cp.c +++ b/src/u8c/str.h.d/strcp.c @@ -13,30 +13,28 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stdlib.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> -bool u8c_u32cp(size_t * const _sz,char32_t const * * const _out,char32_t const * const _in) { - assert(_out != NULL); - assert(_in != NULL); - if(*_out != NULL) { - u8c_u32free(&*_out); - } - size_t insz = SIZE_C(0x0); - u8c_u32sz(&insz,_in); - if(_sz != NULL) { - *_sz = insz; - } +# include <u8c/str.h> +struct u8c_strcp_tuple u8c_strcp(char32_t const * const restrict _in) { + struct u8c_strcp_tuple ret = { + .stat = false, + }; + ret.strsz = u8c_strsz(_in).sz; uint_least32_t * out = NULL; - if(u8c_u32alloc(&out,insz + SIZE_C(0x1))) { - return false; + { + struct u8c_stralloc_tuple const tuple = u8c_stralloc(ret.strsz + SIZE_C(0x1)); + if(tuple.stat) { + ret.stat = true; + return ret; + } + out = tuple.str; } - for(register size_t n = SIZE_C(0x0);n < insz;n += SIZE_C(0x1)) { + for(register size_t n = SIZE_C(0x0);n < ret.strsz;n += SIZE_C(0x1)) { out[n] = _in[n]; } - *_out = out; - return false; + ret.str = out; + return ret; } diff --git a/src/u8c/u32.h.d/u32fndchr.c b/src/u8c/str.h.d/strfndchr.c index 228c553..93bb77c 100644 --- a/src/u8c/u32.h.d/u32fndchr.c +++ b/src/u8c/str.h.d/strfndchr.c @@ -13,32 +13,33 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> -bool u8c_u32fndchr(size_t * const _pos,char32_t const * const _in,char32_t const _chr) { - assert(_pos != NULL); - assert(_in != NULL); +# include <u8c/str.h> +struct u8c_strfndchr_tuple u8c_strfndchr(char32_t const * const restrict _in,char32_t const _chr) { + struct u8c_strfndchr_tuple ret = { + .stat = false, + }; for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { register uint_least32_t const tmp = _in[n]; if(tmp == U'\x0') { if(_chr == U'\x0') { - *_pos = n; - return false; + ret.pos = n; + return ret; } - *_pos = SIZE_C(-0x1); - return true; + ret.pos = SIZE_C(-0x1); + return ret; } if(tmp == _chr) { - *_pos = n; - return false; + ret.pos = n; + return ret; } } - u8c_seterr(U"u8c_u32fndchr: Unterminated input.",u8c_errtyp_badio); - *_pos = SIZE_C(-0x1); - return true; + u8c_seterr(u8c_errtyp_untermin,U"u8c_strfndchr: Unterminated input."); + ret.pos = SIZE_C(-0x1); + ret.stat = true; + return ret; } diff --git a/src/u8c/u32.h.d/u32fndpat.c b/src/u8c/str.h.d/strfndpat.c index 5a1b5d2..1091238 100644 --- a/src/u8c/u32.h.d/u32fndpat.c +++ b/src/u8c/str.h.d/strfndpat.c @@ -13,35 +13,31 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> -bool u8c_u32fndpat(size_t * const _pos,char32_t const * const _in,char32_t const * const _pat) { - assert(_pos != NULL); - assert(_in != NULL); - size_t insz = SIZE_C(0x0); - size_t patsz = SIZE_C(0x0); - u8c_u32sz(&insz,_in); - u8c_u32sz(&patsz,_pat); +# include <u8c/str.h> +struct u8c_strfndpat_tuple u8c_strfndpat(char32_t const * const restrict _in,char32_t const * const restrict _pat) { + struct u8c_strfndpat_tuple ret = { + .stat = false, + }; + size_t insz = u8c_strsz(_in).sz; + size_t patsz = u8c_strsz(_pat).sz; if(insz == SIZE_C(0x1) || insz < patsz) { - *_pos = SIZE_C(-0x1); - return false; + ret.pos = SIZE_C(-0x1); + return ret; } for(register size_t n = SIZE_C(0x0);n < insz - patsz;n += SIZE_C(0x1)) { - char32_t const * str = NULL; - u8c_u32substr(&str,n,patsz - SIZE_C(0x1),_in); - uint_least8_t cmpres = UINT8_C(0x0); - u8c_u32cmp(&cmpres,str,_pat); - u8c_u32free(&str); + char32_t const * str = u8c_strsubstr(n,patsz - SIZE_C(0x1),_in).str; + uint_least8_t const cmpres = u8c_strcmp(str,_pat).res; + u8c_strfree(str); if(cmpres == UINT8_C(0x1)) { - *_pos = n; - return false; + ret.pos = n; + return ret; } } - *_pos = SIZE_C(-0x1); - return false; + ret.pos = SIZE_C(-0x1); + return ret; } diff --git a/src/u8c/u32.h.d/u32free.c b/src/u8c/str.h.d/strfree.c index a0b120b..bf6d477 100644 --- a/src/u8c/u32.h.d/u32free.c +++ b/src/u8c/str.h.d/strfree.c @@ -16,9 +16,11 @@ # include <stdbool.h> # include <stdint.h> # include <stdlib.h> -# include <u8c/u32.h> -bool u8c_u32free(char32_t const * * const _u32) { - free((char32_t *)*_u32); /* This cast does indeed discard a const-qualifier, but it is not undefined behaviour, as the array must have been allocated by calloc or malloc, meaning it's original type is not const-qualified. 
*/ - *_u32 = NULL; - return false; +# include <u8c/str.h> +struct u8c_strfree_tuple u8c_strfree(char32_t const * const restrict _str) { + struct u8c_strfree_tuple ret = { + .stat = false, + }; + free((char32_t *)_str); /* This cast does indeed discard a const-qualifier, but it is not undefined behaviour, as the array must have been allocated by calloc or malloc, meaning it's original type is not const-qualified. */ + return ret; } diff --git a/src/u8c/str.h.d/strins.c b/src/u8c/str.h.d/strins.c new file mode 100644 index 0000000..89173ae --- /dev/null +++ b/src/u8c/str.h.d/strins.c @@ -0,0 +1,38 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <stddef.h> +# include <u8c/SIZE_C.h> +# include <u8c/str.h> +# include <uchar.h> +struct u8c_strins_tuple u8c_strins(size_t const _pos,char32_t const * const restrict _str0,char32_t const * const restrict _str1) { + struct u8c_strins_tuple ret = { + .stat = false, + }; + char32_t const * lstr = u8c_strsubstr(SIZE_C(0x0),_pos - SIZE_C(0x1),_str0).str; + char32_t const * rstr = u8c_strsubstr(_pos,SIZE_C(0x0),_str0).str; + ret.strsz = SIZE_C(0x0); + char32_t const * out = NULL; + { + char32_t const * tmp = u8c_strcat(lstr,_str1).str; + u8c_strfree(lstr); + out = u8c_strcat(tmp,rstr).str; + u8c_strfree(rstr); + u8c_strfree(tmp); + } + ret.str = out; + return ret; +} diff --git a/src/u8c/u32.h.d/u32substr.c b/src/u8c/str.h.d/strsubstr.c index 855d062..b9daac5 100644 --- a/src/u8c/u32.h.d/u32substr.c +++ b/src/u8c/str.h.d/strsubstr.c @@ -13,33 +13,36 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stdlib.h> # include <u8c/SIZE_C.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <uchar.h> -bool u8c_u32substr(char32_t const * * const _out,size_t const _start,size_t const _len,char32_t const * const _in) { - assert(_out != NULL); - assert(_in != NULL); - u8c_u32free(_out); - size_t insz = SIZE_C(0x0); - u8c_u32sz(&insz,_in); +struct u8c_strsubstr_tuple u8c_strsubstr(size_t const _start,size_t const _len,char32_t const * const restrict _in) { + struct u8c_strsubstr_tuple ret = { + .stat = false, + }; + size_t insz = u8c_strsz(_in).sz; size_t len = _len; if(_len == SIZE_C(0x0)) { len = insz - _start; } if(insz < _start + len) { - return true; + return ret; } size_t const outsz = len + SIZE_C(0x2); char32_t * out = NULL; - if(u8c_u32alloc(&out,outsz)) { - return true; + { + struct u8c_stralloc_tuple const tuple = u8c_stralloc(outsz); + if(tuple.stat) { + ret.stat = true; + return ret; + } + out = tuple.str; } for(register size_t n = SIZE_C(0x0);n <= len;n += SIZE_C(0x1)) { 
out[n] = _in[n + _start]; } - *_out = out; - return false; + ret.str = out; + return ret; } diff --git a/src/u8c/str.h.d/strsz.c b/src/u8c/str.h.d/strsz.c new file mode 100644 index 0000000..f1b348a --- /dev/null +++ b/src/u8c/str.h.d/strsz.c @@ -0,0 +1,35 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. +*/ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <u8c/SIZE_C.h> +# include <u8c/str.h> +# include <uchar.h> +struct u8c_strsz_tuple u8c_strsz(char32_t const * const restrict _in) { + struct u8c_strsz_tuple ret = { + .stat = false, + }; + { + struct u8c_strfndchr_tuple const tuple = u8c_strfndchr(_in,UINT8_C(0x0)); + if(tuple.stat) { + ret.stat = true; + return ret; + } + ret.sz = tuple.pos; + } + return ret; +} diff --git a/src/u8c/u16.h.d/u16alloc.c b/src/u8c/u16.h.d/u16alloc.c index 3906017..ce20ecb 100644 --- a/src/u8c/u16.h.d/u16alloc.c +++ b/src/u8c/u16.h.d/u16alloc.c @@ -18,12 +18,16 @@ # include <u8c/err.h> # include <u8c/u16.h> # include <uchar.h> -bool u8c_u16alloc(char16_t * * const _u16,size_t const _sz) { +struct u8c_u16alloc_tuple u8c_u16alloc(size_t const _sz) { + struct u8c_u16alloc_tuple ret = { + .stat = false, + }; char16_t * arr = NULL; if((arr = calloc(sizeof *arr,_sz)) == NULL) { - u8c_seterr(U"u8c_u16alloc: Unable to allocate resources (not enough memory?).",u8c_errtyp_badalloc); 
- return false; + u8c_seterr(u8c_errtyp_badalloc,U"u8c_u16alloc: Unable to allocate resources (not enough memory?)."); + ret.stat = true; + return ret; } - *_u16 = arr; - return false; + ret.u16 = arr; + return ret; } diff --git a/src/u8c/u16.h.d/u16free.c b/src/u8c/u16.h.d/u16free.c index d447562..43e7503 100644 --- a/src/u8c/u16.h.d/u16free.c +++ b/src/u8c/u16.h.d/u16free.c @@ -17,8 +17,10 @@ # include <stdint.h> # include <stdlib.h> # include <u8c/u16.h> -bool u8c_u16free(char16_t const * * const _u16) { - free((char16_t *)*_u16); - *_u16 = NULL; - return false; +struct u8c_u16free_tuple u8c_u16free(char16_t const * const restrict _u16) { + struct u8c_u16free_tuple ret = { + .stat = false, + }; + free((char16_t *)_u16); + return ret; } diff --git a/src/u8c/u32.h.d/u32ins.c b/src/u8c/u32.h.d/u32ins.c deleted file mode 100644 index 7fccb7c..0000000 --- a/src/u8c/u32.h.d/u32ins.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <assert.h> -# include <stdbool.h> -# include <stddef.h> -# include <u8c/SIZE_C.h> -# include <u8c/u32.h> -# include <uchar.h> -bool u8c_u32ins(size_t * const _sz,char32_t const * * const _out,size_t const _pos,char32_t const * const _str0,char32_t const * const _str1) { - assert(_out != NULL); - assert(_str0 != NULL); - assert(_str1 != NULL); - char32_t const * lstr = NULL; - char32_t const * rstr = NULL; - u8c_u32substr(&lstr,SIZE_C(0x0),_pos - SIZE_C(0x1),_str0); - u8c_u32substr(&rstr,_pos,SIZE_C(0x0),_str0); - size_t sz = SIZE_C(0x0); - char32_t const * out = NULL; - { - char32_t const * tmp = NULL; - u8c_u32cat(NULL,&tmp,lstr,_str1); - u8c_u32free(&lstr); - u8c_u32cat(NULL,&out,tmp,rstr); - u8c_u32free(&rstr); - u8c_u32free(&tmp); - } - if(_sz != NULL) { - *_sz = sz; - } - *_out = out; - return false; -} diff --git a/src/u8c/u8.h.d/u8alloc.c b/src/u8c/u8.h.d/u8alloc.c index ba02bc8..ba28243 100644 --- a/src/u8c/u8.h.d/u8alloc.c +++ b/src/u8c/u8.h.d/u8alloc.c @@ -17,12 +17,16 @@ # include <stdlib.h> # include <u8c/err.h> # include <u8c/u8.h> -bool u8c_u8alloc(unsigned char * * const _u8,size_t const _sz) { +struct u8c_u8alloc_tuple u8c_u8alloc(size_t const _sz) { + struct u8c_u8alloc_tuple ret = { + .stat = false, + }; unsigned char * arr = NULL; if((arr = calloc(sizeof *arr,_sz)) == NULL) { - u8c_seterr(U"u8c_u8alloc: Unable to allocate resources (not enough memory?).",u8c_errtyp_badalloc); - return false; + u8c_seterr(u8c_errtyp_badalloc,U"u8c_u8alloc: Unable to allocate resources (not enough memory?)."); + ret.stat = true; + return ret; } - *_u8 = arr; - return false; + ret.u8 = arr; + return ret; } diff --git a/src/u8c/u8.h.d/u8dec.c b/src/u8c/u8.h.d/u8dec.c index 365c81a..4cba14f 100644 --- a/src/u8c/u8.h.d/u8dec.c +++ b/src/u8c/u8.h.d/u8dec.c @@ -19,23 +19,24 @@ # include <stdint.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <u8c/u8.h> # include <uchar.h> -bool u8c_u8dec(size_t * 
const _sz,char32_t const * * const _out,unsigned char const * const _in) { - assert(_out != NULL); - assert(_in != NULL); - register size_t insz = SIZE_C(0x0); - register size_t outsz = SIZE_C(0x1); - for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;outsz += SIZE_C(0x1)) { /* First pass: get size of input array and determine size of output array. */ +struct u8c_u8dec_tuple u8c_u8dec(unsigned char const * const restrict _in) { + struct u8c_u8dec_tuple ret = { + .stat = false, + }; + register size_t insz = SIZE_C(0x0); + for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;ret.strsz += SIZE_C(0x1)) { /* First pass: get size of input array and determine size of output array. */ register unsigned char const tmp = _in[n]; if(tmp == UINT8_C(0x0)) { /* Null-terminator: end of string has been reached. */ insz = n; goto nottoobig; } if(tmp >= UINT8_C(0b11111000)) { /* Too big. */ - u8c_seterr(U"u8c_u8dec: Character out of range (too big).",u8c_errtyp_u8oor); - return true; + u8c_seterr(u8c_errtyp_u8oor,U"u8c_u8dec: Character out of range (too big)."); + ret.stat = true; + return ret; } if(tmp >= UINT8_C(0b11110000)) { /* Four byte. */ n += SIZE_C(0x4); @@ -53,15 +54,18 @@ bool u8c_u8dec(size_t * const _sz,char32_t const * * const _out,unsigned char co n += SIZE_C(0x1); } /* Input is not null-terminated. */ - u8c_seterr(U"u8c_u8dec: Unterminated input.",u8c_errtyp_untermin); - return true; + u8c_seterr(u8c_errtyp_untermin,U"u8c_u8dec: Unterminated input."); + ret.stat = true; + return ret; nottoobig:; - if(_sz != NULL) { - *_sz = outsz; - } uint_least32_t * out = NULL; - if(u8c_u32alloc(&out,outsz + SIZE_C(0x1))) { - return false; + { + struct u8c_stralloc_tuple const tuple = u8c_stralloc(ret.strsz + SIZE_C(0x1)); + if(tuple.stat) { + ret.stat = true; + return ret; + } + out = tuple.str; } for(register size_t n = SIZE_C(0x0),outn = SIZE_C(0x0);n < insz;outn += SIZE_C(0x1)) { /* Second pass: decode UTF-8. */ if(_in[n] >= UINT8_C(0b11110000)) { /* Four bytes. 
*/ @@ -99,7 +103,6 @@ nottoobig:; n += SIZE_C(0x1); continue; } - u8c_u32free(_out); - *_out = out; - return false; + ret.str = out; + return ret; } diff --git a/src/u8c/u8.h.d/u8enc.c b/src/u8c/u8.h.d/u8enc.c index f3f3570..2ac0007 100644 --- a/src/u8c/u8.h.d/u8enc.c +++ b/src/u8c/u8.h.d/u8enc.c @@ -19,48 +19,53 @@ # include <stdint.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> +# include <u8c/main.h> +# include <u8c/str.h> # include <u8c/u8.h> # include <uchar.h> -bool u8c_u8enc(size_t * const _sz,unsigned char const * * const _out,char32_t const * const _in) { - assert(_out != NULL); - assert(_in != NULL); - size_t insz = SIZE_C(0x0); /* Size of input array (bytes). */ - size_t outsz = SIZE_C(0x0); /* Size of output array /bytes). */ +struct u8c_u8enc_tuple u8c_u8enc(char32_t const * const restrict _in) { + struct u8c_u8enc_tuple ret = { + .stat = false, + }; + size_t insz = SIZE_C(0x0); /* Size of input array (bytes). */ for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { /* First pass: get size of input array, and determine size of output array. */ register char32_t const tmp = _in[n]; - if(tmp > u8c_u32max) { /* Codepoint out of range. */ - u8c_seterr(U"u8c_u8enc: Codepoint out of range (too big).",u8c_errtyp_u32oor); - return true; + if(tmp > u8c_unimax) { /* Codepoint out of range. */ + u8c_seterr(u8c_errtyp_stroor,U"u8c_u8enc: Codepoint out of range (too big)."); + ret.stat = true; + return ret; } if(tmp >= UINT32_C(0x10000)) { /* 4 bytes. */ - outsz += SIZE_C(0x4); + ret.u8sz += SIZE_C(0x4); continue; } if(tmp >= UINT32_C(0x800)) { /* 3 bytes. */ - outsz += SIZE_C(0x3); + ret.u8sz += SIZE_C(0x3); continue; } if(tmp >= UINT32_C(0x80)) { /* 2 bytes. */ - outsz += SIZE_C(0x2); + ret.u8sz += SIZE_C(0x2); continue; } /* 1 byte. 
*/ - outsz += SIZE_C(0x1); + ret.u8sz += SIZE_C(0x1); if(tmp == UINT32_C(0x0)) { insz = n + SIZE_C(0x1); goto nottoobig; } } - u8c_seterr(U"u8c_u8enc: Unterminated input.",u8c_errtyp_untermin); - return true; + u8c_seterr(u8c_errtyp_untermin,U"u8c_u8enc: Unterminated input."); + ret.stat = true; + return ret; nottoobig:; - if(_sz != NULL) { - *_sz = outsz; - } unsigned char * out = NULL; - if(u8c_u8alloc(&out,outsz + SIZE_C(0x1))) { - return true; + { + struct u8c_u8alloc_tuple const tuple = u8c_u8alloc(ret.u8sz + SIZE_C(0x1)); + if(tuple.stat) { + ret.stat = true; + return ret; + } + out = tuple.u8; } for(register size_t n = SIZE_C(0x0), outn = SIZE_C(0x0);n < insz;n += SIZE_C(0x1),outn += SIZE_C(0x1)) { /* Second pass: encode each codepoint into UTF-8. */ register char32_t const tmp = _in[n]; @@ -91,7 +96,6 @@ nottoobig:; /* One byte. */ out[outn] = (uint_least8_t)tmp; } - u8c_u8free(_out); - *_out = out; - return false; + ret.u8 = out; + return ret; } diff --git a/src/u8c/u8.h.d/u8free.c b/src/u8c/u8.h.d/u8free.c index af5a6bd..a0b61a8 100644 --- a/src/u8c/u8.h.d/u8free.c +++ b/src/u8c/u8.h.d/u8free.c @@ -17,8 +17,10 @@ # include <stdint.h> # include <stdlib.h> # include <u8c/u8.h> -bool u8c_u8free(unsigned char const * * const _u8) { - free((unsigned char *)*_u8); - *_u8 = NULL; - return false; +struct u8c_u8free_tuple u8c_u8free(unsigned char const * const restrict _u8) { + struct u8c_u8free_tuple ret = { + .stat = false, + }; + free((unsigned char *)_u8); + return ret; } @@ -3,85 +3,87 @@ # include <stdint.h> # include <stdio.h> # include <stdlib.h> +# include <string.h> # include <u8c/SIZE_C.h> +# include <u8c/chk.h> # include <u8c/err.h> # include <u8c/fmt.h> -# include <u8c/is.h> # include <u8c/main.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <u8c/u8.h> -static void errhandl(enum u8c_errtyp errtyp); -static void test( int n); -static void testmsg( char const * fmt,...); -static void testmsgdone( void); +static void errhandl(enum u8c_errtyp 
errtyp); +static int hlpscrn( char const * const restrict nm); +static void test( int n); +static char const * testnm( int n); +static const int maxtest = 0x17; static void errhandl(enum u8c_errtyp errtyp) { printf(":: Error handler called with type %d.\n",(int)errtyp); } +static int hlpscrn(char const * const restrict nm) { + printf("u8c-test: Test u8c\n"); + printf("Usage: %s [test number]\n",nm); + printf("\n"); + printf("Test numbers:\n"); + for(int n = 0x0;n <= maxtest;n += 0x1) { + printf("\t %i - \"%s\"\n",n,testnm(n)); + } + printf("\n"); + return EXIT_SUCCESS; +} static void test(int n) { + char const * const restrict _testnm = testnm(n); + /* printf("\n+->\n| \x1b[38:2::169:225:61mTesting\x1b[0m \""%s"\"...\n+->\n\n",_testnm); */ /* This command works in all of the terminals I tested, except Konsole (whic is funny, because it's xterm-based (and sets the TERM and COLORTERM environemnt variable to "xterm-256color" and "truecolor"), and xterm supports it). */ + printf("\n+->\n| \x1b[38;2;169;225;61mTesting\x1b[0m #%i \"%s\"...\n+->\n\n",n,_testnm); switch(n) { case 0x0: - testmsg("(0) Error messages"); { - char32_t const * err = NULL; - u8c_geterr(NULL,&err); + char32_t const * err = u8c_geterr().err; printf("default error message: "); u8c_println(stdout,err); - u8c_seterr(U"Gluchwein!",u8c_errtyp_deferr); - u8c_geterr(NULL,&err); + u8c_seterr(u8c_errtyp_deferr,U"Gluchwein!"); + u8c_strfree(err); + err = u8c_geterr().err; printf("set error message: "); u8c_println(stdout,err); - u8c_u32free(&err); + u8c_strfree(err); } - testmsgdone(); break; case 0x1: - testmsg("(1) UTF-8 encoding/decoding"); { - char32_t const * msg0 = U"¢,ह,𐍈,€,↊,👋"; - unsigned char const * msg1 = NULL; - u8c_u8enc(NULL,&msg1,msg0); - msg0 = NULL; + char32_t const * msg0 = U"¢,ह,𐍈,€,↊,👋"; + unsigned char const * msg1 = u8c_u8enc(msg0).u8; printf("Encoded: %s\n",msg1); - u8c_u8dec(NULL,&msg0,msg1); - u8c_u8enc(NULL,&msg1,msg0); + u8c_u8free(msg1); + msg0 = u8c_u8dec(msg1).str; + msg1 = 
u8c_u8enc(msg0).u8; printf("Encoded -> Decoded -> Encoded: %s\n",msg1); - u8c_u32free(&msg0); - u8c_u8free(&msg1); + u8c_strfree(msg0); + u8c_u8free(msg1); } - testmsgdone(); break; case 0x2: - testmsg("Printing (u8c_print)"); { u8c_print(stdout,U"Hello"); - u8c_print(stdout,U" ðere!"); + u8c_print(stdout,U" ðere!\n"); } - testmsgdone(); break; case 0x3: - testmsg("(3) Printing (u8c_println)"); { u8c_println(stdout,U"Hello"); u8c_println(stdout,U" ðere!"); } - testmsgdone(); break; case 0x4: - testmsg("(4) Text formatting"); { u8c_println(stdout,U"The \uFFFCnumber\uFFFC is \uFFFC.",u8c_fmttyp_fgcol,u8c_col_mint,u8c_fmttyp_fgcol0,u8c_fmttyp_int,(int_least64_t){-0x10}); } - testmsgdone(); break; case 0x5: - testmsg("(5) Colour text"); { u8c_println(stdout,U"\uFFFCred\uFFFCorange\uFFFCyellow\uFFFCchartreuse\uFFFCgreen\uFFFCmint\uFFFCcyan\uFFFCazure\uFFFCblue\uFFFCviolet\uFFFCmagenta\uFFFCrose\uFFFC",u8c_fmttyp_fgcol,u8c_col_red,u8c_fmttyp_fgcol,u8c_col_orange,u8c_fmttyp_fgcol,u8c_col_yellow,u8c_fmttyp_fgcol,u8c_col_chartreuse,u8c_fmttyp_fgcol,u8c_col_green,u8c_fmttyp_fgcol,u8c_col_mint,u8c_fmttyp_fgcol,u8c_col_cyan,u8c_fmttyp_fgcol,u8c_col_azure,u8c_fmttyp_fgcol,u8c_col_blue,u8c_fmttyp_fgcol,u8c_col_violet,u8c_fmttyp_fgcol,u8c_col_magenta,u8c_fmttyp_fgcol,u8c_col_rose,u8c_fmttyp_fgcol0); } - testmsgdone(); break; case 0x6: - testmsg("(6) Combining characters"); { for(register uint_least32_t n = UINT32_C(0x300);n <= UINT32_C(0x36F);n += UINT32_C(0x1)) { u8c_print(stdout,(uint_least32_t[]){UINT32_C(0x61),n,UINT32_C(0x20),UINT32_C(0x0),}); @@ -89,10 +91,8 @@ static void test(int n) { } u8c_print(stdout,U"\n"); } - testmsgdone(); break; case 0x7: - testmsg("(7) String comparison"); { char32_t const * str0 = U"Hello"; char32_t const * str1 = U"Hello"; @@ -103,215 +103,287 @@ static void test(int n) { u8c_println(stdout,str1); printf("str2: "); u8c_println(stdout,str2); - uint_least8_t res = UINT8_C(0x0); - u8c_u32cmp(&res,str0,str1); + uint_least8_t res = 
u8c_strcmp(str0,str1).res; printf("str0,str1: %" PRIXLEAST8 ".\n",res); - u8c_u32cmp(&res,str1,str2); + res = u8c_strcmp(str1,str2).res; printf("str0,str2: %" PRIXLEAST8 ".\n",res); - u8c_u32cmp(&res,str2,str1); + res = u8c_strcmp(str2,str1).res; printf("str2,str1: %" PRIXLEAST8 ".\n",res); } - testmsgdone(); break; case 0x8: - testmsg("(8) u8c_isalnum"); { for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - uint_least8_t res = UINT8_C(0x0); - u8c_isalnum(&res,n); + bool const res = u8c_isalnum(n).res; if(res) { u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); } } u8c_println(stdout,U""); } - testmsgdone(); break; case 0x9: - testmsg("(9) u8c_isalpha"); { for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - uint_least8_t res = UINT8_C(0x0); - u8c_isalpha(&res,n); + bool const res = u8c_isalpha(n).res; if(res) { u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); } } u8c_println(stdout,U""); } - testmsgdone(); break; case 0xA: - testmsg("(10) u8c_isdigit"); { for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - uint_least8_t res = UINT8_C(0x0); - u8c_isdigit(&res,n); + bool const res = u8c_isdigit(n).res; if(res) { u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); } } u8c_println(stdout,U""); } - testmsgdone(); break; case 0xB: - testmsg("(11) u8c_ispunct"); /* This test appears broken on some incomplete fonts, altough it is not. 
*/ { for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - uint_least8_t res = UINT8_C(0x0); - u8c_ispunct(&res,n); + bool const res = u8c_ispunct(n).res; if(res) { u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); } } u8c_println(stdout,U""); } - testmsgdone(); break; case 0xC: - testmsg("(12) UTF-32 string literals"); { u8c_println(stdout,U"Can you see ðis?"); } - testmsgdone(); break; case 0xD: - testmsg("(13) string concatenation"); { char32_t const * str0 = U"Free_as_in"; char32_t const * str1 = U"_freedom!"; - char32_t const * str2 = NULL; - u8c_u32cat(NULL,&str2,str0,str1); + char32_t const * str2 = u8c_strcat(str0,str1).str; printf("string #0: "); u8c_println(stdout,str0); printf("string #1: "); u8c_println(stdout,str1); printf("string #2: "); u8c_println(stdout,str2); - u8c_u32free(&str2); + u8c_strfree(str2); } - testmsgdone(); break; case 0xE: - testmsg("(14) sub-strings"); { char32_t const * str0 = U"I_wish_to_suck_big_duck."; - char32_t const * str1 = NULL; - u8c_u32substr(&str1,SIZE_C(0x0),SIZE_C(0xE),str0); - char32_t const * str2 = NULL; - u8c_u32substr(&str2,SIZE_C(0xF),SIZE_C(0x0),str0); + char32_t const * str1 = u8c_strsubstr(SIZE_C(0x0),SIZE_C(0xE),str0).str; + char32_t const * str2 = u8c_strsubstr(SIZE_C(0xF),SIZE_C(0x0),str0).str; printf("string #0: "); u8c_println(stdout,str0); printf("string #1: "); u8c_println(stdout,str1); printf("string #2: "); u8c_println(stdout,str2); - u8c_u32free(&str1); - u8c_u32free(&str2); + u8c_strfree(str1); + u8c_strfree(str2); } - testmsgdone(); break; case 0xF: - testmsg("(15) u8c_isxdigit"); { for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - uint_least8_t res = UINT8_C(0x0); - u8c_isxdigit(&res,n); + bool const res = u8c_isxdigit(n).res; if(res) { u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); } } - u8c_println(stdout,(uint_least32_t[]){UINT32_C(0x0),}); + u8c_println(stdout,U""); 
} - testmsgdone(); break; case 0x10: - testmsg("(16) Printing (u8c_dbgprint)"); { u8c_dbgprint(U"Hello"); u8c_dbgprint(U" ðere!"); } - testmsgdone(); break; case 0x11: - testmsg("(17) u8c_u32fndchr"); { char32_t const * str = U"Proprietary as in Micro$oft."; - size_t pos0 = SIZE_C(0x0); - size_t pos1 = SIZE_C(0x0); - u8c_u32fndchr(&pos0,str,U'M'); - u8c_u32fndchr(&pos1,str,U'ŋ'); + size_t pos0 = u8c_strfndchr(str,U'M').pos; + size_t pos1 = u8c_strfndchr(str,U'ŋ').pos; printf("string: "); u8c_println(stdout,str); printf("Position of 'M': %zu\n",pos0); printf("Position of 'ŋ': %zu\n",pos1); } - testmsgdone(); break; case 0x12: - testmsg("(18) u8c_u32fndpat"); { char32_t const * str = U"Proprietary as in Micro$oft."; - size_t pos0 = SIZE_C(0x0); - size_t pos1 = SIZE_C(0x0); - u8c_u32fndpat(&pos0,str,U"as in"); - u8c_u32fndpat(&pos1,str,U"forever"); + size_t pos0 = u8c_strfndpat(str,U"as in").pos; + size_t pos1 = u8c_strfndpat(str,U"forever").pos; printf("string: "); u8c_println(stdout,str); printf("Position of \"as in\": %zu\n",pos0); printf("Position of \"forever\": %zu\n",pos1); } - testmsgdone(); break; case 0x13: - testmsg("(19) string insertion"); { char32_t const * str0 = U"There_is_I_love."; char32_t const * str1 = U"just_somebody_that_"; - char32_t const * str2 = NULL; - u8c_u32ins(NULL,&str2,SIZE_C(0x9),str0,str1); + char32_t const * str2 = u8c_strins(SIZE_C(0x9),str0,str1).str; printf("String #0: "); u8c_println(stdout,str0); printf("String #1: "); u8c_println(stdout,str1); printf("String #2: "); u8c_println(stdout,str2); - u8c_u32free(&str2); + u8c_strfree(str2); + } + break; + case 0x14: + { + for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { + bool const res = u8c_islower(n).res; + if(res) { + u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); + } + } + u8c_println(stdout,U""); + } + break; + case 0x15: + { + for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += 
UINT32_C(0x1)) { + bool const res = u8c_isupper(n).res; + if(res) { + u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); + } + } + u8c_println(stdout,U""); + } + break; + case 0x16: + { + register uint_least32_t num = UINT32_C(0x0); + for(register uint_least32_t n = UINT32_C(0x0);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1),num += UINT32_C(0x1)) { + { + bool const res = u8c_issurro(n).res; + if(res) { + num -= UINT32_C(0x1); + continue; + } + } + char32_t const * nm = u8c_uninm(n).nm; + uint_least8_t res = u8c_strcmp(nm,U"UNDEFINED IN UNICODE").res; + if(res == UINT8_C(0x1)) { + num -= UINT32_C(0x1); + u8c_strfree(nm); + continue; + } + printf("U+%" PRIXLEAST32 " ",n); + u8c_println(stdout,nm); + u8c_strfree(nm); + } + printf("\n:: The number of mapped (named) codepoints is %" PRIuLEAST32 ", which is %f%% of the total number of defined Unicode codepoints (143859).\n",num,((double)num) / (double)UINT32_C(0x231F3) * (double)UINT8_C(0x64)); + } + break; + case 0x17: + { + char32_t const * lastblk = u8c_strcp(U"").str; + for(register uint_least32_t n = UINT32_C(0x0);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { + char32_t const * blk = u8c_uniblk(n).blk; + if(u8c_strcmp(blk,lastblk).res == UINT8_C(0x1) || u8c_strcmp(blk,U"UNDEFINED IN UNICODE").res == UINT8_C(0x1)) { + u8c_strfree(blk); + continue; + } + u8c_strfree(lastblk); + lastblk = u8c_strcp(blk).str; + printf("U+%" PRIXLEAST32 " = ",n); + u8c_println(stdout,blk); + u8c_strfree(blk); + } + u8c_strfree(lastblk); } - testmsgdone(); break; } + /* printf("\n+->\n| \x1b[38:2::61:225:169mDone\x1b[0m testing (%i) \"%s\"!\n+->\n",n,_testnm); */ + printf("\n+->\n| \x1b[38;2;61;225;169mDone\x1b[0m testing #%i \"%s\"!\n+->\n\n",n,_testnm); } -static void testmsg(char const * fmt,...) 
{ - va_list args; - va_start(args,fmt); - /* printf("\n+->\n| \x1b[38:2:169:225:61mTesting\x1b[0m \""); */ /* This command works in all of the terminals I tested, except Konsole (whic is funny, because it's xterm-based, and xterm supports it). */ - printf("\n+->\n| \x1b[38;2;169;225;61mTesting\x1b[0m \""); - vprintf(fmt,args); - printf("\"...\n+->\n\n"); - va_end(args); -} -static void testmsgdone() { - /* printf("\n+->\n| \x1b[38:2::61:225:169mDone\x1b[0m!\n+->\n"); */ - printf("\n+->\n| \x1b[38;2;61;225;169mDone\x1b[0m!\n+->\n"); +static char const * testnm(int n) { + switch(n) { + default: + return "N/A"; + case 0x0: + return "Error messages"; + case 0x1: + return "UTF-8 encoding/decoding"; + case 0x2: + return "Printing (u8c_print)"; + case 0x3: + return "Printing (u8c_println)"; + case 0x4: + return "Text formatting"; + case 0x5: + return "Colour text"; + case 0x6: + return "Combining characters"; + case 0x7: + return "String comparison"; + case 0x8: + return "u8c_isalnum"; + case 0x9: + return "u8c_isalpha"; + case 0xA: + return "u8c_isdigit"; + case 0xB: + return "u8c_ispunct"; + case 0xC: + return "UTF-32 string literals"; + case 0xD: + return "String concatenation"; + case 0xE: + return "Sub-strings"; + case 0xF: + return "u8c_isxdigit"; + case 0x10: + return "Printing (u8c_dbgprint)"; + case 0x11: + return "u8c_strfndchr"; + case 0x12: + return "u8c_strfndpat"; + case 0x13: + return "String insertion"; + case 0x14: + return "u8c_islower"; + case 0x15: + return "u8c_isupper"; + case 0x16: + return "u8c_uninm"; + case 0x17: + return "u8c_uniblk"; + } } int main(int const argc,char const * * argv) { - if(u8c_init()) { + if(argc == 0x2 && !strcmp(argv[SIZE_C(0x1)],"--help")) { + exit(hlpscrn(argv[SIZE_C(0x0)])); + } + if(u8c_init().stat) { printf("Unable to initialise u8c!\n"); exit(EXIT_FAILURE); } u8c_regerrhandl(u8c_errtyp_all,errhandl); u8c_setfmt(UINT8_C(0xC),UINT8_C(0x1)); - printf("u8c version: %" PRIXLEAST64 ".\n",u8c_ver); - printf("Debug build: %" 
PRIXLEAST8 ".\n",u8c_dbg); - printf("Thread safe: %" PRIXLEAST8 ".\n",u8c_thrdsafe); + printf("u8c version: %" PRIXLEAST64 "\n",u8c_ver); + printf("Debug build: %s\n",u8c_dbg ? "true" : "false"); + printf("Thread safe: %s\n",u8c_thrdsafe ? "true" : "false"); if(argc == 0x2) { test(atoi(argv[SIZE_C(0x1)])); } else { - for(int n = 0x0;n <= 0x13;n += 0x1) { + for(int n = 0x0;n <= 0x17;n += 0x1) { test(n); } } |