diff options
103 files changed, 6675 insertions, 6109 deletions
@@ -1,4 +1,2 @@ -*.o -*.so -/test -vgcore.* +/.vscode +/build
\ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..d9d99ba --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,138 @@ +cmake_minimum_required( + VERSION + 3.20 +) +project( + u8c + VERSION + 27 + DESCRIPTION + "Unicode manipulation library." + HOMEPAGE_URL + "https://mandelbrot.dk/delta/u8c" + LANGUAGES + CXX +) +set( + CMAKE_CXX_STANDARD + 23 +) +set( + CMAKE_CXX_EXTENSIONS + OFF +) + +# Options: +option( + U8C_TEST + "Build test program." + OFF +) + +# Disable in-source builds: +if( + "${PROJECT_BINARY_DIR}" + STREQUAL + "${PROJECT_SOURCE_DIR}" +) + message( + FATAL_ERROR + "In-source building is not allowed." + ) +endif() + +# Compiler Settings: +message( + STATUS + "Enabling colour output for Clang or GCC..." +) +if( + "${CMAKE_CXX_COMPILER_ID}" + STREQUAL + "Clang" +) + add_compile_options( + "-fcolor-diagnostics" + ) +elseif( + "${CMAKE_CXX_COMPILER_ID}" + STREQUAL + "GNU" +) + add_compile_options ( + "-fdiagnostics-color=always" + ) +endif() +message( + STATUS + "Enabling compile warnings..." +) +if( + MSVC +) + add_compile_options( + "/W4" + "/WX" + ) +else() + add_compile_options( + "-Wfatal-errors" + "-Wall" + "-Werror" + "-Wextra" + "-Wno-attributes" + "-pedantic-errors" + ) +endif() +if( + CMAKE_BUILD_TYPE + MATCHES + Release +) + message( + STATUS + "Setting optimisation level..." 
+ ) + if( + MSVC + ) + add_compile_options( + "/Os" + ) + else() + add_compile_options( + "-Os" + ) + endif() +endif() +include_directories( + "${PROJECT_SOURCE_DIR}/u8c/include" +) + +# u8c settings: +add_library( + u8c + SHARED + "u8c/src/operator.cc" + "u8c/src/u8c/fmt.cc" + "u8c/src/u8c/print.cc" + "u8c/src/u8c/println.cc" +) + +# Test settings: +if( + U8C_TEST +) + add_executable( + test + "u8c-check/src/test.cc" + ) + add_dependencies( + test + u8c + ) + target_link_libraries( + test + u8c + ) +endif() diff --git a/Makefile b/Makefile deleted file mode 100644 index c42ee6e..0000000 --- a/Makefile +++ /dev/null @@ -1,86 +0,0 @@ -.DEFAULT_GOAL = $(LIB) -CC = gcc -CFLAGS = -std=c2x -Wall -Wextra -Wmissing-prototypes -pedantic-errors -Iinclude -fPIC -ifeq ($(thrdsafe),1) -CFLAGS += -Du8c_bethrdsafe -endif -ifeq ($(debug),1) -CFLAGS += -O0 -g -else -CFLAGS += -Os -DNDEBUG -endif -LDFLAGS = -shared -ifeq ($(thrdsafe),1) -LDFLAGS += -lpthread -endif -HDRS = \ - include/u8c/SIZE_C.h \ - include/u8c/chk.h \ - include/u8c/err.h \ - include/u8c/fmt.h \ - include/u8c/main.h \ - include/u8c/str.h \ - include/u8c/u16.h \ - include/u8c/u8.h -HDRS_PRIV = \ - include/u8c/intern.h -SRCS = \ - src/u8c/chk.h.d/isalnum.c \ - src/u8c/chk.h.d/isalpha.c \ - src/u8c/chk.h.d/iscntrl.c \ - src/u8c/chk.h.d/isdigit.c \ - src/u8c/chk.h.d/islower.c \ - src/u8c/chk.h.d/ispunct.c \ - src/u8c/chk.h.d/isspace.c \ - src/u8c/chk.h.d/isspace.c \ - src/u8c/chk.h.d/issurro.c \ - src/u8c/chk.h.d/isupper.c \ - src/u8c/chk.h.d/isxdigit.c \ - src/u8c/err.h.d/geterr.c \ - src/u8c/err.h.d/regerrhandl.c \ - src/u8c/err.h.d/seterr.c \ - src/u8c/fmt.h.d/fmt.c \ - src/u8c/fmt.h.d/print.c \ - src/u8c/fmt.h.d/println.c \ - src/u8c/fmt.h.d/setfmt.c \ - src/u8c/fmt.h.d/vfmt.c \ - src/u8c/fmt.h.d/vprint.c \ - src/u8c/intern.h.d/dat.c \ - src/u8c/main.h.d/abrtfn.c \ - src/u8c/main.h.d/debug.c \ - src/u8c/main.h.d/end.c \ - src/u8c/main.h.d/init.c \ - src/u8c/main.h.d/thrdsafe.c \ - src/u8c/main.h.d/uniblk.c \ - 
src/u8c/main.h.d/uninm.c \ - src/u8c/str.h.d/stralloc.c \ - src/u8c/str.h.d/strcat.c \ - src/u8c/str.h.d/strcmp.c \ - src/u8c/str.h.d/strcp.c \ - src/u8c/str.h.d/strfndchr.c \ - src/u8c/str.h.d/strfndpat.c \ - src/u8c/str.h.d/strfree.c \ - src/u8c/str.h.d/strins.c \ - src/u8c/str.h.d/strsubstr.c \ - src/u8c/str.h.d/strsz.c \ - src/u8c/u8.h.d/u8alloc.c \ - src/u8c/u8.h.d/u8dec.c \ - src/u8c/u8.h.d/u8enc.c \ - src/u8c/u8.h.d/u8free.c \ - src/u8c/u16.h.d/u16alloc.c \ - src/u8c/u16.h.d/u16free.c -OBJS = $(SRCS:.c=.o) -LIB = libu8c.so -$(LIB): $(OBJS) - $(CC) $(LDFLAGS) $^ -o $@ -$(OBJS): $(HDRS) $(HDRS_PRIV) -test: $(LIB) test.c - $(CC) -std=c2x -Wall -Wextra -Wpedantic -Iinclude -O3 -g -L. -lu8c -o $@ [email protected] -.PHONY: clean install -clean: - rm --force test $(LIB) $(OBJS) -install: $(LIB) - mkdir --parents $(DESTDIR)/include/u8c - mkdir --parents $(DESTDIR)/lib - install --mode=444 --verbose $(HDRS) $(DESTDIR)/include/u8c - install --mode=555 --verbose $(LIB) $(DESTDIR)/lib @@ -1,46 +1,61 @@ # u8c -[*u8c*](https://mandelbrot.dk/delta/u8c) is a free, open-source and (very) portable library for Unicode manipulation. +[*u8c*](https://mandelbrot.dk/delta/u8c) is a free, open-source and portable general-purpose library. It is written in the C++ *(C++2b)* programming language. A wrapper for C may arrive in the future. -*u8c* is very portable, and should work (with a trivial build system) on **any** hosted conforming C23 implementation, be it one with 64 bits a byte, 4096 byte shorts, with or without multithreading. In the event it doesn't for you, I really want you to open an issue on GitLab (<https://mandelbrot.dk>). +## Features -## Compiling +u8c has the following features: -*u8c* can be compiled via the provided *Makefile* using the command `make`. +* Array container with support for compile-time, dynamic, and static arrays. +* Compile-time-compatible alternatives to the C maths library *(incomplete)*. 
+* Compile-time-compatible alternatives to the C string manipulation facilities *(incomplete)*. +* Facilities for determining the target platform using immediate functions *(Supports major platforms, including AIX, FreeBSD, Linux, macOS, OpenBSD, Windows NT)*. +* Optional platform-specific behaviour, including vendor-specific attributes and pointer restriction. +* Quota type for fractional mathematics *(incomplete)*. +* String container with built-in UTF-conversions. +* Unicode *(UTF-8, UTF-16 and UTF-32)* conversion and manipulation facilities *(UTF-16 lacking full support)*. -By default, GCC-11 is used to build *u8c*. Clang-12 doesn't support C23 to the same extend, and may therefore require some modifications. +With more to come in the future. -Currently, the following C23 features are required: +## Installing -* Attributes. -* Binary literals. +TBA -Yet even with only these two seemingly trivial features, the newest version of Clang (Clang-12 at this time) is unable to compile the library without the `-Wno-gnu-binary-literal`. +## Compiling -To enable debug mode, the option `debug=1` must be passed to Make. +u8c has been tested to work with Clang 14. -To enable thread-safe operations (where logical), the option `thrdsafe=1` must be passed to Make. +1. Check out u8c using `git`: -## Installing + * Clone the repository: `git clone https://mandelbrot.dk/delta/u8c.git` + * Or do a shallow clone: `git clone --depth 1 https://mandelbrot.dk/delta/u8c.git` -*u8c* can be installed, either via the `install` target in the provided Makefile, or using the PKGBUILD found [here](https://mandelbrot.dk/pkgbuild/delta/u8c). +2. Configure the compilation of u8c: -If it's installed so, one must make a note of the output, as it logs what system files have changed. + * `cd u8c` + * `cmake -B build` -Using the PKGBUILD is as simple as `git clone https://mandelbrot.dk/pkg/u8c.git && cd u8c && makepkg --clean --install --syncdeps` (on Arch-based distributions). 
+ You can append the following options to the command: -## Copyright & License + * `-DCMAKE_BUILD_TYPE` — Set the build type. Can be either `Debug` or `Release`. + * `-DU8C_TEST` — Enables compilation of the test program if set to `ON`. -Copyright 2021 Gabriel Jensen. +3. Build u8c: -All rights reserved. + * `cmake --build build` + +## Contributing + +u8c does not currently accept **any** merge requests. + +## Copyright & License -This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. +Copyright 2021 Gabriel Jensen -This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +This file is part of u8c. -See the GNU Affero General Public License for more details. +u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. -You should have received a copy of the GNU Affero General Public License along with this program. +u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. -If not, see <https://www.gnu.org/licenses/>. +You should have received a copy of the GNU Affero General Public License along with u8c. If not, see <https://www.gnu.org/licenses/>.
\ No newline at end of file diff --git a/changelog.md b/changelog.md index bc97b12..795b6bd 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,9 @@ +# 23 + +* Rewrite for C++ *(read readme for list of current features)*. +* Use CMake for building. +* Update logo. + # 22 * Remove documentation (too hight-maintainence). diff --git a/include/u8c/SIZE_C.h b/include/u8c/SIZE_C.h deleted file mode 100644 index be253f3..0000000 --- a/include/u8c/SIZE_C.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# if !defined(SIZE_C) -# include <limits.h> -# include <stdint.h> -# if SIZE_MAX == UINT_LEAST8_MAX -# define SIZE_C(val) UINT8_C(val) -# elif SIZE_MAX == UINT_LEAST16_MAX -# define SIZE_C(val) UINT16_C(val) -# elif SIZE_MAX == UINT_LEAST32_MAX -# define SIZE_C(val) UINT32_C(val) -# elif SIZE_MAX == UINT_LEAST64_MAX -# define SIZE_C(val) UINT64_C(val) -# else -# define SIZE_C(val) ((size_t)UINTMAX_C(val)) -# endif -# endif diff --git a/include/u8c/chk.h b/include/u8c/chk.h deleted file mode 100644 index 03e4ff2..0000000 --- a/include/u8c/chk.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. 
- - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# if !defined(u8c_hdr_chk) -# define u8c_hdr_chk -# include <stdbool.h> -# include <uchar.h> -/* Enumerations: */ -/* Type definitions: */ -/* Structures: */ -struct u8c_isalnum_tuple { - bool res; - bool stat; -}; -struct u8c_isalpha_tuple { - bool res; - bool stat; -}; -struct u8c_iscntrl_tuple { - bool res; - bool stat; -}; -struct u8c_isdigit_tuple { - bool res; - bool stat; -}; -struct u8c_islower_tuple { - bool res; - bool stat; -}; -struct u8c_ispunct_tuple { - bool res; - bool stat; -}; -struct u8c_isspace_tuple { - bool res; - bool stat; -}; -struct u8c_issurro_tuple { - bool res; - bool stat; -}; -struct u8c_isupper_tuple { - bool res; - bool stat; -}; -struct u8c_isxdigit_tuple { - bool res; - bool stat; -}; -/* Functions: */ -extern struct u8c_isalnum_tuple u8c_isalnum( char32_t const chr); /* Is alphanumeric */ -extern struct u8c_isalpha_tuple u8c_isalpha( char32_t const chr); /* Is alphabetic */ -extern struct u8c_iscntrl_tuple u8c_iscntrl( char32_t const chr); /* Is control character */ -extern struct u8c_isdigit_tuple u8c_isdigit( char32_t const chr); /* Is digit */ -extern struct u8c_islower_tuple u8c_islower( char32_t const chr); /* Is lowercase */ -extern struct u8c_ispunct_tuple u8c_ispunct( char32_t const chr); /* Is punctuation */ -extern struct u8c_isspace_tuple u8c_isspace( char32_t const chr); /* Is space */ -extern struct 
u8c_issurro_tuple u8c_issurro( char32_t const chr); /* Is surrogate point */ -extern struct u8c_isupper_tuple u8c_isupper( char32_t const chr); /* Is uppercase */ -extern struct u8c_isxdigit_tuple u8c_isxdigit(char32_t const chr); /* Is hexadecimal digit */ -/* Constants & Variables: */ -/* Macros: */ -# endif diff --git a/include/u8c/err.h b/include/u8c/err.h deleted file mode 100644 index 26d4b2f..0000000 --- a/include/u8c/err.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# if !defined(u8c_hdr_err) -# define u8c_hdr_err -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -/* Enumerations: */ -enum u8c_errtyp { - u8c_errtyp_badalloc, /* Bad allocation */ - u8c_errtyp_badio, /* Bad input or output */ - u8c_errtyp_stroor, /* UTF-32 out of range */ - u8c_errtyp_u8oor, /* UTF-8 out of range */ - u8c_errtyp_deferr, /* Default error */ - u8c_errtyp_untermin, /* Unterminated input */ - u8c_errtyp_all, /* All */ -}; -/* Type definitions: */ -typedef void (* u8c_errhandltyp)(enum u8c_errtyp); /* Error handler type */ -/* Structures: */ -struct u8c_geterr_tuple { - char32_t const * err; - size_t errsz; - bool stat; -}; -struct u8c_regerrhandl_tuple { - bool stat; -}; -struct u8c_seterr_tuple { - bool stat; -}; -/* Functions: */ -extern struct u8c_geterr_tuple u8c_geterr( void); /* Get error */ -extern struct u8c_regerrhandl_tuple u8c_regerrhandl(enum u8c_errtyp typ,u8c_errhandltyp errhandl); /* Register error handler */ -extern struct u8c_seterr_tuple u8c_seterr( enum u8c_errtyp typ,char32_t const * const restrict msg); /* Set error */ -/* Constants & Variables: */ -/* Macros: */ -# endif diff --git a/include/u8c/fmt.h b/include/u8c/fmt.h deleted file mode 100644 index 5ddfb02..0000000 --- a/include/u8c/fmt.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# if !defined(u8c_hdr_fmt) -# define u8c_hdr_fmt -# include <stdarg.h> -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <stdio.h> -# include <uchar.h> -/* Enumerations: */ -enum u8c_fmttyp { - u8c_fmttyp_bgcol, /* Background colour (uint_least32_t) */ - u8c_fmttyp_bgcol0, /* Background colour #0 */ - u8c_fmttyp_bool, /* Boolean (bool) */ - u8c_fmttyp_byt, /* Byte (char) */ - u8c_fmttyp_chr, /* Character (uint_least32_t) */ - u8c_fmttyp_fgcol, /* Foreground colour (uint_least32_t) */ - u8c_fmttyp_fgcol0, /* Foreground colour #0 */ - u8c_fmttyp_int, /* Integer (int) */ - u8c_fmttyp_int16, /* Integer 16 bit (int_least16_t) */ - u8c_fmttyp_int32, /* Integer 32 bit (int_least32_t) */ - u8c_fmttyp_int64, /* Integer 64 bit (int_least64_t) */ - u8c_fmttyp_int8, /* Integer 8 bit (int_least8_t) */ - u8c_fmttyp_llong, /* Long long (long long) */ - u8c_fmttyp_long, /* Long (long) */ - u8c_fmttyp_sbyt, /* Signed byte (signed char) */ - u8c_fmttyp_shrt, /* Short (short) */ - u8c_fmttyp_str, /* String (char32_t const *) */ - u8c_fmttyp_sz, /* Size (size_t) */ - u8c_fmttyp_tm, /* Time (uint_least64_t) */ - u8c_fmttyp_ubyt, /* Unsigned byte (unsigned char) */ - u8c_fmttyp_uint, /* Unsigned integer (unsigned int) */ - u8c_fmttyp_uint16, /* Unsigned integer 16 bit (uint_least16_t) */ - u8c_fmttyp_uint32, /* Unsigned integer 32 bit (uint_least32_t) */ - u8c_fmttyp_uint64, /* Unsigned integer 64 bit (uint_least64_t) */ - u8c_fmttyp_uint8, /* Unsigned integer 8 bit (uint_least8_t) */ - u8c_fmttyp_ulong, /* Unsigned long (unsigned long) */ - u8c_fmttyp_ullong, /* Unsigned long long (unsigned long long) */ - u8c_fmttyp_ushrt, /* Unsigned short (unsigned short) */ -}; -/* Type definitions: */ -/* Structures: */ -struct u8c_fmt_tuple { - char32_t const * str; - size_t strsz; - bool stat; -}; -struct u8c_print_tuple { - bool stat; -}; -struct u8c_println_tuple { - bool stat; -}; -struct u8c_setfmt_tuple { - bool stat; -}; -struct u8c_vfmt_tuple { - char32_t 
const * str; - size_t strsz; - bool stat; -}; -struct u8c_vprint_tuple { - bool stat; -}; -/* Functions: */ -extern struct u8c_fmt_tuple u8c_fmt( char32_t const * const restrict in, ...); /* Format */ -extern struct u8c_print_tuple u8c_print( FILE * restrict fp, char32_t const * const restrict msg, ...); /* Print */ -extern struct u8c_println_tuple u8c_println(FILE * restrict fp, char32_t const * const restrict msg, ...); /* Print line */ -extern struct u8c_setfmt_tuple u8c_setfmt( uint_least8_t const base,bool const endian); /* Set format */ -extern struct u8c_vfmt_tuple u8c_vfmt( char32_t const * const restrict in, va_list args); /* Variadic format */ -extern struct u8c_vprint_tuple u8c_vprint( FILE * restrict fp, char32_t const * const restrict msg, va_list args); /* Variadic print */ -/* Constants & Variables: */ -static uint_least32_t const u8c_col_azure = UINT32_C(0x3DA9E1); -static uint_least32_t const u8c_col_ash = UINT32_C(0xD2D2CC); -static uint_least32_t const u8c_col_black = UINT32_C(0x444747); -static uint_least32_t const u8c_col_blue = UINT32_C(0x3D3DE1); -static uint_least32_t const u8c_col_chartreuse = UINT32_C(0xA9E13D); -static uint_least32_t const u8c_col_cyan = UINT32_C(0x3DE1E1); -static uint_least32_t const u8c_col_green = UINT32_C(0x3ED13D); -static uint_least32_t const u8c_col_magenta = UINT32_C(0xE13DE1); -static uint_least32_t const u8c_col_mint = UINT32_C(0x3DE1A9); -static uint_least32_t const u8c_col_orange = UINT32_C(0xE1A93D); -static uint_least32_t const u8c_col_red = UINT32_C(0xE13D3D); -static uint_least32_t const u8c_col_rose = UINT32_C(0xE13DA9); -static uint_least32_t const u8c_col_silver = UINT32_C(0x9CA1A1); -static uint_least32_t const u8c_col_violet = UINT32_C(0xA93dE1); -static uint_least32_t const u8c_col_white = UINT32_C(0xF8F8F1); -static uint_least32_t const u8c_col_yellow = UINT32_C(0xE1E13D); -/* Macros: */ -# if defined(NDEBUG) -/* Debug print */ -# define u8c_dbgprint(...) 
((void)0x0) -# else -# define u8c_dbgprint(...) u8c_println(stderr,__VA_ARGS__) -# endif -# endif diff --git a/include/u8c/intern.h b/include/u8c/intern.h deleted file mode 100644 index 4549337..0000000 --- a/include/u8c/intern.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# if !defined(u8c_sym_dattyp) -# define u8c_sym_dattyp -# if defined(u8c_bethrdsafe) && defined(__STDC_NO_THREADS__) -# error u8c is set to be thread-safe, but the implementation does not support multithreading. 
-# endif -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <uchar.h> -# include <u8c/err.h> -# if defined(u8c_bethrdsafe) -# include <threads.h> -# endif -/* Enumerations: */ -/* Type definitions: */ -/* Structures: */ -struct u8c_dattyp { /* Data type */ - char32_t const * err; /* Error */ - u8c_errhandltyp errhandls[(size_t)u8c_errtyp_all]; /* Error handlers */ - uint_least8_t fmtbase; /* Format base */ - bool fmtendian; /* Format endian */ - uint_least8_t stat; /* Status */ -# if defined(u8c_bethrdsafe) - mtx_t errhandlslock; /* Error handlers lock */ - mtx_t errlock; /* Error lock */ - mtx_t fmtlock; /* Format lock */ - mtx_t outlock; /* Output lock */ -# endif -}; -/* Functions: */ -/* Constants & Variables: */ -extern struct u8c_dattyp u8c_dat; /* Data */ -/* Macros: */ -# endif diff --git a/include/u8c/main.h b/include/u8c/main.h deleted file mode 100644 index 4004550..0000000 --- a/include/u8c/main.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# if !defined(u8c_hdr_main) -# define u8c_hdr_main -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <stdnoreturn.h> -# include <uchar.h> -/* Enumerations: */ -/* Type definitions: */ -/* Structures: */ -struct u8c_end_tuple { - bool stat; -}; -struct u8c_init_tuple { - bool stat; -}; -struct u8c_uniblk_tuple { - char32_t const * blk; - size_t blksz; - bool stat; -}; -struct u8c_uninm_tuple { - char32_t const * nm; - size_t nmsz; - bool stat; -}; -/* Functions: */ -noreturn extern void u8c_abrtfn(char const * const restrict fl, long const ln,char const * const restrict fn,char const * const restrict why); /* Abort function */ -extern struct u8c_end_tuple u8c_end( void); /* End */ -extern struct u8c_init_tuple u8c_init( void); /* Initialise */ -extern struct u8c_uniblk_tuple u8c_uniblk(char32_t const chr); /* Unicode block */ -extern struct u8c_uninm_tuple u8c_uninm( char32_t const chr); /* Unicode name */ -/* Constants & Variables: */ -extern bool const u8c_dbg; /* Debug */ -extern bool const u8c_thrdsafe; /* Thread-safe */ -static char32_t const u8c_unimax = U'\U0010FFFF'; /* Unicode maximum */ -static uint_least64_t const u8c_ver = UINT64_C(0x1A); /* Version */ -/* Macros: */ -# define u8c_abrt(why) u8c_abrtfn(__FILE__,(long)__LINE__,__func__,why) /* Abort */ -# endif diff --git a/include/u8c/str.h b/include/u8c/str.h deleted file mode 100644 index eecea26..0000000 --- a/include/u8c/str.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# if !defined(u8c_hdr_str) -# define u8c_hdr_str -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <uchar.h> -/* Enumerations: */ -/* Type definitions: */ -/* Structures: */ -struct u8c_stralloc_tuple { - bool stat; - char32_t * str; -}; -struct u8c_strcat_tuple { - bool stat; - char32_t const * str; - size_t strsz; -}; -struct u8c_strcmp_tuple { - bool stat; - uint_least8_t res; -}; -struct u8c_strcp_tuple { - bool stat; - char32_t const * str; - size_t strsz; -}; -struct u8c_strfndchr_tuple { - size_t pos; - bool stat; -}; -struct u8c_strfndpat_tuple { - size_t pos; - bool stat; -}; -struct u8c_strfree_tuple { - bool stat; -}; -struct u8c_strins_tuple { - bool stat; - char32_t const * str; - size_t strsz; -}; -struct u8c_strsubstr_tuple { - bool stat; - char32_t const * str; - size_t strsz; -}; -struct u8c_strsz_tuple { - bool stat; - size_t sz; -}; -/* Functions: */ -extern struct u8c_stralloc_tuple u8c_stralloc( size_t const sz); /* String allocate */ -extern struct u8c_strcat_tuple u8c_strcat( char32_t const * const restrict str, char32_t const * const rstr); /* String concatenate */ -extern struct u8c_strcmp_tuple u8c_strcmp( char32_t const * const restrict lstr,char32_t const * const restrict rstr); /* String compare */ -extern struct u8c_strcp_tuple u8c_strcp( char32_t const * const restrict in); /* String copy */ -extern struct u8c_strfndchr_tuple u8c_strfndchr(char32_t const * const restrict in, char32_t const chr); /* String find character */ -extern struct u8c_strfndpat_tuple u8c_strfndpat(char32_t const * const restrict in, char32_t const * const restrict pat); /* String find pattern */ -extern struct u8c_strfree_tuple u8c_strfree( char32_t const * const restrict str); /* String free */ -extern struct u8c_strins_tuple 
u8c_strins( size_t const pos, char32_t const * const restrict str0,char32_t const * const restrict str1); /* String insert */ -extern struct u8c_strsubstr_tuple u8c_strsubstr(size_t const start,size_t const len, char32_t const * const restrict in); /* String sub-string */ -extern struct u8c_strsz_tuple u8c_strsz( char32_t const * const restrict in); /* String size */ -/* Constants & Variables: */ -/* Macros: */ -# endif diff --git a/include/u8c/u16.h b/include/u8c/u16.h deleted file mode 100644 index 629d39a..0000000 --- a/include/u8c/u16.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# if !defined(u8c_hdr_u16) -# define u8c_hdr_u16 -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -/* Enumerations: */ -/* Type definitions: */ -/* Structures: */ -struct u8c_u16alloc_tuple { - bool stat; - char16_t * u16; -}; -struct u8c_u16free_tuple { - bool stat; -}; -/* Functions: */ -extern struct u8c_u16alloc_tuple u8c_u16alloc(size_t const sz); /* UTF-16 allocate */ -extern struct u8c_u16free_tuple u8c_u16free( char16_t const * const restrict u16); /* UTF-16 free */ -/* Constants & Variables: */ -/* Macros: */ -# endif diff --git a/include/u8c/u8.h b/include/u8c/u8.h deleted file mode 100644 index c7b6cc5..0000000 --- a/include/u8c/u8.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# if !defined(u8c_hdr_u8) -# define u8c_hdr_u8 -# include <stdbool.h> -# include <stddef.h> -# include <uchar.h> -/* Enumerations: */ -/* Type definitions: */ -/* Structures: */ -struct u8c_u8alloc_tuple { - bool stat; - unsigned char * u8; -}; -struct u8c_u8dec_tuple { - bool stat; - char32_t const * str; - size_t strsz; -}; -struct u8c_u8enc_tuple { - bool stat; - unsigned char const * u8; - size_t u8sz; -}; -struct u8c_u8free_tuple { - bool stat; -}; -/* Functions: */ -extern struct u8c_u8alloc_tuple u8c_u8alloc(size_t const sz); /* UTF-8 allocate */ -extern struct u8c_u8dec_tuple u8c_u8dec( unsigned char const * const restrict u8); /* UTF-8 decode */ -extern struct u8c_u8enc_tuple u8c_u8enc( char32_t const * const restrict u8); /* UTF-8 encode */ -extern struct u8c_u8free_tuple u8c_u8free( unsigned char const * const restrict u8); /* UTF-8 free */ -/* Constants & Variables: */ -/* Macros: */ -# endif diff --git a/src/u8c/chk.h.d/isalnum.c b/src/u8c/chk.h.d/isalnum.c deleted file mode 100644 index 2c8115b..0000000 --- a/src/u8c/chk.h.d/isalnum.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/chk.h> -struct u8c_isalnum_tuple u8c_isalnum(char32_t const _chr) { - struct u8c_isalnum_tuple ret = { - .stat = false, - }; - ret.res = u8c_isalpha(_chr).res || u8c_isdigit(_chr).res; - return ret; -} diff --git a/src/u8c/chk.h.d/isdigit.c b/src/u8c/chk.h.d/isdigit.c deleted file mode 100644 index 61665cf..0000000 --- a/src/u8c/chk.h.d/isdigit.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/chk.h> -struct u8c_isdigit_tuple u8c_isdigit(char32_t const _chr) { - struct u8c_isdigit_tuple ret = { - .stat = false, - }; - switch(_chr) { - default: - ret.res = false; - break; - case U'0': /* DIGIT ZERO */ - case U'1': /* DIGIT ONE */ - case U'2': /* DIGIT TWO */ - case U'3': /* DIGIT THREE */ - case U'4': /* DIGIT FOUR */ - case U'5': /* DIGIT FIVE */ - case U'6': /* DIGIT SIX */ - case U'7': /* DIGIT SEVEN */ - case U'8': /* DIGIT EIGHT */ - case U'9': /* DIGIT NINE */ - case U'\u218A': /* TURNED DIGIT TWO */ - case U'\u218B': /* TURNED DIGIT THREE */ - ret.res = true; - break; - } - return ret; -} diff --git a/src/u8c/chk.h.d/ispunct.c b/src/u8c/chk.h.d/ispunct.c deleted file mode 100644 index 2d2a276..0000000 --- a/src/u8c/chk.h.d/ispunct.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/chk.h> -struct u8c_ispunct_tuple u8c_ispunct(char32_t const _chr) { - struct u8c_ispunct_tuple ret = { - .stat = false, - }; - switch(_chr) { - default: - ret.res = false; - break; - case U'!': /* EXCLAMATION MARK */ - case U'\"': /* QUOTATION MARK */ - case U'#': /* NUMBER SIGN */ - case U'\u0024': /* DOLLAR SIGN */ - case U'%': /* PERCENT SIGN */ - case U'&': /* AMPERSAND */ - case U'\'': /* APOSTROPHE */ - case U'(': /* LEFT PARANTHESIS */ - case U')': /* RIGHT PARANTHESIS */ - case U'*': /* ASTERISK */ - case U'+': /* PLUS SIGN */ - case U',': /* COMMA */ - case U'-': /* HYPHEN-MINUS */ - case U'.': /* FULL STOP */ - case U'/': /* SOLIDUS */ - case U':': /* COLON */ - case U';': /* SEMICOLON */ - case U'<': /* LESS-THAN SIGN */ - case U'=': /* EQUALS SIGN */ - case U'>': /* GREATER-THAN SIGN */ - case U'\?': /* QUESTION MARK */ - case U'\u0040': /* COMMERCIAL AT */ - case U'[': /* LEFT SQUARE BRACKET */ - case U'\\': /* REVERSE SOLIDUS */ - case U']': /* RIGHT SQUARE BRACKET */ - case U'^': /* CIRCUMFLEX ACCENT */ - case U'_': /* LOW LINE */ - case U'\u0060': /* GRAVE ACCENT */ - case U'{': /* LEFT CURLY BRACKET */ - case U'|': /* VERTICAL LINE */ - case U'}': /* RIGHT CURLY BRACKET */ - case U'~': /* TILDE */ - case U'\u00A1': /* INVERT EXCLAMATION MARK */ - case U'\u00A2': /* CENT SIGN */ - case U'\u00A3': /* POUND SIGN */ - case U'\u00A4': /* CURRENCY SIGN */ - case U'\u00A5': /* YEN SIGN */ - case U'\u00A7': /* SECTION SIGN */ - case U'\u00A8': /* DIAERESIS */ - case U'\u00A9': /* COPYRIGHT SIGN */ - case U'\u00AB': /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ - case U'\u00AE': /* REGISTERED SIGN */ - case U'\u00B0': /* DEGREE SIGN */ - case U'\u00B4': /* ACUTE ACCENT */ - case U'\u00B6': /* PILCROW SIGN */ - case U'\u00BB': /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ - case U'\u00B1': /* PLUS MINUS SIGN */ - case U'\u00BF': /* INVERT QUESTION MARK */ - case 
U'\u00D7': /* MULTIPLICATION SIGN */ - case U'\u00F7': /* DIVISION SIGN */ - case U'\u2010': /* HYPHEN */ - case U'\u2013': /* EN DASH */ - case U'\u2014': /* EM DASH */ - case U'\u2018': /* LEFT SINGLE QUOTATION MARK */ - case U'\u2019': /* RIGHT SINGLE QUOTATION MARK */ - case U'\u201C': /* LEFT DOUBLE QUOTATION MARK */ - case U'\u201D': /* RIGHT DOUBLE QUOTATION MARK */ - case U'\u2026': /* HORIZONTAL ELLIPSIS */ - case U'\u2030': /* PER MILLE SIGN */ - case U'\u2031': /* PER TEN THOUSAND SIGN */ - case U'\u2032': /* PRIME */ - case U'\u2033': /* DOUBLE PRIME */ - case U'\u2034': /* TRIPLE PRIME */ - case U'\u2035': /* REVERSED PRIME */ - case U'\u2036': /* REVERSED DOUBLE PRIME */ - case U'\u2037': /* REVERSED TRIPLE PRIME */ - case U'\u203C': /* DOUBLE EXCLAMATION MARK */ - case U'\u203D': /* INTERROBANG */ - case U'\u2047': /* DOUBLE QUOTATION MARK */ - case U'\u2048': /* QUESTION EXCLAMATION MARK */ - case U'\u2049': /* EXCLAMATION QUESTION MARK */ - case U'\u20A3': /* FRENCH FRANC SIGN */ - case U'\u20A4': /* LIRA SIGN */ - case U'\u20A8': /* RUPEE SIGN */ - case U'\u20A9': /* WON SIGN */ - case U'\u20AC': /* EURO SIGN */ - case U'\u20B9': /* INDIAN RUPEE SIGN */ - case U'\u20BF': /* BITCOIN SIGN */ - case U'\u2103': /* DEGREE CELSIUS */ - case U'\u2107': /* EULER CONSTANT */ - case U'\u2109': /* DEGREE FAHRENHEIT */ - case U'\u210E': /* PLANCK CONSTANT */ - case U'\u2117': /* SOUND RECORDING COPYRIGHT */ - case U'\u2122': /* TRADE MARK SIGN */ - case U'\u2125': /* OUNCE SIGN */ - case U'\u2126': /* OHM SIGN */ - case U'\u212A': /* KELVIN SIGN */ - case U'\u214D': /* AKTIESELSKAB */ - case U'\u2205': /* EMPTY SET */ - case U'\u2212': /* MINUS SIGN */ - case U'\u221A': /* SQUARE ROOT */ - case U'\u221B': /* CUBE ROOT */ - case U'\u221C': /* FOURTH ROOT */ - case U'\u221E': /* INFINITY */ - case U'\u2228': /* LOGICAL OR */ - case U'\u2248': /* ALMOST EQUAL TO */ - case U'\u2260': /* NOT EQUAL TO */ - case U'\u2264': /* LESS-THAN OR EQUAL TO */ - case 
U'\u2265': /* GREATER-THAN OR EQUAL TO */ - case U'\u2609': /* SUN */ - case U'\u263F': /* MERCURY */ - case U'\u2640': /* FEMALE SIGN */ - case U'\u2641': /* EARTH */ - case U'\u2642': /* MALE SIGN */ - case U'\u2643': /* JUPITER */ - case U'\u2644': /* SATURN */ - case U'\u2645': /* URANUS */ - case U'\u2646': /* NEPTUNE */ - case U'\u2647': /* PLUTO */ - case U'\u26A2': /* DOUBLED FEMALE SIGN */ - case U'\u26A3': /* DOUBLED MALE SIGN */ - case U'\u26A4': /* INTERLOCKED FEMALE AND MALE SIGN */ - case U'\u26A5': /* MALE AND FEMALE SIGN */ - case U'\u26B3': /* CERES */ - case U'\u26B4': /* PALLAS */ - case U'\u26B5': /* JUNO */ - case U'\u26B6': /* VESTA */ - case U'\u26B7': /* CHIRON */ - case U'\u2BD8': /* PROSERPINA */ - case U'\u2BD9': /* ASTRAEA */ - case U'\u2BDA': /* HYGIEA */ - case U'\u2BDB': /* PHOLOS */ - case U'\u2BDC': /* NESSUS */ - case U'\u2E2E': /* INVERTED QUESTION MARK */ - case U'\u33D7': /* SQUARE PH */ - case U'\uFDFC': /* RIAL SIGN */ - case U'\U0001F10D': /* CIRCLED ZERO WITH SLASH */ - case U'\U0001F10E': /* CIRCLED ANTICKLOCKWISE ARROW */ - case U'\U0001F10F': /* CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH */ - case U'\U0001F12F': /* COPYLEFT SYMBOL */ - case U'\U0001F16D': /* CIRCLED CC */ - case U'\U0001F16E': /* CIRCLED C WITH OVERLAID BACKSLASH */ - case U'\U0001F16F': /* CIRCLED HUMAN FIGURE */ - ret.res = true; - break; - } - return ret; -} diff --git a/src/u8c/chk.h.d/isspace.c b/src/u8c/chk.h.d/isspace.c deleted file mode 100644 index 478e7a7..0000000 --- a/src/u8c/chk.h.d/isspace.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
- - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/chk.h> -struct u8c_isspace_tuple u8c_isspace(char32_t const _chr) { - struct u8c_isspace_tuple ret = { - .stat = false, - }; - switch(_chr) { - default: - ret.res = false; - break; - case U'\t': /* HORIZONTAL TABULATION */ - case U'\n': /* NEW LINE */ - case U'\v': /* VERTICAL TABULATION */ - case U'\f': /* FORM FEED */ - case U'\r': /* CARRIAGE RETURN */ - case U' ': /* SPACE */ - ret.res = true; - break; - } - return ret; -} diff --git a/src/u8c/chk.h.d/issurro.c b/src/u8c/chk.h.d/issurro.c deleted file mode 100644 index e6873cd..0000000 --- a/src/u8c/chk.h.d/issurro.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/chk.h> -struct u8c_issurro_tuple u8c_issurro(char32_t const _chr) { - struct u8c_issurro_tuple ret = { - .stat = false, - }; - bool res = false; - if(_chr >= U'\xD800' && _chr <= U'\xDFFF') { - res = true; - } - ret.res = res; - return ret; -} diff --git a/src/u8c/chk.h.d/isxdigit.c b/src/u8c/chk.h.d/isxdigit.c deleted file mode 100644 index 5100624..0000000 --- a/src/u8c/chk.h.d/isxdigit.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/chk.h> -struct u8c_isxdigit_tuple u8c_isxdigit(char32_t const _chr) { - struct u8c_isxdigit_tuple ret = { - .stat = false, - }; - switch(_chr) { - default: - ret.res = false; - break; - case U'0': /* DIGIT ZERO */ - case U'1': /* DIGIT ONE */ - case U'2': /* DIGIT TWO */ - case U'3': /* DIGIT THREE */ - case U'4': /* DIGIT FOUR */ - case U'5': /* DIGIT FIVE */ - case U'6': /* DIGIT SIX */ - case U'7': /* DIGIT SEVEN */ - case U'8': /* DIGIT EIGHT */ - case U'9': /* DIGIT NINE */ - case U'A': /* LATIN CAPITAL LETTER A */ - case U'B': /* LATIN CAPITAL LETTER B */ - case U'C': /* LATIN CAPITAL LETTER C */ - case U'D': /* LATIN CAPITAL LETTER D */ - case U'E': /* LATIN CAPITAL LETTER E */ - case U'F': /* LATIN CAPITAL LETTER F */ - ret.res = true; - break; - } - return ret; -} diff --git a/src/u8c/err.h.d/geterr.c b/src/u8c/err.h.d/geterr.c deleted file mode 100644 index 5219348..0000000 --- a/src/u8c/err.h.d/geterr.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/err.h> -# include <u8c/intern.h> -# include <u8c/str.h> -struct u8c_geterr_tuple u8c_geterr(void) { - struct u8c_geterr_tuple ret = { - .stat = false, - }; - { -# if defined(u8c_bethrdsafe) - mtx_lock(&u8c_dat.errlock); -# endif - struct u8c_strcp_tuple const tuple = u8c_strcp(u8c_dat.err); -# if defined(u8c_bethrdsafe) - mtx_unlock(&u8c_dat.errlock); -# endif - ret.err = tuple.str; - ret.errsz = tuple.strsz; - } - return ret; -} diff --git a/src/u8c/err.h.d/regerrhandl.c b/src/u8c/err.h.d/regerrhandl.c deleted file mode 100644 index 5ac43a5..0000000 --- a/src/u8c/err.h.d/regerrhandl.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <u8c/err.h> -# include <u8c/intern.h> -static void u8c_regerrhandl_seterrhandl(enum u8c_errtyp _typ,u8c_errhandltyp _errhandl) { - u8c_dat.errhandls[(size_t)_typ] = _errhandl; -} -struct u8c_regerrhandl_tuple u8c_regerrhandl(enum u8c_errtyp _typ,u8c_errhandltyp _errhandl) { - struct u8c_regerrhandl_tuple ret = { - .stat = false, - }; -# if defined(u8c_bethrdsafe) - mtx_lock(&u8c_dat.errhandlslock); -# endif - if(_typ == u8c_errtyp_all) { - for(register int n = 0x0;n < (int)u8c_errtyp_all;n += 0x1) { - u8c_regerrhandl_seterrhandl((enum u8c_errtyp)n,_errhandl); - } - } - else { - u8c_regerrhandl_seterrhandl(_typ,_errhandl); - } -# if defined(u8c_bethrdsafe) - mtx_unlock(&u8c_dat.errhandlslock); -# endif - return ret; -} diff --git a/src/u8c/err.h.d/seterr.c b/src/u8c/err.h.d/seterr.c deleted file mode 100644 index 89edf19..0000000 --- a/src/u8c/err.h.d/seterr.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/err.h> -# include <u8c/fmt.h> -# include <u8c/intern.h> -# include <u8c/str.h> -# if defined(u8c_bethrdsafe) -# include <threads.h> -# endif -struct u8c_seterr_tuple u8c_seterr(enum u8c_errtyp _typ,char32_t const * const restrict _msg) { - struct u8c_seterr_tuple ret = { - .stat = false, - }; - /* u8c_dbgprint(_msg); */ -# if defined(u8c_bethrdsafe) - mtx_lock(&u8c_dat.errlock); -# endif - u8c_strfree(u8c_dat.err); - u8c_dat.err = u8c_strcp(_msg).str; -# if defined(u8c_bethrdsafe) - mtx_unlock(&u8c_dat.errlock); -# endif -# if defined(u8c_bethrdsafe) - mtx_lock(&u8c_dat.errhandlslock); -# endif - if(u8c_dat.errhandls[(size_t)_typ] != NULL) { - u8c_dat.errhandls[(size_t)_typ](_typ); - } -# if defined(u8c_bethrdsafe) - mtx_unlock(&u8c_dat.errhandlslock); -# endif - return ret; -} diff --git a/src/u8c/fmt.h.d/fmt.c b/src/u8c/fmt.h.d/fmt.c deleted file mode 100644 index 59feb74..0000000 --- a/src/u8c/fmt.h.d/fmt.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdarg.h> -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/fmt.h> -# include <u8c/str.h> -struct u8c_fmt_tuple u8c_fmt(char32_t const * const restrict _in,...) 
{ - struct u8c_fmt_tuple ret; - va_list args; - va_start(args,_in); - struct u8c_vfmt_tuple tuple = u8c_vfmt(_in,args); - va_end(args); - ret.stat = tuple.stat; - ret.str = tuple.str; - ret.strsz = tuple.strsz; - return ret; -} diff --git a/src/u8c/fmt.h.d/print.c b/src/u8c/fmt.h.d/print.c deleted file mode 100644 index e2f4802..0000000 --- a/src/u8c/fmt.h.d/print.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdarg.h> -# include <stdbool.h> -# include <stdint.h> -# include <u8c/fmt.h> -struct u8c_print_tuple u8c_print(FILE * restrict _fp,char32_t const * const restrict _msg,...) { - struct u8c_print_tuple ret; - va_list args; - va_start(args,_msg); - struct u8c_vprint_tuple tuple = u8c_vprint(_fp,_msg,args); - va_end(args); - ret.stat = tuple.stat; - return ret; -} diff --git a/src/u8c/fmt.h.d/println.c b/src/u8c/fmt.h.d/println.c deleted file mode 100644 index d73a897..0000000 --- a/src/u8c/fmt.h.d/println.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
- - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdarg.h> -# include <stdbool.h> -# include <stdint.h> -# include <stdio.h> -# include <u8c/fmt.h> -# include <u8c/str.h> -# include <uchar.h> -struct u8c_println_tuple u8c_println(FILE * restrict _fp,char32_t const * const restrict _msg,...) { - struct u8c_println_tuple ret; - va_list args; - va_start(args,_msg); - char32_t const * msg = u8c_strcat(_msg,U"\n").str; - register struct u8c_vprint_tuple const tuple = u8c_vprint(_fp,msg,args); - u8c_strfree(msg); - va_end(args); - ret.stat = tuple.stat; - return ret; -} diff --git a/src/u8c/fmt.h.d/setfmt.c b/src/u8c/fmt.h.d/setfmt.c deleted file mode 100644 index 4c6d3a3..0000000 --- a/src/u8c/fmt.h.d/setfmt.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stdint.h> -# include <u8c/fmt.h> -# include <u8c/intern.h> -# if defined(u8c_bethrdsafe) -# include <threads.h> -# endif -struct u8c_setfmt_tuple u8c_setfmt(uint_least8_t const _base,bool const _endian) { - struct u8c_setfmt_tuple ret = { - .stat = false, - }; - register uint_least8_t base = _base; - register bool endian = _endian; - if(_base > UINT8_C(0x20)) { - base = UINT8_C(0xC); - } -# if defined(u8c_bethrdsafe) - mtx_lock(&u8c_dat.fmtlock); -# endif - u8c_dat.fmtbase = base; - u8c_dat.fmtendian = endian; -# if defined(u8c_bethrdsafe) - mtx_unlock(&u8c_dat.fmtlock); -# endif - return ret; -} diff --git a/src/u8c/fmt.h.d/vfmt.c b/src/u8c/fmt.h.d/vfmt.c deleted file mode 100644 index 5148784..0000000 --- a/src/u8c/fmt.h.d/vfmt.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdarg.h> -# include <stdbool.h> -# include <u8c/fmt.h> -# include <u8c/str.h> -# include <uchar.h> -# if defined(u8c_bethrdsafe) -# include <threads.h> -# endif -struct u8c_vfmt_tuple u8c_vfmt(char32_t const * const restrict _in,[[maybe_unused]] va_list _args) { - struct u8c_vfmt_tuple ret = { - .stat = false, - }; - struct u8c_strcp_tuple const tuple = u8c_strcp(_in); - ret.stat = tuple.stat; - ret.str = tuple.str; - ret.strsz = tuple.strsz; - return ret; -} diff --git a/src/u8c/fmt.h.d/vprint.c b/src/u8c/fmt.h.d/vprint.c deleted file mode 100644 index cfcb850..0000000 --- a/src/u8c/fmt.h.d/vprint.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdarg.h> -# include <stdbool.h> -# include <stdint.h> -# include <stdio.h> -# include <stdlib.h> -# include <u8c/SIZE_C.h> -# include <u8c/err.h> -# include <u8c/fmt.h> -# include <u8c/intern.h> -# include <u8c/str.h> -# include <u8c/u8.h> -# include <uchar.h> -# if defined(u8c_bethrdsafe) -# include <threads.h> -# endif -struct u8c_vprint_tuple u8c_vprint(FILE * restrict _fp,char32_t const * const restrict _msg,va_list _args) { - struct u8c_vprint_tuple ret = { - .stat = false, - }; - char32_t const * str0 = u8c_vfmt(_msg,_args).str; - size_t str1sz = SIZE_C(0x0); - unsigned char const * str1 = NULL; - { - struct u8c_u8enc_tuple const tuple = u8c_u8enc(str0); - str1 = tuple.u8; - str1sz = tuple.u8sz; - } -# if defined(u8c_bethrdsafe) - mtx_lock(&u8c_dat.outlock); -# endif - { - register size_t const val = fwrite(str1,sizeof(uint_least8_t),str1sz - SIZE_C(0x1),_fp); -# if defined(u8c_bethrdsafe) - mtx_unlock(&u8c_dat.outlock); -# endif - if(val < str1sz - SIZE_C(0x1)) { - u8c_seterr(u8c_errtyp_badio,U"u8c_vprint: Unable to write to stdout."); - ret.stat = true; - return ret; - } - } - u8c_strfree(str0); - u8c_u8free(str1); - return ret; -} diff --git a/src/u8c/intern.h.d/dat.c b/src/u8c/intern.h.d/dat.c deleted file mode 100644 index 77d4962..0000000 --- a/src/u8c/intern.h.d/dat.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. 
- - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/intern.h> -struct u8c_dattyp u8c_dat = { - .err = NULL, - .fmtendian = false, - .stat = UINT8_C(0x0), -}; diff --git a/src/u8c/main.h.d/abrtfn.c b/src/u8c/main.h.d/abrtfn.c deleted file mode 100644 index bab21f2..0000000 --- a/src/u8c/main.h.d/abrtfn.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <inttypes.h> -# include <stdbool.h> -# include <stdint.h> -# include <stdio.h> -# include <stdlib.h> -# include <stdnoreturn.h> -# include <time.h> -# include <u8c/intern.h> -# include <u8c/main.h> -noreturn void u8c_abrtfn(char const * const restrict _fl,long const _ln,char const * const restrict _fn,char const * const restrict _why) { - fprintf(stderr,"u8c: *** Aborted (\"%s\":%li in function \"%s\": \"%s\" @ %" PRIuMAX ") ***\nLibrary diagnostics:\n debug:%s\n status:%" PRIuLEAST8 "\n thread-safe:%s\n version:%" PRIuLEAST64 "\n",_fl,_ln,_fn,_why,(intmax_t)time(NULL),u8c_dbg ? "true" : "false",u8c_dat.stat,u8c_thrdsafe ? 
"true" : "false",u8c_ver); - fprintf(stderr,"Trying to clean up...\n"); - u8c_end(); - fprintf(stderr,"Aborting...\n"); - abort(); -} diff --git a/src/u8c/main.h.d/debug.c b/src/u8c/main.h.d/debug.c deleted file mode 100644 index e42275c..0000000 --- a/src/u8c/main.h.d/debug.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdbool.h> -# include <stdint.h> -# include <u8c/main.h> -bool const u8c_dbg = -# if defined(NDEBUG) - false; -# else - true; -# endif diff --git a/src/u8c/main.h.d/end.c b/src/u8c/main.h.d/end.c deleted file mode 100644 index 741a961..0000000 --- a/src/u8c/main.h.d/end.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. 
- - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdbool.h> -# include <stdint.h> -# include <stdlib.h> -# include <u8c/SIZE_C.h> -# include <u8c/intern.h> -# include <u8c/main.h> -# include <u8c/str.h> -# if defined(u8c_bethrdsafe) -# include <threads.h> -# endif -struct u8c_end_tuple u8c_end(void) { - struct u8c_end_tuple ret = { - .stat = false, - }; - if(!u8c_dat.stat) { - return ret; - } -# if defined(u8c_bethrdsafe) - /* Destroy mutexes: */ - mtx_destroy(&u8c_dat.errhandlslock); - mtx_destroy(&u8c_dat.errlock); - mtx_destroy(&u8c_dat.fmtlock); - mtx_destroy(&u8c_dat.outlock); -# endif - /* Free error message: */ - u8c_strfree(u8c_dat.err); - /* Set default formatting options: */ - u8c_dat.fmtbase = UINT8_C(0xC); - u8c_dat.fmtendian = UINT8_C(0x0); - /* Set status: */ - u8c_dat.stat = UINT8_C(0x0); - return ret; -} diff --git a/src/u8c/main.h.d/init.c b/src/u8c/main.h.d/init.c deleted file mode 100644 index 1762822..0000000 --- a/src/u8c/main.h.d/init.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# if !defined(__STDC_UTF_32__) -# error UTF-32 is required. 
-# endif -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/err.h> -# include <u8c/intern.h> -# include <u8c/main.h> -# include <u8c/str.h> -# if defined(u8c_bethrdsafe) -# include <threads.h> -# endif -struct u8c_init_tuple u8c_init() { - struct u8c_init_tuple ret = { - .stat = false, - }; - /* Initialise mutexes: */ -# if defined(u8c_bethrdsafe) - if(mtx_init(&u8c_dat.errhandlslock,mtx_plain) == thrd_error) { - ret.stat = true; - return ret; - } - if(mtx_init(&u8c_dat.errlock,mtx_plain) == thrd_error) { - ret.stat = true; - return ret; - } - if(mtx_init(&u8c_dat.fmtlock,mtx_plain) == thrd_error) { - ret.stat = true; - return ret; - } - if(mtx_init(&u8c_dat.outlock,mtx_plain) == thrd_error) { - ret.stat = true; - return ret; - } -# endif - /* Set default error message: */ - u8c_dat.err = NULL; - u8c_seterr(u8c_errtyp_deferr,U""); - /* Initialise error handler array: */ - for(register size_t n = SIZE_C(0x0);n < u8c_errtyp_all;n += SIZE_C(0x1)) { - u8c_dat.errhandls[n] = NULL; - } - /* Set status: */ - u8c_dat.stat = UINT8_C(0x1); - return ret; -} diff --git a/src/u8c/main.h.d/thrdsafe.c b/src/u8c/main.h.d/thrdsafe.c deleted file mode 100644 index 4e8c40c..0000000 --- a/src/u8c/main.h.d/thrdsafe.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stdint.h> -# include <u8c/main.h> -bool const u8c_thrdsafe = -# if defined(u8c_bethrdsafe) - true; -# else - false; -# endif -/* -1720 173F HANUNOO -1740 175F BUHID -1760 177F TAGBANWA -1780 17FF KHMER -1800 18AF MONGOLIAN -18B0 18FF UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED -1900 194F LIMBU -1950 197F TAI LE -1980 19DF NEW TAI LUE -19E0 19FF KHMER SYMBOLS -1A00 1A1F BUGINESE -1A20 1AAF TAI THAM -1AB0 1AFF COMBINING DIACRITICAL MARKS EXTENDED -1B00 1B7F BALINESE -1B80 1BBF SUNDANESE -1BC0 1BFF BATAK -1C00 1C4F LEPCHA -1C50 1C7F OL CHIKI -1C80 1C8F CYRILLIC EXTENDED C -1C90 1CBF GEORGIAN EXTENDED -1CC0 1CCF SUNDANESE SUPPLEMENT -1CD0 1CFF VEDIC EXTENSIONS -1D00 1D7F PHONETIC EXTENSIONS -1D80 1DBF PHONETIC EXTENSIONS SUPPLEMENT -1DC0 1DFF COMBINING DIACRITICAL MARKS SUPPLEMENT -1E00 1EFF LATIN EXTENDED ADDITIONAL -1F00 1FFF GREEK EXTENDED -2000 206F GENERAL PUNCTUATION -2070 209F SUPERSCRIPTS AND SUBSCRIPTS -20A0 20CF CURRENCY SYMBOLS -20D0 20FF COMBINING DIACRITICAL MARKS FOR SYMBOLS -2100 214F LETTERLIKE SYMBOLS -2150 218F NUMBER FORMS -2190 21FF ARROWS -2200 22FF MATHEMATICAL OPERATORS -2300 23FF MISCELLANEOUS TECHNICAL -2400 243F CONTROL PICTURES -2440 245F OPTICAL CHARACTER RECOGNITION -2460 24FF ENCLOSED ALPHANUMERICS -2500 257F BOX DRAWING -2580 259F BLOCK ELEMENTS -25A0 25FF GEOMETRIC SHAPES -2600 26FF MISCELLANEOUS SYMBOLS -2700 27BF DINGBATS -27C0 27EF MISCELLANEOUS MATHEMATICAL SYMBOLS-A -27F0 27FF SUPPLEMENTAL ARROWS-A -2800 28FF BRAILLE PATTERNS -2900 297F SUPPLEMENTAL ARROWS-B -2980 29FF MISCELLANEOUS MATHEMATICAL SYMBOLS-B -2A00 2AFF SUPPLEMENTAL MATHEMATICAL OPERATORS -2B00 2BFF MISCELLANEOUS SYMBOLS AND ARROWS -2C00 2C5F GLAGOLITIC -2C60 2C7F LATIN EXTENDED-C -2C80 2CFF COPTIC -2D00 2D2F GEORGIAN SUPPLEMENT -2D30 2D7F TIFINAGH -2D80 2DDF ETHIOPIC EXTENDED -2DE0 2DFF CYRILLIC EXTENDED-A -2E00 2E7F SUPPLEMENTAL PUNCTUATION -2E80 2EFF CJK RADICALS SUPPLEMENT -2F00 2FDF KANGXI RADICALS -2FF0 2FFF IDEOGRAPHIC 
DESCRIPTION CHARACTERS -3000 303F CJK SYMBOLS AND PUNCTUATION -3040 309F HIRAGANA -30A0 30FF KATAKANA -3100 312F BOPOMOFO -3130 318F HANGUL COMPATIBILITY JAMO -3190 319F KANBUN -31A0 31BF BOPOMOFO EXTENDED -31C0 31EF CJK STROKES -31F0 31FF KATAKANA PHONETIC EXTENSIONS -3200 32FF ENCLOSED CJK LETTERS AND MONTHS -3300 33FF CJK COMPATIBILITY -3400 4DBF CJK UNIFIED IDEOGRAPHS EXTENSION A -4DC0 4DFF YIJING HEXAGRAM SYMBOLS -4E00 9FFF CJK UNIFIED IDEOGRAPHS -A000 A48F YI SYLLABLES -A490 A4CF YI RADICALS -A4D0 A4FF LISU -A500 A63F VAI -A640 A69F CYRILLIC EXTENDED-B -A6A0 A6FF BAMUM -A700 A71F MODIFIER TONE LETTERS -A720 A7FF LATIN EXTENDED-D -A800 A82F SYLOTI NAGRI -A830 A83F COMMON INDIC NUMBER FORMS -A840 A87F PHAGS-PA -A880 A8DF SAURASHTRA -A8E0 A8FF DEVANAGARI EXTENDED -A900 A92F KAYAH LI -A930 A95F REJANG -A960 A97F HANGUL JAMO EXTENDED-A -A980 A9DF JAVANESE -A9E0 A9FF MYANMAR EXTENDED-B -AA00 AA5F CHAM -AA60 AA7F MYANMAR EXTENDED-A -AA80 AADF TAI VIET -AAE0 AAFF MEETEI MAYEK EXTENSIONS -AB00 AB2F ETHIOPIC EXTENDED-A -AB30 AB6F LATIN EXTENDED-E -AB70 ABBF CHEROKEE SUPPLEMENT -ABC0 ABFF MEETEI MAYEK -AC00 D7AF HANGUL SYLLABLES -D7B0 D7FF HANGUL JAMO EXTENDED-B -D800 DB7F HIGH SURROGATES -DB80 DBFF HIGH PRIVATE USE SURROGATES -DC00 DFFF LOW SURROGATES -E000 F8FF PRIVATE USE AREA -F900 FAFF CJK COMPATIBILITY IDEOGRAPHS -FB00 FB4F ALPHABETIC PRESENTATION FORMS -FB50 FDFF ARABIC PRESENTATION FORMS-A -FE00 FE0F VARIATION SELECTORS -FE10 FE1F VERTICAL FORMS -FE20 FE2F COMBINING HALF MARKS -FE30 FE4F CJK COMPATIBILITY FORMS -FE50 FE6F SMALL FORM VARIANTS -FE70 FEFF ARABIC PRESENTATION FORMS-B -FF00 FFEF HALFWIDTH AND FULLWIDTH FORMS -FFF0 FFFF SPECIALS -10000 1007F LINEAR B SYLLABARY -10080 100FF LINEAR B IDEOGRAMS -10100 1013F AEGEAN NUMBERS -10140 1018F ANCIENT GREEK NUMBERS -10190 101CF ANCIENT SYMBOLS -101D0 101FF PHAISTOS DISC -10280 1029F LYCIAN -102A0 102DF CARIAN -102E0 102FF COPTIC EPACT NUMBERS -10300 1032F OLD ITALIC -10330 1034F GOTHIC -10350 1037F OLD PERMIC 
-10380 1039F UGARITIC -103A0 103DF OLD PERSIAN -10400 1044F DESERET -10450 1047F SHAVIAN -10480 104AF OSMANYA -104B0 104FF OSAGE -10500 1052F ELBASAN -10530 1056F CAUCASIAN ALBANIAN -10600 1077F LINEAR A -10800 1083F CYPRIOT SYLLABARY -10840 1085F IMPERIAL ARAMAIC -10860 1087F PALMYRENE -10880 108AF NABATAEAN -108E0 108FF HATRAN -10900 1091F PHOENICIAN -10920 1093F LYDIAN -10980 1099F MEROITIC HIEROGLYPHS -109A0 109FF MEROITIC CURSIVE -10A00 10A5F KHAROSHTHI -10A60 10A7F OLD SOUTH ARABIAN -10A80 10A9F OLD NORTH ARABIAN -10AC0 10AFF MANICHAEAN -10B00 10B3F AVESTAN -10B40 10B5F INSCRIPTIONAL PARTHIAN -10B60 10B7F INSCRIPTIONAL PAHLAVI -10B80 10BAF PSALTER PAHLAVI -10C00 10C4F OLD TURKIC -10C80 10CFF OLD HUNGARIAN -10D00 10D3F HANIFI ROHINGYA -10E60 10E7F RUMI NUMERAL SYMBOLS -10E80 10EBF YEZIDI -10F00 10F2F OLD SOGDIAN -10F30 10F6F SOGDIAN -10FB0 10FDF CHORASMIAN -10FE0 10FFF ELYMAIC -11000 1107F BRAHMI -11080 110CF KAITHI -110D0 110FF SORA SOMPENG -11100 1114F CHAKMA -11150 1117F MAHAJANI -11180 111DF SHARADA -111E0 111FF SINHALA ARCHAIC NUMBERS -11200 1124F KHOJKI -11280 112AF MULTANI -112B0 112FF KHUDAWADI -11300 1137F GRANTHA -11400 1147F NEWA -11480 114DF TIRHUTA -11580 115FF SIDDHAM -11600 1165F MODI -11660 1167F MONGOLIAN SUPPLEMENT -11680 116CF TAKRI -11700 1173F AHOM -11800 1184F DOGRA -118A0 118FF WARANG CITI -11900 1195F DIVES AKURU -119A0 119FF NANDINAGARI -*/ diff --git a/src/u8c/main.h.d/uniblk.c b/src/u8c/main.h.d/uniblk.c deleted file mode 100644 index 332f8aa..0000000 --- a/src/u8c/main.h.d/uniblk.c +++ /dev/null @@ -1,517 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
- - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdbool.h> -# include <u8c/main.h> -# include <u8c/str.h> -# include <uchar.h> -# define u8c_uniblk_isinrng(val,start,end) (val >= start && val <= end) -struct u8c_uniblk_tuple u8c_uniblk(char32_t const _chr) { - struct u8c_uniblk_tuple ret = { - .stat = false, - }; - char32_t const * blk = U"UNDEFINED IN UNICODE"; - if(_chr <= U'\x7F') { - blk = U"BASIC LATIN"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\x80',U'\u00FF')) { - blk = U"LATIN-1 SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0100',U'\u017F')) { - blk = U"LATIN EXTENDED-A"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0180',U'\u024F')) { - blk = U"LATIN EXTENDED-B"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0250',U'\u02AF')) { - blk = U"IPA EXTENSIONS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u02B0',U'\u02FF')) { - blk = U"SPACING MODIFIER LETTERS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0300',U'\u036F')) { - blk = U"COMBINING DIRACITICAL MARKS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0370',U'\u03FF')) { - blk = U"GREEK AND COPTIC"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0400',U'\u04FF')) { - blk = U"CYRILLIC"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0500',U'\u052F')) { - blk = U"CYRILLIC SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0530',U'\u058F')) { - blk = U"ARMENIAN"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0590',U'\u05FF')) { - blk = U"HEBREW"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0600',U'\u06FF')) { - blk = U"ARABIC"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0700',U'\u074F')) { - blk = 
U"SYRIAC"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0750',U'\u077F')) { - blk = U"ARABIC SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0780',U'\u07BF')) { - blk = U"THAANA"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u07C0',U'\u07FF')) { - blk = U"NKO"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0800',U'\u083F')) { - blk = U"SAMARITAN"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0840',U'\u085F')) { - blk = U"MANDAIC"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0860',U'\u086F')) { - blk = U"SYRIAC SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u08A0',U'\u08FF')) { - blk = U"ARABIC EXTENDED-A"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0900',U'\u097F')) { - blk = U"DEVANAGARI"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0980',U'\u09FF')) { - blk = U"BENGALI"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0A00',U'\u0A7F')) { - blk = U"GURMUKHI"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0A80',U'\u0AFF')) { - blk = U"GUJARATI"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0B00',U'\u0B7F')) { - blk = U"ORIYAS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0B80',U'\u0BFF')) { - blk = U"TAMIL"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0C00',U'\u0C7F')) { - blk = U"TELUGU"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0C80',U'\u0CFF')) { - blk = U"KANNADA"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0D00',U'\u0D7F')) { - blk = U"MALAYALAM"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0D80',U'\u0DFF')) { - blk = U"SINHALA"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0E00',U'\u0E7F')) { - blk = U"THAI"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0E80',U'\u0EFF')) { - blk = U"LAO"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u0F00',U'\u0FFF')) { - blk = U"TIBETAN"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u1000',U'\u109F')) { - blk = U"MYANMAR"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u10A0',U'\u10FF')) { - blk = U"GEORGIAN"; - goto 
end; - } - if(u8c_uniblk_isinrng(_chr,U'\u1100',U'\u11FF')) { - blk = U"HANGUL JAMO"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u1200',U'\u137F')) { - blk = U"ETHIOPIC"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u1380',U'\u139F')) { - blk = U"ETHIOPIC SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u13A0',U'\u13FF')) { - blk = U"CHEROKEE"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u1400',U'\u167F')) { - blk = U"UNIFIED CANADIAN ABORIGINAL SYLLABICS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u1680',U'\u169F')) { - blk = U"OGHAM"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u16A0',U'\u16FF')) { - blk = U"RUNIC"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\u1700',U'\u171F')) { - blk = U"TAGALOG"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00011A00',U'\U00011A4F')) { - blk = U"ZANABAZAR SQUARE"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00011A50',U'\U00011AAF')) { - blk = U"SOYOMBO"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00011AC0',U'\U00011AFF')) { - blk = U"PAU CIN HAU"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00011C00',U'\U00011C6F')) { - blk = U"BHAIKSUKI"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00011C70',U'\U00011CBF')) { - blk = U"MARCHEN"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00011D00',U'\U00011D5F')) { - blk = U"MASARAM GONDI"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00011D60',U'\U00011DAF')) { - blk = U"GUNJALA GONDI"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00011EE0',U'\U00011EFF')) { - blk = U"MAKASAR"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00011FB0',U'\U00011FBF')) { - blk = U"LISU SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00011FC0',U'\U00011FFF')) { - blk = U"TAMIL SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00012000',U'\U000123FF')) { - blk = U"CUNEIFORM"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00012400',U'\U0001247F')) { - blk = U"CUNEIFORM NUMBERS AND PUNCTUATION"; - goto end; - 
} - if(u8c_uniblk_isinrng(_chr,U'\U00012480',U'\U0001254F')) { - blk = U"EARLY DYNASTIC CUNEIFORM"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00013000',U'\U0001342F')) { - blk = U"EGYPTIAN HIEROGLYPHS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00013430',U'\U0001343F')) { - blk = U"EGYPTIAN HIEROGLYPH FORMAT CONTROLS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00014400',U'\U0001467F')) { - blk = U"ANATOLIAN HIEROGLYPHS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00016800',U'\U00016A3F')) { - blk = U"BAMUM SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00016A40',U'\U00016A6F')) { - blk = U"MRO"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00016AD0',U'\U00016AFF')) { - blk = U"BASSA VAH"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00016B00',U'\U00016B8F')) { - blk = U"PAHAWH HMONG"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00016E40',U'\U00016E9F')) { - blk = U"MEDEFAIDRIN"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00016F00',U'\U00016F9F')) { - blk = U"MIAO"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00016FE0',U'\U00016FFF')) { - blk = U"IDEOGRAPHIC SYMBOLS AND PUNCTUATION"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00017000',U'\U000187FF')) { - blk = U"TANGUT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00018800',U'\U00018AFF')) { - blk = U"TANGUT COMPONENTS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00018B00',U'\U00018CFF')) { - blk = U"KHITAN SMALL SCRIPT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00018D00',U'\U00018D8F')) { - blk = U"TANGUT SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001B000',U'\U0001B0FF')) { - blk = U"KANA SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001B100',U'\U0001B12F')) { - blk = U"KANA EXTENDED-A"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001B130',U'\U0001B16F')) { - blk = U"SMALL KANA EXTENSION"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001B170',U'\U0001B2FF')) { - blk = U"NUSHU"; 
- goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001BC00',U'\U0001BC9F')) { - blk = U"DUPLOYAN"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001BCA0',U'\U0001BCAF')) { - blk = U"SHORTHAND FORMAT CONTROLS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001D000',U'\U0001D0FF')) { - blk = U"BYZANTINE MUSICAL SYMBOLS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001D100',U'\U0001D1FF')) { - blk = U"MUSICAL SYMBOLS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001D200',U'\U0001D24F')) { - blk = U"ANCIENT GREEK MUSICAL NOTATION"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001D2E0',U'\U0001D2FF')) { - blk = U"MAYAN NUMERALS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001D300',U'\U0001D35F')) { - blk = U"TAI XUAN JING SYMBOLS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001D360',U'\U0001D37F')) { - blk = U"COUNTING ROD NUMERALS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001D400',U'\U0001D7FF')) { - blk = U"MATHEMATICAL ALPHANUMERIC SYMBOLS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001D800',U'\U0001DAAF')) { - blk = U"SUTTON SIGNWRITING"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001E000',U'\U0001E02F')) { - blk = U"GLAGOLITIC SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001E100',U'\U0001E14F')) { - blk = U"NYIAKENG PUACHUE HMONG"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001E2C0',U'\U0001E2FF')) { - blk = U"WANCHO"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001E800',U'\U0001E8DF')) { - blk = U"MENDE KIKAKUI"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001E900',U'\U0001E95F')) { - blk = U"ADLAM"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001EC70',U'\U0001ECBF')) { - blk = U"INDIC SIYAQ NUMBERS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001ED00',U'\U0001ED4F')) { - blk = U"OTTOMAN SIYAQ NUMBERS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001EE00',U'\U0001EEFF')) { - blk = U"ARABIC MATHEMATICAL ALPHABETIC SYMBOLS"; - goto end; - } - 
if(u8c_uniblk_isinrng(_chr,U'\U0001F000',U'\U0001F02F')) { - blk = U"MAHJONG TILES"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F030',U'\U0001F09F')) { - blk = U"DOMINO TILES"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F0A0',U'\U0001F0FF')) { - blk = U"PLAYING CARDS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F100',U'\U0001F1FF')) { - blk = U"ENCLOSED ALPHANUMERIC SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F200',U'\U0001F2FF')) { - blk = U"ENCLOSED IDEOGRAPHIC SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F300',U'\U0001F5FF')) { - blk = U"MISCELLANEOUS SYMBOLS AND PICTOGRAPHS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F600',U'\U0001F64F')) { - blk = U"EMOTICONS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F650',U'\U0001F67F')) { - blk = U"ORNAMENTAL DINGBATS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F680',U'\U0001F6FF')) { - blk = U"TRANSPORT AND MAP SYMBOLS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F700',U'\U0001F77F')) { - blk = U"ALCHEMICAL SYMBOLS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F780',U'\U0001F7FF')) { - blk = U"GEOMETRIC SHAPES EXTENDED"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F800',U'\U0001F8FF')) { - blk = U"SUPPLEMENTAL ARROWS-C"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001F900',U'\U0001F9FF')) { - blk = U"SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001FA00',U'\U0001FA6F')) { - blk = U"CHESS SYMBOLS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001FA70',U'\U0001FAFF')) { - blk = U"SYMBOLS AND PICTOGRAPHS EXTENDED-A"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0001FB00',U'\U0001FBFF')) { - blk = U"SYMBOLS FOR LEGACY COMPUTING"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00020000',U'\U0002A6DF')) { - blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION B"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0002A700',U'\U0002B73F')) { - blk = 
U"CJK UNIFIED IDEOGRAPHS EXTENSION C"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0002B740',U'\U0002B81F')) { - blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION D"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0002B820',U'\U0002CEAF')) { - blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION E"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0002CEB0',U'\U0002EBEF')) { - blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION F"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U0002F800',U'\U0002FA1F')) { - blk = U"CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00030000',U'\U0003134F')) { - blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION G"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U000E0000',U'\U000E007F')) { - blk = U"TAGS"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U000E0100',U'\U000E1EFF')) { - blk = U"VARIATION SELECTORS SUPPLEMENT"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U000F0000',U'\U000FFFFF')) { - blk = U"SUPPLEMENTARY PRIVATE USE AREA-A"; - goto end; - } - if(u8c_uniblk_isinrng(_chr,U'\U00100000',U'\U0010FFFF')) { - blk = U"SUPPLEMENTARY PRIVATE USE AREA-B"; - goto end; - } - if(_chr > u8c_unimax) { - ret.stat = true; - return ret; - } -end:; - { - struct u8c_strcp_tuple const tuple = u8c_strcp(blk); - ret.blk = tuple.str; - ret.blksz = tuple.strsz; - } - return ret; -} diff --git a/src/u8c/main.h.d/uninm.c b/src/u8c/main.h.d/uninm.c deleted file mode 100644 index d409263..0000000 --- a/src/u8c/main.h.d/uninm.c +++ /dev/null @@ -1,2582 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdbool.h> -# include <u8c/err.h> -# include <u8c/main.h> -# include <u8c/str.h> -# include <uchar.h> -struct u8c_uninm_tuple u8c_uninm(char32_t const _chr) { - struct u8c_uninm_tuple ret; - if(_chr > u8c_unimax) { - u8c_seterr(u8c_errtyp_stroor,U"u8c_uninm: Character out of range."); - ret.stat = true; - return ret; - } - char32_t const * nm = U""; - switch(_chr) { - default: - nm = U"UNDEFINED IN UNICODE"; - break; - /* BASIC LATIN: */ - case U'\x0': - nm = U"NULL"; - break; - case U'\x1': - nm = U"START OF HEADING"; - break; - case U'\x2': - nm = U"START OF TEXT"; - break; - case U'\x3': - nm = U"END OF TEXT"; - break; - case U'\x4': - nm = U"END OF TRANSMISSION"; - break; - case U'\x5': - nm = U"ENQUIRY"; - break; - case U'\x6': - nm = U"ACKNOWLEDGE"; - break; - case U'\a': - nm = U"BELL"; - break; - case U'\b': - nm = U"BACKSPACE"; - break; - case U'\t': - nm = U"HORIZONTAL TABULATION"; - break; - case U'\n': - nm = U"NEW LINE"; - break; - case U'\v': - nm = U"VERTICAL TABULATION"; - break; - case U'\f': - nm = U"FORM FEED"; - break; - case U'\r': - nm = U"CARRIAGE RETURN"; - break; - case U'\xE': - nm = U"SHIFT OUT"; - break; - case U'\xF': - nm = U"SHIFT IN"; - break; - case U'\x10': - nm = U"DATA LINK ESCAPE"; - break; - case U'\x11': - nm = U"DEVICE CONTROL ONE"; - break; - case U'\x12': - nm = U"DEVICE CONTROL TWO"; - break; - case U'\x13': - nm = U"DEVICE CONTROL THREE"; - break; - case U'\x14': - nm = U"DEVICE CONTROL FOUR"; - break; - case U'\x15': - nm = U"NEGATIVE ACKNOWLEDGE"; - break; - case U'\x16': - nm = U"SYNCHRONOUS IDLE"; - break; - case U'\x17': - nm = U"END OF TRANSMISSION BLOCk"; - break; - case U'\x18': - nm = U"CANCEL"; - break; - case U'\x19': - nm = U"END OF MEDIUM"; - break; - case U'\x1A': - nm = U"SUBSTITUTE"; - 
break; - case U'\x1B': - nm = U"ESCAPE"; - break; - case U'\x1C': - nm = U"FILE SEPERATOR"; - break; - case U'\x1D': - nm = U"GROUP SEPERATOR"; - break; - case U'\x1E': - nm = U"RECORD SEPERATOR"; - break; - case U'\x1F': - nm = U"UNIT SEPERATOR"; - break; - case U' ': - nm = U"SPACE"; - break; - case U'!': - nm = U"EXCLAMATION MARK"; - break; - case U'\"': - nm = U"QUOTATION MARK"; - break; - case U'#': - nm = U"NUMBER SIGN"; - break; - case U'\u0024': - nm = U"DOLLAR SIGN"; - break; - case U'%': - nm = U"PERCENT SIGN"; - break; - case U'&': - nm = U"AMPERSAND"; - break; - case U'\'': - nm = U"APOSTROPHE"; - break; - case U'(': - nm = U"LEFT PARANTHESIS"; - break; - case U')': - nm = U"RIGHT PARANTHESIS"; - break; - case U'*': - nm = U"ASTERISK"; - break; - case U'+': - nm = U"PLUS SIGN"; - break; - case U',': - nm = U"COMMA"; - break; - case U'-': - nm = U"HYPHEN-MINUS"; - break; - case U'.': - nm = U"FULL STOP"; - break; - case U'/': - nm = U"SOLIDUS"; - break; - case U'0': - nm = U"DIGIT ZERO"; - break; - case U'1': - nm = U"DIGIT ONE"; - break; - case U'2': - nm = U"DIGIT TWO"; - break; - case U'3': - nm = U"DIGIT THREE"; - break; - case U'4': - nm = U"DIGIT FOUR"; - break; - case U'5': - nm = U"DIGIT FIVE"; - break; - case U'6': - nm = U"DIGIT SIX"; - break; - case U'7': - nm = U"DIGIT SEVEN"; - break; - case U'8': - nm = U"DIGIT EIGHT"; - break; - case U'9': - nm = U"DIGIT NINE"; - break; - case U':': - nm = U"COLON"; - break; - case U';': - nm = U"SEMICOLON"; - break; - case U'<': - nm = U"LESS-THAN SIGN"; - break; - case U'=': - nm = U"EQUALS SIGN"; - break; - case U'>': - nm = U"GREATER-THAN SIGN"; - break; - case U'?': - nm = U"QUESTION MARK"; - break; - case U'\u0040': - nm = U"COMMERCIAL AT"; - break; - case U'A': - nm = U"LATIN CAPITAL LETTER A"; - break; - case U'B': - nm = U"LATIN CAPITAL LETTER B"; - break; - case U'C': - nm = U"LATIN CAPITAL LETTER C"; - break; - case U'D': - nm = U"LATIN CAPITAL LETTER D"; - break; - case U'E': - nm = U"LATIN 
CAPITAL LETTER E"; - break; - case U'F': - nm = U"LATIN CAPITAL LETTER F"; - break; - case U'G': - nm = U"LATIN CAPITAL LETTER G"; - break; - case U'H': - nm = U"LATIN CAPITAL LETTER H"; - break; - case U'I': - nm = U"LATIN CAPITAL LETTER I"; - break; - case U'J': - nm = U"LATIN CAPITAL LETTER J"; - break; - case U'K': - nm = U"LATIN CAPITAL LETTER K"; - break; - case U'L': - nm = U"LATIN CAPITAL LETTER L"; - break; - case U'M': - nm = U"LATIN CAPITAL LETTER M"; - break; - case U'N': - nm = U"LATIN CAPITAL LETTER N"; - break; - case U'O': - nm = U"LATIN CAPITAL LETTER O"; - break; - case U'P': - nm = U"LATIN CAPITAL LETTER P"; - break; - case U'Q': - nm = U"LATIN CAPITAL LETTER Q"; - break; - case U'R': - nm = U"LATIN CAPITAL LETTER R"; - break; - case U'S': - nm = U"LATIN CAPITAL LETTER S"; - break; - case U'T': - nm = U"LATIN CAPITAL LETTER T"; - break; - case U'U': - nm = U"LATIN CAPITAL LETTER U"; - break; - case U'V': - nm = U"LATIN CAPITAL LETTER V"; - break; - case U'W': - nm = U"LATIN CAPITAL LETTER W"; - break; - case U'X': - nm = U"LATIN CAPITAL LETTER X"; - break; - case U'Y': - nm = U"LATIN CAPITAL LETTER Y"; - break; - case U'Z': - nm = U"LATIN CAPITAL LETTER Z"; - break; - case U'[': - nm = U"LEFT SQUARE BRACKET"; - break; - case U'\\': - nm = U"REVERSE SOLIDUS"; - break; - case U']': - nm = U"RIGHT SQUARE BRACKET"; - break; - case U'^': - nm = U"CIRCUMFLEX ACCENT"; - break; - case U'_': - nm = U"LOW LINE"; - break; - case U'\u0060': - nm = U"GRAVE ACCENT"; - break; - case U'a': - nm = U"LATIN SMALL LETTER A"; - break; - case U'b': - nm = U"LATIN SMALL LETTER B"; - break; - case U'c': - nm = U"LATIN SMALL LETTER C"; - break; - case U'd': - nm = U"LATIN SMALL LETTER D"; - break; - case U'e': - nm = U"LATIN SMALL LETTER E"; - break; - case U'f': - nm = U"LATIN SMALL LETTER F"; - break; - case U'g': - nm = U"LATIN SMALL LETTER G"; - break; - case U'h': - nm = U"LATIN SMALL LETTER H"; - break; - case U'i': - nm = U"LATIN SMALL LETTER I"; - break; - case 
U'j': - nm = U"LATIN SMALL LETTER J"; - break; - case U'k': - nm = U"LATIN SMALL LETTER K"; - break; - case U'l': - nm = U"LATIN SMALL LETTER L"; - break; - case U'm': - nm = U"LATIN SMALL LETTER M"; - break; - case U'n': - nm = U"LATIN SMALL LETTER N"; - break; - case U'o': - nm = U"LATIN SMALL LETTER O"; - break; - case U'p': - nm = U"LATIN SMALL LETTER P"; - break; - case U'q': - nm = U"LATIN SMALL LETTER Q"; - break; - case U'r': - nm = U"LATIN SMALL LETTER R"; - break; - case U's': - nm = U"LATIN SMALL LETTER S"; - break; - case U't': - nm = U"LATIN SMALL LETTER T"; - break; - case U'u': - nm = U"LATIN SMALL LETTER U"; - break; - case U'v': - nm = U"LATIN SMALL LETTER V"; - break; - case U'w': - nm = U"LATIN SMALL LETTER W"; - break; - case U'x': - nm = U"LATIN SMALL LETTER X"; - break; - case U'y': - nm = U"LATIN SMALL LETTER Y"; - break; - case U'z': - nm = U"LATIN SMALL LETTER Z"; - break; - case U'{': - nm = U"LEFT CURLY BRACKET"; - break; - case U'|': - nm = U"VERTICAL LINE"; - break; - case U'}': - nm = U"RIGHT CURLY BRACKET"; - break; - case U'~': - nm = U"TILDE"; - break; - case U'\x7F': - nm = U"DELETE"; - break; - /* LATIN-1 SUPPLEMENT: */ - case U'\x80': - nm = U"<CONTROL>"; - break; - case U'\x81': - nm = U"<CONTROL>"; - break; - case U'\x82': - nm = U"BREAK PERMITTED HERE"; - break; - case U'\x83': - nm = U"NO BREAK HERE"; - break; - case U'\x84': - nm = U"<CONTROL>"; - break; - case U'\x85': - nm = U"NEXT LINE"; - break; - case U'\x86': - nm = U"START OF SELECTED AREA"; - break; - case U'\x87': - nm = U"END OF SELECTED AREA"; - break; - case U'\x88': - nm = U"CHARACTER TABULATION SET"; - break; - case U'\x89': - nm = U"CHARACTER TABULATION WITH JUSTIFICATION"; - break; - case U'\x8A': - nm = U"LINE TABULATION SET"; - break; - case U'\x8B': - nm = U"PARTIAL LINE FORWARD"; - break; - case U'\x8C': - nm = U"PARTIAL LINE BACKWARD"; - break; - case U'\x8D': - nm = U"REVERSE LINE FEED"; - break; - case U'\x8E': - nm = U"SINGLE SHIFT TWO"; - break; - 
case U'\x8F': - nm = U"SINGLE SHIFT THREE"; - break; - case U'\x90': - nm = U"DEVICE CONTROL STRING"; - break; - case U'\x91': - nm = U"PRIVATE USE ONE"; - break; - case U'\x92': - nm = U"PRIVATE USE TWO"; - break; - case U'\x93': - nm = U"SET TRANSMIT STATE"; - break; - case U'\x94': - nm = U"CANCEL CHARACTER"; - break; - case U'\x95': - nm = U"MESSAGE WAITING"; - break; - case U'\x96': - nm = U"START OF GUARDED AREA"; - break; - case U'\x97': - nm = U"END OF GUARDED AREA"; - break; - case U'\x98': - nm = U"START OF STRING"; - break; - case U'\x99': - nm = U"<CONTROL>"; - break; - case U'\x9A': - nm = U"SINGLE CHARACTER INTRODUCER"; - break; - case U'\x9B': - nm = U"CONTROL SEQUENCE INTRODUCER"; - break; - case U'\x9C': - nm = U"STRING TERMINATOR"; - break; - case U'\x9D': - nm = U"OPERATING SYSTEM COMMAND"; - break; - case U'\x9E': - nm = U"PRIVACY MESSAGE"; - break; - case U'\x9F': - nm = U"APPLICATION PROGRAM COMMAND"; - break; - case U'\xA0': - nm = U"NO-BREAK SPACE"; - break; - case U'\u00A1': - nm = U"INVERTED EXCLAMATION MARK"; - break; - case U'\u00A2': - nm = U"CENT SIGN"; - break; - case U'\u00A3': - nm = U"POUND SIGN"; - break; - case U'\u00A4': - nm = U"CURRENCY SIGN"; - break; - case U'\u00A5': - nm = U"YEN SIGN"; - break; - case U'\u00A6': - nm = U"BROKEN BAR"; - break; - case U'\u00A7': - nm = U"SECTION SIGN"; - break; - case U'\u00A8': - nm = U"DIAERESIS"; - break; - case U'\u00A9': - nm = U"COPYRIGHT SIGN"; - break; - case U'\u00AA': - nm = U"FEMININE ORDINAL INDICATOR"; - break; - case U'\u00AB': - nm = U"LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"; - break; - case U'\u00AC': - nm = U"NOT SIGN"; - break; - case U'\u00AD': - nm = U"SOFT HYPHEN"; - break; - case U'\u00AE': - nm = U"REGISTERED SIGN"; - break; - case U'\u00AF': - nm = U"MACRON"; - break; - case U'\u00B0': - nm = U"DEGREE SIGN"; - break; - case U'\u00B1': - nm = U"PLUS MINUS SYMBOL"; - break; - case U'\u00B2': - nm = U"SUPERSCRIPT TWO"; - break; - case U'\u00B3': - nm = U"SUPERSCRIPT 
THREE"; - break; - case U'\u00B4': - nm = U"ACUTE ACCENT"; - break; - case U'\u00B5': - nm = U"MICRO SIGN"; - break; - case U'\u00B6': - nm = U"PILCROW SIGN"; - break; - case U'\u00B7': - nm = U"MIDDLE DOT"; - break; - case U'\u00B8': - nm = U"CEDILLA"; - break; - case U'\u00B9': - nm = U"SUPERSCRIPT ONE"; - break; - case U'\u00BA': - nm = U"MASCULINE ORDINAL INDICATOR"; - break; - case U'\u00BB': - nm = U"RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"; - break; - case U'\u00BC': - nm = U"VULGAR FRACTION ONE QUARTER"; - break; - case U'\u00BD': - nm = U"VULGAR FRACTION ONE HALF"; - break; - case U'\u00BE': - nm = U"VULGAR FRACTION THREE QUARTERS"; - break; - case U'\u00BF': - nm = U"INVERTED QUESTION MARK"; - break; - case U'\u00C0': - nm = U"LATIN CAPITAL LETTER A WITH GRAVE"; - break; - case U'\u00C1': - nm = U"LATIN CAPITAL LETTER A WITH ACUTE"; - break; - case U'\u00C2': - nm = U"LATIN CAPITAL LETTER A WITH CIRCUMFLEX"; - break; - case U'\u00C3': - nm = U"LATIN CAPITAL LETTER A WITH TILDE"; - break; - case U'\u00C4': - nm = U"LATIN CAPITAL LETTER A WITH DIAERESIS"; - break; - case U'\u00C5': - nm = U"LATIN CAPITAL LETTER A WITH RING ABOVE"; - break; - case U'\u00C6': - nm = U"LATIN CAPITAL LETTER AE"; - break; - case U'\u00C7': - nm = U"LATIN CAPITAL LETTER C WITH CEDILLA"; - break; - case U'\u00C8': - nm = U"LATIN CAPITAL LETTER E WITH GRAVE"; - break; - case U'\u00C9': - nm = U"LATIN CAPITAL LETTER E WITH ACUTE"; - break; - case U'\u00CA': - nm = U"LATIN CAPITAL LETTER E WITH CIRCUMFLEX"; - break; - case U'\u00CB': - nm = U"LATIN CAPITAL LETTER E WITH DIAERESIS"; - break; - case U'\u00CC': - nm = U"LATIN CAPITAL LETTER I WITH GRAVE"; - break; - case U'\u00CD': - nm = U"LATIN CAPITAL LETTER I WITH ACUTE"; - break; - case U'\u00CE': - nm = U"LATIN CAPITAL LETTER I WITH CIRCUMFLEX"; - break; - case U'\u00CF': - nm = U"LATIN CAPITAL LETTER I WITH DIAERESIS"; - break; - case U'\u00D0': - nm = U"LATIN CAPITAL LETTER ETH"; - break; - case U'\u00D1': - nm = U"LATIN 
CAPITAL LETTER N WITH TILDE"; - break; - case U'\u00D2': - nm = U"LATIN CAPITAL LETTER O WITH GRAVE"; - break; - case U'\u00D3': - nm = U"LATIN CAPITAL LETTER O WITH ACUTE"; - break; - case U'\u00D4': - nm = U"LATIN CAPITAL LETTER O WITH CIRCUMFLEX"; - break; - case U'\u00D5': - nm = U"LATIN CAPITAL LETTER O WITH TILDE"; - break; - case U'\u00D6': - nm = U"LATIN CAPITAL LETTER O WITH DIAERESIS"; - break; - case U'\u00D7': - nm = U"MULTIPLICATION SIGN"; - break; - case U'\u00D8': - nm = U"LATIN CAPITAL LETTER O WITH STROKE"; - break; - case U'\u00D9': - nm = U"LATIN CAPITAL LETTER U WITH GRAVE"; - break; - case U'\u00DA': - nm = U"LATIN CAPITAL LETTER U WITH ACUTE"; - break; - case U'\u00DB': - nm = U"LATIN CAPITAL LETTER U WITH CIRCUMFLEX"; - break; - case U'\u00DC': - nm = U"LATIN CAPITAL LETTER U WITH DIAERESIS"; - break; - case U'\u00DD': - nm = U"LATIN CAPITAL LETTER Y WITH ACUTE"; - break; - case U'\u00DE': - nm = U"LATIN CAPITAL LETTER THORN"; - break; - case U'\u00DF': - nm = U"LATIN SMALL LETTER SHARP S"; - break; - case U'\u00E0': - nm = U"LATIN SMALL LETTER A WITH GRAVE"; - break; - case U'\u00E1': - nm = U"LATIN SMALL LETTER A WITH ACUTE"; - break; - case U'\u00E2': - nm = U"LATIN SMALL LETTER A WITH CIRCUMFLEX"; - break; - case U'\u00E3': - nm = U"LATIN SMALL LETTER A WITH TILDE"; - break; - case U'\u00E4': - nm = U"LATIN SMALL LETTER A WITH DIAERESIS"; - break; - case U'\u00E5': - nm = U"LATIN SMALL LETTER A WITH RING ABOVE"; - break; - case U'\u00E6': - nm = U"LATIN SMALL LETTER AE"; - break; - case U'\u00E7': - nm = U"LATIN SMALL LETTER C WITH CEDILLA"; - break; - case U'\u00E8': - nm = U"LATIN SMALL LETTER E WITH GRAVE"; - break; - case U'\u00E9': - nm = U"LATIN SMALL LETTER E WITH ACUTE"; - break; - case U'\u00EA': - nm = U"LATIN SMALL LETTER E WITH CIRCUMFLEX"; - break; - case U'\u00EB': - nm = U"LATIN SMALL LETTER E WITH DIAERESIS"; - break; - case U'\u00EC': - nm = U"LATIN SMALL LETTER I WITH GRAVE"; - break; - case U'\u00ED': - nm = U"LATIN 
SMALL LETTER I WITH ACUTE"; - break; - case U'\u00EE': - nm = U"LATIN SMALL LETTER I WITH CIRCUMFLEX"; - break; - case U'\u00EF': - nm = U"LATIN SMALL LETTER I WITH DIAERESIS"; - break; - case U'\u00F0': - nm = U"LATIN SMALL LETTER ETH"; - break; - case U'\u00F1': - nm = U"LATIN SMALL LETTER N WITH TILDE"; - break; - case U'\u00F2': - nm = U"LATIN SMALL LETTER O WITH GRAVE"; - break; - case U'\u00F3': - nm = U"LATIN SMALL LETTER O WITH ACUTE"; - break; - case U'\u00F4': - nm = U"LATIN SMALL LETTER O WITH CIRCUMFLEX"; - break; - case U'\u00F5': - nm = U"LATIN SMALL LETTER O WITH TILDE"; - break; - case U'\u00F6': - nm = U"LATIN SMALL LETTER O WITH DIAERESIS"; - break; - case U'\u00F7': - nm = U"DIVISION SIGN"; - break; - case U'\u00F8': - nm = U"LATIN SMALL LETTER O WITH STROKE"; - break; - case U'\u00F9': - nm = U"LATIN SMALL LETTER U WITH GRAVE"; - break; - case U'\u00FA': - nm = U"LATIN SMALL LETTER U WITH ACUTE"; - break; - case U'\u00FB': - nm = U"LATIN SMALL LETTER U WITH CIRCUMFLEX"; - break; - case U'\u00FC': - nm = U"U WITH TWO DOTS"; - break; - case U'\u00FD': - nm = U"LATIN SMALL LETTER Y WITH ACUTE"; - break; - case U'\u00FE': - nm = U"LATIN SMALL LETTER THORN"; - break; - case U'\u00FF': - nm = U"LATIN SMALL LETTER Y WITH DIAERESIS"; - break; - /* LATIN EXTENDED-A: */ - case U'\u0100': - nm = U"LATIN CAPITAL LETTER A WITH MACRON"; - break; - case U'\u0101': - nm = U"LATIN SMALL LETTER A WITH MACRON"; - break; - case U'\u0102': - nm = U"LATIN CAPITAL LETTER A WITH BREVE"; - break; - case U'\u0103': - nm = U"LATIN SMALL LETTER A WITH BREVE"; - break; - case U'\u0104': - nm = U"LATIN CAPITAL LETTER A WITH OGONEK"; - break; - case U'\u0105': - nm = U"LATIN SMALL LETTER A WITH OGONEK"; - break; - case U'\u0106': - nm = U"LATIN CAPITAL LETTER C WITH ACUTE"; - break; - case U'\u0107': - nm = U"LATIN SMALL LETTER C WITH ACUTE"; - break; - case U'\u0108': - nm = U"LATIN CAPITAL LETTER C WITH CIRCUMFLEX"; - break; - case U'\u0109': - nm = U"LATIN SMALL LETTER C 
WITH CIRCUMFLEX"; - break; - case U'\u010A': - nm = U"LATIN CAPITAL LETTER C WITH DOT ABOVE"; - break; - case U'\u010B': - nm = U"LATIN SMALL LETTER C WITH DOT ABOVE"; - break; - case U'\u010C': - nm = U"LATIN CAPITAL LETTER C WITH CARON"; - break; - case U'\u010D': - nm = U"LATIN SMALL LETTER C WITH CARON"; - break; - case U'\u010E': - nm = U"LATIN CAPITAL LETTER D WITH CARON"; - break; - case U'\u010F': - nm = U"LATIN SMALL LETTER D WITH CARON"; - break; - case U'\u0110': - nm = U"LATIN CAPITAL LETTER D WITH STROKE"; - break; - case U'\u0120': - nm = U"LATIN CAPITAL LETTER G WITH DOT ABOVE"; - break; - case U'\u0130': - nm = U"LATIN CAPITAL LETTER I WITH DOT ABOVE"; - break; - case U'\u0140': - nm = U"LATIN SMALL LETTER L WITH MIDDLE DOT"; - break; - case U'\u0150': - nm = U"LATIN CAPITAL LETTER O WITH DOUBLE ACUTE"; - break; - case U'\u0160': - nm = U"LATIN CAPITAL LETTER S WITH CARON"; - break; - case U'\u0170': - nm = U"LATIN CAPITAL LETTER U WITH DOUBLE ACUTE"; - break; - /* LATIN EXTENDED-B: */ - case U'\u0180': - nm = U"LATIN SMALL LETTER B WITH STROKE"; - break; - case U'\u0190': - nm = U"LATIN CAPITAL LETTER OPEN E"; - break; - case U'\u01A0': - nm = U"LATIN CAPITAL LETTER O WITH HORN"; - break; - case U'\u01B0': - nm = U"LATIN SMALL LETTER U WITH HORN"; - break; - case U'\u01C0': - nm = U"LATIN LETTER DENTAL CLICK"; - break; - case U'\u01D0': - nm = U"LATIN SMALL LETTER I WITH CARON"; - break; - case U'\u01E0': - nm = U"LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON"; - break; - case U'\u01F0': - nm = U"LATIN SMALL LETTER J WITH CARON"; - break; - case U'\u0200': - nm = U"LATIN CAPITAL LETTER A WITH DOUBLE GRAVE"; - break; - case U'\u0210': - nm = U"LATIN CAPITAL LETTER R WITH DOUBLE GRAVE"; - break; - case U'\u0220': - nm = U"LATIN CAPITAL LETTER N WITH LONG RIGHT LEG"; - break; - case U'\u0230': - nm = U"LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON"; - break; - case U'\u0240': - nm = U"LATIN SMALL LETTER Z WITH SWASH TAIL"; - break; - /* IPA 
EXTENSIONS: */ - case U'\u0250': - nm = U"LATIN SMALL LETTER TURNED A"; - break; - case U'\u0251': - nm = U"LATIN SMALL LETTER ALPHA"; - break; - case U'\u0252': - nm = U"LATIN SMALL LETTER TURNED ALPHA"; - break; - case U'\u0253': - nm = U"LATIN SMALL LETTER B WITH HOOK"; - break; - case U'\u0254': - nm = U"LATIN SMALL LETTER OPEN O"; - break; - case U'\u0255': - nm = U"LATIN SMALL LETTER C WITH CURL"; - break; - case U'\u0256': - nm = U"LATIN SMALL LETTER D WITH TAIL"; - break; - case U'\u0257': - nm = U"LATIN SMALL LETTER D WITH HOOK"; - break; - case U'\u0258': - nm = U"LATIN SMALL LETTER REVERSED E"; - break; - case U'\u0259': - nm = U"LATIN SMALL LETTER SCHWA"; - break; - case U'\u025A': - nm = U"LATIN SMALL LETTER SCHWA WITH HOOK"; - break; - case U'\u025B': - nm = U"LATIN SMALL LETTER OPEN E"; - break; - case U'\u025C': - nm = U"LATIN SMALL LETTER REVERSED OPEN E"; - break; - case U'\u025D': - nm = U"LATIN SMALL LETTER REVERSED OPEN E WITH HOOK"; - break; - case U'\u025E': - nm = U"LATIN SMALL LETTER CLOSED REVERSED OPEN E"; - break; - case U'\u025F': - nm = U"LATIN SMALL LETTER DOTLESS J WITH STROKE"; - break; - case U'\u0260': - nm = U"LATIN SMALL LETTER G WITH HOOK"; - break; - case U'\u0261': - nm = U"LATIN SMALL LETTER SCRIPT G"; - break; - case U'\u0262': - nm = U"LATIN LETTER SMALL CAPITAL G"; - break; - case U'\u0263': - nm = U"LATIN SMALL LETTER GAMMA"; - break; - case U'\u0264': - nm = U"LATIN SMALL LETTER RAMS HORN"; - break; - case U'\u0265': - nm = U"LATIN SMALL LETTER TURNED H"; - break; - case U'\u0266': - nm = U"LATIN SMALL LETTER H WITH HOOK"; - break; - case U'\u0267': - nm = U"LATIN SMALL LETTER HENG WITH HOOK"; - break; - case U'\u0268': - nm = U"LATIN SMALL LETTER I WITH STROKE"; - break; - case U'\u0269': - nm = U"LATIN SMALL LETTER IOTA"; - break; - case U'\u026A': - nm = U"LATIN LETTER SMALL CAPITAL I"; - break; - case U'\u026B': - nm = U"LATIN SMALL LETTER L WITH MIDDLE TILDE"; - break; - case U'\u026C': - nm = U"LATIN SMALL LETTER 
L WITH BELT"; - break; - case U'\u026D': - nm = U"LATIN SMALL LETTER L WITH RETROFLEX HOOK"; - break; - case U'\u026E': - nm = U"LATIN SMALL LETTER LEZH"; - break; - case U'\u026F': - nm = U"LATIN SMALL LETTER TURNED M"; - break; - case U'\u0270': - nm = U"LATIN SMALL LETTER TURNED M WITH LONG LEG"; - break; - case U'\u0271': - nm = U"LATIN SMALL LETTER M WITH HOOK"; - break; - case U'\u0272': - nm = U"LATIN SMALL LETTER N WITH LEFT HOOK"; - break; - case U'\u0273': - nm = U"LATIN SMALL LETTER N WITH RETROFLEX HOOK"; - break; - case U'\u0274': - nm = U"LATIN LETTER SMALL CAPITAL N"; - break; - case U'\u0275': - nm = U"LATIN SMALL LETTER BARRED O"; - break; - case U'\u0276': - nm = U"LATIN LETTER SMALL CAPITAL OE"; - break; - case U'\u0277': - nm = U"LATIN SMALL LETTER CLOSED OMEGA"; - break; - case U'\u0278': - nm = U"LATIN SMALL LETTER PHI"; - break; - case U'\u0279': - nm = U"LATIN SMALL LETTER TURNED R"; - break; - case U'\u027A': - nm = U"LATIN SMALL LETTER TURNED R WITH LONG LEG"; - break; - case U'\u027B': - nm = U"LATIN SMALL LETTER TURNED R WITH HOOK"; - break; - case U'\u027C': - nm = U"LATIN SMALL LETTER R WITH LONG LEG"; - break; - case U'\u027D': - nm = U"LATIN SMALL LETTER R WITH TAIL"; - break; - case U'\u027E': - nm = U"LATIN SMALL LETTER R WITH FISHHOOK"; - break; - case U'\u027F': - nm = U"LATIN SMALL LETTER REVERSED R WITH FISHHOOK"; - break; - case U'\u0280': - nm = U"LATIN LETTER SMALL CAPITAL R"; - break; - /* GREEK AND COPTIC: */ - case U'\u0370': - nm = U"GREEK CAPITAL LETTER HETA"; - break; - case U'\u0371': - nm = U"GREEK SMALL LETTER HETA"; - break; - case U'\u0372': - nm = U"GREEK CAPITAL LETTER ARCHAIC SAMPI"; - break; - case U'\u0373': - nm = U"GREEK SMALL LETTER ARCHAIC SAMPI"; - break; - case U'\u0374': - nm = U"GREEK NUMERAL SIGN"; - break; - case U'\u0375': - nm = U"GREEK LOWER NUMERAL SIGN"; - break; - case U'\u0376': - nm = U"GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA"; - break; - case U'\u0377': - nm = U"GREEK SMALL LETTER 
PAMPHYLIAN DIGAMMA"; - break; - case U'\u037A': - nm = U"GREEK YPOGEGRAMMENI"; - break; - case U'\u037B': - nm = U"GREEK SMALL REVERSED LUNATE SIGMA SYMBOL"; - break; - case U'\u037C': - nm = U"GREEK SMALL DOTTED LUNATE SIGMA SYMBOL"; - break; - case U'\u037D': - nm = U"GREEK SMALL REVERSED DOTTED LUNATE SIGMAL SYMBOL"; - break; - case U'\u037E': - nm = U"GREEK QUESTION MARK"; - break; - case U'\u037F': - nm = U"GREEK CAPITAL LETTER YOT"; - break; - case U'\u0384': - nm = U"GREEK TONOS"; - break; - case U'\u0385': - nm = U"GREEK DIALYTIKA TONOS"; - break; - case U'\u0386': - nm = U"GREEK CAPITAL LETTER ALPHA WITH TONOS"; - break; - case U'\u0387': - nm = U"GREEK ANO TELEIA"; - break; - case U'\u0388': - nm = U"GREEK CAPITAL LETTER EPSILON WITH TONOS"; - break; - case U'\u0389': - nm = U"GREEK CAPITAL LETTER ETA WITH TONOS"; - break; - case U'\u038A': - nm = U"GREEK CAPITAL LETTER IOTA WITH TONOS"; - break; - case U'\u038C': - nm = U"GREEK CAPITAL LETTER OMICRON WITH TONOS"; - break; - case U'\u038E': - nm = U"GREEK CAPITAL LETTER USPILON WITH TONOS"; - break; - case U'\u038F': - nm = U"GREEK CAPITAL LETTER OMEGA WITH TONOS"; - break; - case U'\u0390': - nm = U"GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"; - break; - case U'\u0391': - nm = U"GREEK CAPITAL LETTER ALPHA"; - break; - case U'\u0392': - nm = U"GREEK CAPITAL LETTER BETA"; - break; - case U'\u0393': - nm = U"GREEK CAPITAL LETTER GAMMA"; - break; - case U'\u0394': - nm = U"GREEK CAPITAL LETTER DELTA"; - break; - case U'\u0395': - nm = U"GREEK CAPITAL LETTER EPSILON"; - break; - case U'\u0396': - nm = U"GREEK CAPITAL LETTER ZETA"; - break; - case U'\u0397': - nm = U"GREEK CAPITAL LETTER ETA"; - break; - case U'\u0398': - nm = U"GREEK CAPITAL LETTER THETA"; - break; - case U'\u0399': - nm = U"GREEK CAPITAL LETTER IOTA"; - break; - case U'\u039A': - nm = U"GREEK CAPITAL LETTER KAPPA"; - break; - case U'\u039B': - nm = U"GREEK CAPITAL LETTER LAMBDA"; - break; - case U'\u039C': - nm = U"GREEK CAPITAL LETTER 
MU"; - break; - case U'\u039D': - nm = U"GREEK CAPITAL LETTER NU"; - break; - case U'\u039E': - nm = U"GREEK CAPITAL LETTER XI"; - break; - case U'\u039F': - nm = U"GREEK CAPITAL LETTER OMICRON"; - break; - case U'\u03A0': - nm = U"GREEK CAPITAL LETTER PI"; - break; - case U'\u03A1': - nm = U"GREEK CAPITAL LETTER RHO"; - break; - case U'\u03A3': - nm = U"GREEK CAPITAL LETTER SIGMA"; - break; - case U'\u03A4': - nm = U"GREEK CAPITAL LETTER TAU"; - break; - case U'\u03A5': - nm = U"GREEK CAPITAL LETTER UPSILON"; - break; - case U'\u03A6': - nm = U"GREEK CAPITAL LETTER PHI"; - break; - case U'\u03A7': - nm = U"GREEK CAPITAL LETTER CHI"; - break; - case U'\u03A8': - nm = U"GREEK CAPITAL LETTER PSI"; - break; - case U'\u03A9': - nm = U"GREEK CAPITAL LETTER OMEGA"; - break; - case U'\u03AA': - nm = U"GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"; - break; - case U'\u03AB': - nm = U"GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"; - break; - case U'\u03AC': - nm = U"GREEK SMALL LETTER ALPHA WITH TONOS"; - break; - case U'\u03AD': - nm = U"GREEK SMALL LETTER EPSILON WITH TONOS"; - break; - case U'\u03AE': - nm = U"GREEK SMALL LETTER ETA WITH TONOS"; - break; - case U'\u03AF': - nm = U"GREEK SMALL LETTER IOTA WITH TONOS"; - break; - case U'\u03B0': - nm = U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"; - break; - case U'\u03B1': - nm = U"GREEK SMALL LETTER ALPHA"; - break; - case U'\u03B2': - nm = U"GREEK SMALL LETTER BETA"; - break; - case U'\u03B3': - nm = U"GREEK SMALL LETTER GAMMA"; - break; - case U'\u03B4': - nm = U"GREEK SMALL LETTER DELTA"; - break; - case U'\u03B5': - nm = U"GREEK SMALL LETTER EPSILON"; - break; - case U'\u03B6': - nm = U"GREEK SMALL LETTER ZETA"; - break; - case U'\u03B7': - nm = U"GREEK SMALL LETTER ETA"; - break; - case U'\u03B8': - nm = U"GREEK SMALL LETTER THETA"; - break; - case U'\u03B9': - nm = U"GREEK SMALL LETTER IOTA"; - break; - case U'\u03BA': - nm = U"GREEK SMALL LETTER KAPPA"; - break; - case U'\u03BB': - nm = U"GREEK SMALL LETTER 
LAMBDA"; - break; - case U'\u03BC': - nm = U"GREEK SMALL LETTER MU"; - break; - case U'\u03BD': - nm = U"GREEK SMALL LETTER NU"; - break; - case U'\u03BE': - nm = U"GREEK SMALL LETTER XI"; - break; - case U'\u03BF': - nm = U"GREEK SMALL LETTER OMICRON"; - break; - case U'\u03C0': - nm = U"GREEK SMALL LETTER PI"; - break; - case U'\u03C1': - nm = U"GREEK SMALL LETTER RHO"; - break; - case U'\u03C2': - nm = U"GREEK SMALL LETTER FINAL SIGMA"; - break; - case U'\u03C3': - nm = U"GREEK SMALL LETTER SIGMA"; - break; - case U'\u03C4': - nm = U"GREEK SMALL LETTER TAU"; - break; - case U'\u03C5': - nm = U"GREEK SMALL LETTER UPSILON"; - break; - case U'\u03C6': - nm = U"GREEK SMALL LETTER PHI"; - break; - case U'\u03C7': - nm = U"GREEK SMALL LETTER CHI"; - break; - case U'\u03C8': - nm = U"GREEK SMALL LETTER PSI"; - break; - case U'\u03C9': - nm = U"GREEK SMALL LETTER OMEGA"; - break; - case U'\u03CA': - nm = U"GREEK SMALL LETTER IOTA WITH DIALYTIKA"; - break; - case U'\u03CB': - nm = U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA"; - break; - case U'\u03CC': - nm = U"GREEK SMALL LETTER OMICRON WITH TONOS"; - break; - case U'\u03CD': - nm = U"GREEK SMALL LETTER UPSILON WITH TONOS"; - break; - case U'\u03CE': - nm = U"GREEK SMALL LETTER OMEGA WITH TONOS"; - break; - case U'\u03CF': - nm = U"GREEK CAPITAL KAI SYMBOL"; - break; - case U'\u03D0': - nm = U"GREEK BETA SYMBOL"; - break; - case U'\u03D1': - nm = U"GREEK THETA SYMBOL"; - break; - case U'\u03D2': - nm = U"GREEK UPSILON WITH HOOK SYMBOL"; - break; - case U'\u03D3': - nm = U"GREEK UPSILON WITH ACUTE AND HOOK SYMBOL"; - break; - case U'\u03D4': - nm = U"GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL"; - break; - case U'\u03D5': - nm = U"GREEK PHI SYMBOL"; - break; - case U'\u03D6': - nm = U"GREEK PI SYMBOL"; - break; - case U'\u03D7': - nm = U"GREEK KAI SYMBOL"; - break; - case U'\u03D8': - nm = U"GREEK LETTER ARCHAIC KOPPA"; - break; - case U'\u03D9': - nm = U"GREEK SMALL LETTER ARCHAIC KOPPA"; - break; - case U'\u03DA': - 
nm = U"GREEK LETTER STIGMA"; - break; - case U'\u03DB': - nm = U"GREEK SMALL LETTER STIGMA"; - break; - case U'\u03DC': - nm = U"GREEK LETTER DIGAMMA"; - break; - case U'\u03DD': - nm = U"GREEK SMALL LETTER DIGAMMA"; - break; - case U'\u03DE': - nm = U"GREEK LETTER KOPPA"; - break; - case U'\u03DF': - nm = U"GREEK SMALL LETTER KOPPA"; - break; - case U'\u03E0': - nm = U"GREEK LETTER SAMPI"; - break; - case U'\u03F0': - nm = U"GREEK KAPPA SYMBOL"; - break; - /* HEBREW: */ - case U'\u05D0': - nm = U"HEBREW LETTER ALEF"; - break; - case U'\u05D1': - nm = U"HEBREW LETTER BET"; - break; - case U'\u05D2': - nm = U"HEBREW LETTER GIMEL"; - break; - case U'\u05D3': - nm = U"HEBREW LETTER DALET"; - break; - case U'\u05D4': - nm = U"HEBREW LETTER HE"; - break; - case U'\u05D5': - nm = U"HEBREW LETTER VAV"; - break; - case U'\u05D6': - nm = U"HEBREW LETTER ZAYIN"; - break; - case U'\u05D7': - nm = U"HEBREW LETTER HET"; - break; - case U'\u05D8': - nm = U"HEBREW LETTER TET"; - break; - case U'\u05D9': - nm = U"HEBREW LETTER YOD"; - break; - case U'\u05DA': - nm = U"HEBREW LETTER FINAL KAF"; - break; - case U'\u05DB': - nm = U"HEBREW LETTER KAF"; - break; - case U'\u05DC': - nm = U"HEBREW LETTER LAMED"; - break; - case U'\u05DD': - nm = U"HEBREW LETTER FINAL MEM"; - break; - case U'\u05DE': - nm = U"HEBREW LETTER MEM"; - break; - case U'\u05DF': - nm = U"HEBREW LETTER FINAL NUN"; - break; - case U'\u05E0': - nm = U"HEBREW LETTER NUN"; - break; - case U'\u05E1': - nm = U"HEBREW LETTER SAMEKH"; - break; - case U'\u05E2': - nm = U"HEBREW LETTER AYIN"; - break; - case U'\u05E3': - nm = U"HEBREW LETTER FINAL PE"; - break; - case U'\u05E4': - nm = U"HEBREW LETTER PE"; - break; - case U'\u05E5': - nm = U"HEBREW LETTER FINAL TSADI"; - break; - case U'\u05E6': - nm = U"HEBREW LETTER TSADI"; - break; - case U'\u05E7': - nm = U"HEBREW LETTER QOF"; - break; - case U'\u05E8': - nm = U"HEBREW LETTER RESH"; - break; - case U'\u05E9': - nm = U"HEBREW LETTER SHIN"; - break; - case U'\u05EA': - 
nm = U"HEBREW LETTER TAV"; - break; - case U'\u05EF': - nm = U"HEBREW YOD TRIANGLE"; - break; - /* CYRILLIC: */ - case U'\u0400': - nm = U"CYRILLIC CAPITAL LETTER LE WITH GRAVE"; - break; - case U'\u0401': - nm = U"CYRILLIC CAPITAL LETTER LO"; - break; - case U'\u0402': - nm = U"CYRILLIC CAPITAL LETTER DJE"; - break; - case U'\u0403': - nm = U"CYRILLIC CAPITAL LETTER GJE"; - break; - case U'\u0404': - nm = U"CYRILLIC CAPITAL LETTER UKRAINIAN LE"; - break; - case U'\u0405': - nm = U"CYRILLIC CAPITAL LETTER DZE"; - break; - case U'\u0406': - nm = U"CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I"; - break; - case U'\u0407': - nm = U"CYRILLIC CAPITAL LETTER YI"; - break; - case U'\u0408': - nm = U"CYRILLIC CAPITAL LETTER JE"; - break; - case U'\u0409': - nm = U"CYRILLIC CAPITAL LETTER LJE"; - break; - case U'\u040A': - nm = U"CYRILLIC CAPITAL LETTER NJE"; - break; - case U'\u040B': - nm = U"CYRILLIC CAPITAL LETTER TSHE"; - break; - case U'\u040C': - nm = U"CYRILLIC CAPITAL LETTER KJE"; - break; - case U'\u040D': - nm = U"CYRILLIC CAPITAL LETTER I WITH GRAVE"; - break; - case U'\u040E': - nm = U"CYRILLIC CAPITAL LETTER SHORT U"; - break; - case U'\u040F': - nm = U"CYRILLIC CAPITAL LETTER DZHE"; - break; - case U'\u0410': - nm = U"CYRILLIC CAPITAL LETTER A"; - break; - case U'\u0420': - nm = U"CYRILLIC CAPITAL LETTER ER"; - break; - case U'\u0430': - nm = U"CYRILLIC SMALL LETTER A"; - break; - case U'\u0440': - nm = U"CYRILLIC SMALL LETTER ER"; - break; - case U'\u0450': - nm = U"CYRILLIC SMALL LETTER LE WITH GRAVE"; - break; - case U'\u0460': - nm = U"CYRILLIC CAPITAL LETTER OMEGA"; - break; - case U'\u0470': - nm = U"CYRILLIC CAPITAL LETTER PSI"; - break; - case U'\u0480': - nm = U"CYRILLIC CAPITAL LETTER KOPPA"; - break; - case U'\u0490': - nm = U"CYRILLIC CAPITAL LETTER GHE WITH UPTURN"; - break; - case U'\u04A0': - nm = U"CYRILLIC CAPITAL LETTER BASHKIR KA"; - break; - case U'\u04B0': - nm = U"CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE"; - break; - case 
U'\u04C0': - nm = U"CYRILLIC LETTER PALOCHKA"; - break; - case U'\u04D0': - nm = U"CYRILLIC CAPITAL LETTER A WITH BREVE"; - break; - case U'\u04E0': - nm = U"CYRILLIC CAPITAL LETTER ABKHASIAN DZE"; - break; - case U'\u04F0': - nm = U"CYRILLIC CAPITAL LETTER U WITH DIAERESIS"; - break; - /* SYRIAC SUPPLEMENT: */ - case U'\u0860': - nm = U"SYRIAC LETTER MALAYALAM NGA"; - break; - case U'\u0861': - nm = U"SYRIAC LETTER MALAYALAM JA"; - break; - case U'\u0862': - nm = U"SYRIAC LETTER MALAYALAM NYA"; - break; - case U'\u0863': - nm = U"SYRIAC LETTER MALAYALAM TTA"; - break; - case U'\u0864': - nm = U"SYRIAC LETTER MALAYALAM NNA"; - break; - case U'\u0865': - nm = U"SYRIAC LETTER MALAYALAM NNNA"; - break; - case U'\u0866': - nm = U"SYRIAC LETTER MALAYALAM BHA"; - break; - case U'\u0867': - nm = U"SYRIAC LETTER MALAYALAM RA"; - break; - case U'\u0868': - nm = U"SYRIAC LETTER MALAYALAM LLA"; - break; - case U'\u0869': - nm = U"SYRIAC LETTER MALAYALAM LLLA"; - break; - case U'\u086A': - nm = U"SYRIAC LETTER MALAYALAM SSA"; - break; - /* RUNIC: */ - case U'\u16A0': - nm = U"RUNIC LETTER FEHU FEOH FE F"; - break; - case U'\u16A1': - nm = U"RUNIC LETTER V"; - break; - case U'\u16A2': - nm = U"RUNIC LETTER URUZ UR U"; - break; - case U'\u16A3': - nm = U"RUNIC LETTER YR"; - break; - case U'\u16A4': - nm = U"RUNIC LETTER Y"; - break; - case U'\u16A5': - nm = U"RUNIC LETTER W"; - break; - case U'\u16A6': - nm = U"RUNIC LETTER THURISAZ THURS THORN"; - break; - case U'\u16A7': - nm = U"RUNIC LETTER ETH"; - break; - case U'\u16A8': - nm = U"RUNIC LETTER ANSUZ A"; - break; - case U'\u16A9': - nm = U"RUNIC LETTER OS O"; - break; - case U'\u16AA': - nm = U"RUNIC LETTER AC A"; - break; - case U'\u16AB': - nm = U"RUNIC LETTER AESC"; - break; - case U'\u16AC': - nm = U"RUNIC LETTER LONG-BRANCHED-OSS O"; - break; - case U'\u16AD': - nm = U"RUNIC LETTER SHORT-TWIG-OSS O"; - break; - case U'\u16AE': - nm = U"RUNIC LETTER O"; - break; - case U'\u16AF': - nm = U"RUNIC LETTER OE"; - break; - 
case U'\u16B0': - nm = U"RUNIC LETTER ON"; - break; - case U'\u16C0': - nm = U"RUNIC LETTER DOTTED-N"; - break; - case U'\u16D0': - nm = U"RUNIC LETTER SHORT-TWIG-TYR T"; - break; - case U'\u16E0': - nm = U"RUNIC LETTER EAR"; - break; - case U'\u16F0': - nm = U"RUNIC BELGTHOR SYMBOL"; - break; - /* CYRILLIC EXTENDED C: */ - case U'\u1C80': - nm = U"CYRILLIC SMALL LETTER ROUNDED VE"; - break; - case U'\u1C81': - nm = U"CYRILLIC SMALL LETTER LONG-LEGGED DE"; - break; - case U'\u1C82': - nm = U"CYRILLIC SMALL LETTER NARROW O"; - break; - case U'\u1C83': - nm = U"CYRILLIC SMALL LETTER WIDE ES"; - break; - case U'\u1C84': - nm = U"CYRILLIC SMALL LETTER TALL TE"; - break; - case U'\u1C85': - nm = U"CYRILLIC SMALL LETTER THREE-LEGGED TE"; - break; - case U'\u1C86': - nm = U"CYRILLIC SMALL LETTER TALL HARD SIGN"; - break; - case U'\u1C87': - nm = U"CYRILLIC SMALL LETTER TALL YAT"; - break; - case U'\u1C88': - nm = U"CYRILLIC SMALL LETTER UNBLENDED UK"; - break; - /* GENERAL PUNCTUATION: */ - case U'\u2000': - nm = U"EN QUAD"; - break; - case U'\u2001': - nm = U"EM QUAD"; - break; - case U'\u2002': - nm = U"EN SPACE"; - break; - case U'\u2003': - nm = U"EM SPACE"; - break; - case U'\u2004': - nm = U"THREE-PER-EM SPACE"; - break; - case U'\u2005': - nm = U"FOUR-PER-EM SPACE"; - break; - case U'\u2006': - nm = U"SIX-PER-EM SPACE"; - break; - case U'\u2007': - nm = U"FIGURE SPACE"; - break; - case U'\u2008': - nm = U"PUNCTUATION SPACE"; - break; - case U'\u2009': - nm = U"THIN SPACE"; - break; - case U'\u200A': - nm = U"HAIR SPACE"; - break; - case U'\u203C': - nm = U"DOUBLE EXCLAMATION MARK"; - break; - case U'\u2047': - nm = U"DOUBLE QUOTATION MARK"; - break; - case U'\u2048': - nm = U"QUESTION EXCLAMATION MARK"; - break; - case U'\u2049': - nm = U"EXCLAMATION QUESTION MARK"; - break; - /* CURRENCY SYMBOLS: */ - case U'\u20A0': - nm = U"EURO-CURRENCY SIGN"; - break; - case U'\u20A1': - nm = U"COLON SIGN"; - break; - case U'\u20A2': - nm = U"CRUZEIRO SIGN"; - break; - case 
U'\u20A3': - nm = U"FRENCH FRANC SIGN"; - break; - case U'\u20A4': - nm = U"LIRA SIGN"; - break; - case U'\u20A5': - nm = U"MILL SIGN"; - break; - case U'\u20A6': - nm = U"NAIRA SIGN"; - break; - case U'\u20A7': - nm = U"PESETA SIGN"; - break; - case U'\u20A8': - nm = U"RUPEE SIGN"; - break; - case U'\u20A9': - nm = U"WON SIGN"; - break; - case U'\u20AA': - nm = U"NEW SHEQEL SIGN"; - break; - case U'\u20AB': - nm = U"DONG SIGN"; - break; - case U'\u20AC': - nm = U"EURO SIGN"; - break; - case U'\u20AD': - nm = U"KIP SIGN"; - break; - case U'\u20AE': - nm = U"TUGRIK SIGN"; - break; - case U'\u20AF': - nm = U"DRACHMA SIGN"; - break; - case U'\u20B0': - nm = U"GERMAN PENNY SIGN"; - break; - case U'\u20B1': - nm = U"PESO SIGN"; - break; - case U'\u20B2': - nm = U"GUARANI SIGN"; - break; - case U'\u20B3': - nm = U"AUSTRAL SIGN"; - break; - case U'\u20B4': - nm = U"HRYVNIA SIGN"; - break; - case U'\u20B5': - nm = U"CEDI SIGN"; - break; - case U'\u20B6': - nm = U"LIVRE TOURNOIS SIGN"; - break; - case U'\u20B7': - nm = U"SPESMILO SIGN"; - break; - case U'\u20B8': - nm = U"TENGE SIGN"; - break; - case U'\u20BA': - nm = U"TURKISH LIRA SIGN"; - break; - case U'\u20BB': - nm = U"NORDIC MARK SIGN"; - break; - case U'\u20BC': - nm = U"MANAT SIGN"; - break; - case U'\u20BD': - nm = U"RUBLE SYMBOL"; - break; - case U'\u20BE': - nm = U"LARI SIGN"; - break; - case U'\u20BF': - nm = U"BITCOIN SIGN"; - break; - /* LETTERLIKE SYMBOLS: */ - case U'\u2100': - nm = U"ACCOUNT OF"; - break; - case U'\u2101': - nm = U"ADRESSED TO THE SUBJECT"; - break; - case U'\u2102': - nm = U"DOUBLE-STRUCK CAPITAL C"; - break; - case U'\u2103': - nm = U"DEGREE CELSIUS"; - break; - case U'\u2104': - nm = U"CENTRE LINE SYMBOL"; - break; - case U'\u2105': - nm = U"CARE OF"; - break; - case U'\u2106': - nm = U"CADA UNA"; - break; - case U'\u2107': - nm = U"EULER CONSTANT"; - break; - case U'\u2108': - nm = U"SCRUPLE"; - break; - case U'\u2109': - nm = U"DEGREE FAHRENHEIT"; - break; - case U'\u210A': - nm = 
U"SCRIPT SMALL G"; - break; - case U'\u210B': - nm = U"SCRIPT CAPITAL H"; - break; - case U'\u210C': - nm = U"BLACK-LETTER CAPITAL H"; - break; - case U'\u210D': - nm = U"DOUBLE-STRUCK CAPITAL H"; - break; - case U'\u210E': - nm = U"PLANCK CONSTANT"; - break; - case U'\u210F': - nm = U"PLANCK CONSTANT OVER TWO PI"; - break; - case U'\u2110': - nm = U"SCRIPT CAPITAL I"; - break; - case U'\u2111': - nm = U"BLACK-LETTER CAPITAL I"; - break; - case U'\u2112': - nm = U"SCRIPT CAPITAL L"; - break; - case U'\u2113': - nm = U"SCRIPT SMALL L"; - break; - case U'\u2114': - nm = U"L B BAR SYMBOL"; - break; - case U'\u2115': - nm = U"DOUBLE-STRUCK CAPITAL N"; - break; - case U'\u2116': - nm = U"NUMERO SIGN"; - break; - case U'\u2117': - nm = U"SOUND RECORDING COPYRIGHT"; - break; - case U'\u2118': - nm = U"SCRIPT CAPITAL P"; - break; - case U'\u2119': - nm = U"DOUBLE-STRUCK CAPITAL P"; - break; - case U'\u211A': - nm = U"DOUBLE-STRUCK CAPITAL Q"; - break; - case U'\u211B': - nm = U"SCRIPT CAPITAL R"; - break; - case U'\u211C': - nm = U"BLACK-LETTER CAPITAL R"; - break; - case U'\u211D': - nm = U"DOUBLE-STRUCK CAPITAL R"; - break; - case U'\u211E': - nm = U"PRESCRIPTION TAKE"; - break; - case U'\u211F': - nm = U"RESPONSE"; - break; - case U'\u2120': - nm = U"SERVICE MARK"; - break; - case U'\u2121': - nm = U"TELEPHONE SIGN"; - break; - case U'\u2122': - nm = U"TRADE MARK SIGN"; - break; - case U'\u2123': - nm = U"VERSICLE"; - break; - case U'\u2124': - nm = U"DOUBLE-STRUCK CAPITAL Z"; - break; - case U'\u2125': - nm = U"OUNCE SIGN"; - break; - case U'\u2126': - nm = U"OHM SIGN"; - break; - case U'\u2127': - nm = U"INVERTED OHM SIGN"; - break; - case U'\u2128': - nm = U"BLACK-LETTER CAPITAL Z"; - break; - case U'\u2129': - nm = U"TURNED GREEK SMALL LETTER IOTA"; - break; - case U'\u212A': - nm = U"KELVIN SIGN"; - break; - case U'\u212B': - nm = U"ANGSTROM SIGN"; - break; - case U'\u212C': - nm = U"SCRIPT CAPITAL B"; - break; - case U'\u212D': - nm = U"BLACK-LETTER CAPITAL C"; - 
break; - case U'\u212E': - nm = U"ESTIMATED SYMBOL"; - break; - case U'\u212F': - nm = U"SCRIPT SMALL E"; - break; - case U'\u2130': - nm = U"SCRIPT CAPITAL E"; - break; - case U'\u2131': - nm = U"SCRIPT CAPITAL F"; - break; - case U'\u2132': - nm = U"TURNED CAPITAL F"; - break; - case U'\u2133': - nm = U"SCRIPT CAPITAL M"; - break; - case U'\u2134': - nm = U"SCRIPT SMALL O"; - break; - case U'\u2135': - nm = U"ALEF SYMBOL"; - break; - case U'\u2136': - nm = U"BET SYMBOL"; - break; - case U'\u2137': - nm = U"GIMEL SYMBOL"; - break; - case U'\u2138': - nm = U"DALET SYMBOL"; - break; - case U'\u2139': - nm = U"INFORMATION SOURCE"; - break; - case U'\u213A': - nm = U"ROTATED CAPITAL Q"; - break; - case U'\u213B': - nm = U"FACSIMILE SIGN"; - break; - case U'\u213C': - nm = U"DOUBLE-STRUCK SMALL PI"; - break; - case U'\u213D': - nm = U"DOUBLE-STRUCK SMALL GAMMA"; - break; - case U'\u213E': - nm = U"DOUBLE-STRUCK CAPITAL GAMMA"; - break; - case U'\u213F': - nm = U"DOUBLE-STRUCK CAPITAL PI"; - break; - case U'\u2140': - nm = U"DOUBLE-STRUCK N-ARY SUMMATION"; - break; - case U'\u2141': - nm = U"TURNED SANS-SERIF CAPITAL G"; - break; - case U'\u2142': - nm = U"TURNED SANS-SERIF CAPITAL L"; - break; - case U'\u2143': - nm = U"REVERSED SANS-SERIF CAPITAL L"; - break; - case U'\u2144': - nm = U"TURNED SANS-SERIF CAPITAL Y"; - break; - case U'\u2145': - nm = U"DOUBLE-STRUCK ITALIC CAPITAL D"; - break; - case U'\u2146': - nm = U"DOUBLE-STRUCK ITALIC SMALL D"; - break; - case U'\u2147': - nm = U"DOUBLE-STRUCK ITALIC SMALL E"; - break; - case U'\u2148': - nm = U"DOUBLE-STRUCK ITALIC SMALL I"; - break; - case U'\u2149': - nm = U"DOUBLE-STRUCK ITALIC SMALL J"; - break; - case U'\u214A': - nm = U"PROPERTY LINE"; - break; - case U'\u214B': - nm = U"TURNED AMPERSAND"; - break; - case U'\u214C': - nm = U"PER SIGN"; - break; - case U'\u214D': - nm = U"AKTIESELSKAB"; - break; - case U'\u214E': - nm = U"TURNED SMALL F"; - break; - case U'\u214F': - nm = U"SYMBOL FOR SAMARITAN SOURCE"; - 
break; - /* NUMBER FORMS: */ - case U'\u2150': - nm = U"VULGAR FRACTION ONE SEVENTH"; - break; - case U'\u2151': - nm = U"VULGAR FRACTION ONE NINTH"; - break; - case U'\u2152': - nm = U"VULGAR FRACTION ONE TENTH"; - break; - case U'\u2153': - nm = U"VULGAR FRACTION ONE THIRD"; - break; - case U'\u2154': - nm = U"VULGAR FRACTION TWO THIRDS"; - break; - case U'\u2155': - nm = U"VULGAR FRACTION ONE FIFTH"; - break; - case U'\u2156': - nm = U"VULGAR FRACTION TWO FIFTHS"; - break; - case U'\u2157': - nm = U"VULGAR FRACTION THREE FIFTHS"; - break; - case U'\u2158': - nm = U"VULGAR FRACTION FOUR FIFTHS"; - break; - case U'\u2159': - nm = U"VULGAR FRACTION ONE SIXTH"; - break; - case U'\u215A': - nm = U"VULGAR FRACTION FIVE SIXTHS"; - break; - case U'\u215B': - nm = U"VULGAR FRACTION ONE EIGTH"; - break; - case U'\u215C': - nm = U"VULGAR FRACTION THREE EIGTHS"; - break; - case U'\u215D': - nm = U"VULGAR FRACTION FIVE EIGHTS"; - break; - case U'\u215E': - nm = U"VULGAR FRACTION SEVEN EIGTHS"; - break; - case U'\u215F': - nm = U"FRACTION NUMERATOR ONE"; - break; - case U'\u2160': - nm = U"ROMAN NUMERAL ONE"; - break; - case U'\u2161': - nm = U"ROMAN NUMERAL TWO"; - break; - case U'\u2162': - nm = U"ROMAN NUMERAL THREE"; - break; - case U'\u2163': - nm = U"ROMAN NUMERAL FOUR"; - break; - case U'\u2164': - nm = U"ROMAN NUMERAL FIVE"; - break; - case U'\u2165': - nm = U"ROMAN NUMERAL SIX"; - break; - case U'\u2166': - nm = U"ROMAN NUMERAL SEVEN"; - break; - case U'\u2167': - nm = U"ROMAN NUMERAL EIGHT"; - break; - case U'\u2168': - nm = U"ROMAN NUMERAL NINE"; - break; - case U'\u2169': - nm = U"ROMAN NUMERAL TEN"; - break; - case U'\u216A': - nm = U"ROMAN NUMERAL ELEVEN"; - break; - case U'\u216B': - nm = U"ROMAN NUMERAL TWELVE"; - break; - case U'\u216C': - nm = U"ROMAN NUMERAL FIFTY"; - break; - case U'\u216D': - nm = U"ROMAN NUMERAL ONE HUNDRED"; - break; - case U'\u216E': - nm = U"ROMAN NUMERAL FIVE HUNDRED"; - break; - case U'\u216F': - nm = U"ROMAN NUMERAL ONE THOUSAND"; 
- break; - case U'\u2170': - nm = U"SMALL ROMAN NUMERAL ONE"; - break; - case U'\u2171': - nm = U"SMALL ROMAN NUMERAL TWO"; - break; - case U'\u2172': - nm = U"SMALL ROMAN NUMERAL THREE"; - break; - case U'\u2173': - nm = U"SMALL ROMAN NUMERAL FOUR"; - break; - case U'\u2174': - nm = U"SMALL ROMAN NUMERAL FIVE"; - break; - case U'\u2175': - nm = U"SMALL ROMAN NUMERAL SIX"; - break; - case U'\u2176': - nm = U"SMALL ROMAN NUMERAL SEVEN"; - break; - case U'\u2177': - nm = U"SMALL ROMAN NUMERAL EIGHT"; - break; - case U'\u2178': - nm = U"SMALL ROMAN NUMERAL NINE"; - break; - case U'\u2179': - nm = U"SMALL ROMAN NUMERAL TEN"; - break; - case U'\u217A': - nm = U"SMALL ROMAN NUMERAL ELEVEN"; - break; - case U'\u217B': - nm = U"SMALL ROMAN NUMERAL TWELVE"; - break; - case U'\u217C': - nm = U"SMALL ROMAN NUMERAL FIFTY"; - break; - case U'\u217D': - nm = U"SMALL ROMAN NUMERAL ONE HUNDRED"; - break; - case U'\u217E': - nm = U"SMALL ROMAN NUMERAL FIVE HUNDRED"; - break; - case U'\u217F': - nm = U"SMALL ROMAN NUMERAL ONE THOUSAND"; - break; - case U'\u2180': - nm = U"ROMAN NUMERAL ONE THOUSAND C D"; - break; - case U'\u2181': - nm = U"ROMAN NUMERAL FIVE THOUSAND"; - break; - case U'\u2182': - nm = U"ROMAN NUMERAL TEN THOUSAND"; - break; - case U'\u2183': - nm = U"ROMAN NUMERAL REVERSED ONE HUNDRED"; - break; - case U'\u2184': - nm = U"LATIN SMALL LETTER REVERSED C"; - break; - case U'\u2185': - nm = U"ROMAN NUMERAL SIX LATE FORM"; - break; - case U'\u2186': - nm = U"ROMAN NUMERAL FIFTY EARLY FORM"; - break; - case U'\u2187': - nm = U"ROMAN NUMERAL FIFTY THOUSAND"; - break; - case U'\u2188': - nm = U"ROMAN NUMERAL ONE HUNDRED THOUSAND"; - break; - case U'\u2189': - nm = U"VULGAR FRACTION ZERO THIRDS"; - break; - case U'\u218A': - nm = U"TURNED DIGIT TWO"; - break; - case U'\u218B': - nm = U"TURNED DIGIT THREE"; - break; - /* MISCELLANEOUS SYMBOLS: */ - case U'\u26B9': - nm = U"SEXTILE"; - break; - /* DINGBATS: */ - case U'\u271D': - nm = U"LATIN CROSS"; - break; - case 
U'\u2721': - nm = U"STAR OF DAVID"; - break; - /* SUPPLEMENTAL PUNCTUATION: */ - case U'\u2E3B': - nm = U"THREE-EM DASH"; - break; - /* ARABIC PRESENTATION FORMS-A: */ - case U'\uFDFD': - nm = U"ARABIC LIGATURE BISMILLAH AL-RAHMAN AR-RAHEEM"; - break; - /* ANCIENT SYMBOLS: */ - case U'\U00010190': - nm = U"ROMAN SEXTANS SIGN"; - break; - case U'\U00010191': - nm = U"ROMAN UNCIA SIGN"; - break; - case U'\U00010192': - nm = U"ROMAN SEMUNCIA SIGN"; - break; - case U'\U00010193': - nm = U"ROMAN SEXTULA SIGN"; - break; - case U'\U00010194': - nm = U"ROMAN DIMIDIA SEXTULA SIGN"; - break; - case U'\U00010195': - nm = U"ROMAN SILIQUA SIGN"; - break; - case U'\U00010196': - nm = U"ROMAN DENARIUS SIGN"; - break; - case U'\U00010197': - nm = U"ROMAN QUINARIUS SIGN"; - break; - case U'\U00010198': - nm = U"ROMAN SESTERTIUS SIGN"; - break; - case U'\U00010199': - nm = U"ROMAN DUPONDIUS SIGN"; - break; - case U'\U0001019A': - nm = U"ROMAN AS SIGN"; - break; - case U'\U0001019B': - nm = U"ROMAN CENTURIAL SIGN"; - break; - case U'\U0001019C': - nm = U"ASCIA SIGN"; - break; - /* BRAHMI: */ - case U'\U00011066': - nm = U"BRAHMI DIGIT ZERO"; - break; - case U'\U00011067': - nm = U"BRAHMI DIGIT ONE"; - break; - case U'\U00011068': - nm = U"BRAHMI DIGIT TWO"; - break; - case U'\U00011069': - nm = U"BRAHMI DIGIT THREE"; - break; - case U'\U0001106A': - nm = U"BRAHMI DIGIT FOUR"; - break; - case U'\U0001106B': - nm = U"BRAHMI DIGIT FIVE"; - break; - case U'\U0001106C': - nm = U"BRAHMI DIGIT SIX"; - break; - case U'\U0001106D': - nm = U"BRAHMI DIGIT SEVEN"; - break; - case U'\U0001106E': - nm = U"BRAHMI DIGIT EIGHT"; - break; - case U'\U0001106F': - nm = U"BRAHMI DIGIT NINE"; - break; - /* CUNEIFORM: */ - case U'\U00012031': - nm = U"CUNEIFORM SIGN AN PLUS NAGA SQUARED"; - break; - /* CUNEIFORM NUMBERS AND PUNCTUATION: */ - case U'\U0001242B': - nm = U"CUNEIFORM NUMERIC SIGN NINE SHAR2"; - break; - /* EGYPTIAN HIEROGLYPHS: */ - case U'\U000130B8': - nm = U"EGYPTIAN HIEROGLYPH D052"; - 
break; - /* COUNTING ROD NUMERALS: */ - case U'\U0001D372': - nm = U"IDEOGRAPHIC TALLY MARK ONE"; - break; - case U'\U0001D373': - nm = U"IDEOGRAPHIC TALLY MARK TWO"; - break; - case U'\U0001D374': - nm = U"IDEOGRAPHIC TALLY MARK THREE"; - break; - case U'\U0001D375': - nm = U"IDEOGRAPHIC TALLY MARK FOUR"; - break; - case U'\U0001D376': - nm = U"IDEOGRAPHIC TALLY MARK FIVE"; - break; - case U'\U0001D377': - nm = U"TALLY MARK ONE"; - break; - case U'\U0001D378': - nm = U"TALLY MARK FIVE"; - break; - /* ENCLOSED ALPHANUMERIC SUPPLEMENT: */ - case U'\U0001F10D': - nm = U"CIRCLED ZERO WITH SLASH"; - break; - case U'\U0001F10E': - nm = U"CIRCLED ANTICKLOCKWISE ARROW"; - break; - case U'\U0001F10F': - nm = U"CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH"; - break; - case U'\U0001F12F': - nm = U"COPYLEFT SYMBOL"; - break; - case U'\U0001F16D': - nm = U"CIRCLED CC"; - break; - case U'\U0001F16E': - nm = U"CIRCLED C WITH OVERLAID BACKSLASH"; - break; - case U'\U0001F16F': - nm = U"CIRCLED HUMAN FIGURE"; - break; - /* EMOTICONS: */ - case U'\U0001F600': - nm = U"GRINNING FACE"; - break; - case U'\U0001F601': - nm = U"GRINNING FACE WITH SMIRKING EYES"; - break; - case U'\U0001F602': - nm = U"FACE WITH TEARS OF JOY"; - break; - case U'\U0001F603': - nm = U"SMILING FACE WITH OPEN MOUTH"; - break; - case U'\U0001F604': - nm = U"SMILING FACE WITH OPEN MOUTH AND SMILING EYES"; - break; - case U'\U0001F605': - nm = U"SMILING FACE WITH OPEN MOUTH AND COULD SWEAT"; - break; - case U'\U0001F606': - nm = U"SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES"; - break; - case U'\U0001F607': - nm = U"SMILING FACE WITH HALO"; - break; - case U'\U0001F608': - nm = U"SMILING FACE WITH HORNS"; - break; - case U'\U0001F609': - nm = U"WINKING FACE"; - break; - case U'\U0001F60A': - nm = U"SMILING FACE WITH SMILING EYES"; - break; - case U'\U0001F60B': - nm = U"FACE SAVOURING DELICIOUS FOOD"; - break; - case U'\U0001F60C': - nm = U"RELIEVED FACE"; - break; - case U'\U0001F60D': - nm = U"SMILLING 
FACE HEART-SHAPED EYES"; - break; - case U'\U0001F60E': - nm = U"SMILLING FACE WITH SUNGLASSES"; - break; - case U'\U0001F60F': - nm = U"SMIRKING FACE"; - break; - case U'\U0001F610': - nm = U"NEUTRAL FACE"; - break; - case U'\U0001F611': - nm = U"EXPRESSIONLESS FACE"; - break; - case U'\U0001F612': - nm = U"UNAMUSED FACE"; - break; - case U'\U0001F613': - nm = U"FACE WITH COLD SWEAT"; - break; - case U'\U0001F614': - nm = U"PENSIVE FACE"; - break; - case U'\U0001F615': - nm = U"CONFUSED FACE"; - break; - case U'\U0001F616': - nm = U"CONFOUNDED FACE"; - break; - case U'\U0001F617': - nm = U"KISSING FACE"; - break; - case U'\U0001F618': - nm = U"FACE THROWING A KISS"; - break; - case U'\U0001F619': - nm = U"KISSING FACE WITH SMILLING EYES"; - break; - case U'\U0001F61A': - nm = U"KISSING FACE WITH CLOSED EYES"; - break; - case U'\U0001F61B': - nm = U"FACE WITH STUCK-OUT TONGUE"; - break; - case U'\U0001F61C': - nm = U"FACE WITH STUCK-OUT TONGUE AND WINKING EYE"; - break; - case U'\U0001F61D': - nm = U"FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES"; - break; - case U'\U0001F61E': - nm = U"DISSAPOINTED FACE"; - break; - case U'\U0001F61F': - nm = U"WORRIED FACE"; - break; - case U'\U0001F620': - nm = U"ANGRY FACE"; - break; - case U'\U0001F621': - nm = U"POUTING FACE"; - break; - case U'\U0001F622': - nm = U"CRYING FACE"; - break; - case U'\U0001F623': - nm = U"PERSEVERING FACE"; - break; - case U'\U0001F624': - nm = U"FACE WITH LOOK OF TRIUMPH"; - break; - case U'\U0001F625': - nm = U"DISSAPOINTED BUT RELIEVED FACE"; - break; - case U'\U0001F626': - nm = U"FROWNING FACE WITH OPEN MOUTH"; - break; - case U'\U0001F627': - nm = U"ANGUISHED FACE"; - break; - case U'\U0001F628': - nm = U"FEARFUL FACE"; - break; - case U'\U0001F629': - nm = U"WEARY FACE"; - break; - case U'\U0001F62A': - nm = U"SLEEPY FACE"; - break; - case U'\U0001F62B': - nm = U"TIRED FACE"; - break; - case U'\U0001F62C': - nm = U"GRIMACING FACE"; - break; - case U'\U0001F62D': - nm = U"LOUDLY 
CRYING FACE"; - break; - case U'\U0001F62E': - nm = U"FACE WITH OPEN MOUTH"; - break; - case U'\U0001F62F': - nm = U"HUSHED FACE"; - break; - case U'\U0001F630': - nm = U"FACE WITH OPEN MOUTH AND COLD SWEAT"; - break; - case U'\U0001F631': - nm = U"FACE SCREAMING IN FEAR"; - break; - case U'\U0001F632': - nm = U"ASTONISHED FACE"; - break; - case U'\U0001F633': - nm = U"FLUSHED FACE"; - break; - case U'\U0001F634': - nm = U"SLEEPING FACE"; - break; - case U'\U0001F635': - nm = U"DIZZY FACE"; - break; - case U'\U0001F636': - nm = U"FACE WITHOUT MOUTH"; - break; - case U'\U0001F637': - nm = U"FACE WITH MEDICAL MASK"; - break; - case U'\U0001F641': - nm = U"SLIGHTLY FROWNING FACE"; - break; - case U'\U0001F642': - nm = U"SLIGHTLY SMILING FACE"; - break; - case U'\U0001F643': - nm = U"UPSIDE-DOWN FACE"; - break; - case U'\U0001F644': - nm = U"FACE WITH ROLLING EYES"; - break; - /* CJK UNIFIED IDEOGRAPHS EXTENSION G: */ - case U'\U0003106C': - nm = U"CJK UNIFIED IDEOGRAPH-3106C"; - break; - } - { - struct u8c_strcp_tuple const tuple = u8c_strcp(nm); - ret.nm = tuple.str; - ret.nmsz = tuple.strsz; - } - return ret; -} diff --git a/src/u8c/str.h.d/stralloc.c b/src/u8c/str.h.d/stralloc.c deleted file mode 100644 index f9addcd..0000000 --- a/src/u8c/str.h.d/stralloc.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stdlib.h> -# include <u8c/err.h> -# include <u8c/str.h> -# include <uchar.h> -struct u8c_stralloc_tuple u8c_stralloc(size_t const _sz) { - struct u8c_stralloc_tuple ret = { - .stat = false, - }; - char32_t * arr = NULL; - if((arr = calloc(sizeof *arr,_sz)) == NULL) { - u8c_seterr(u8c_errtyp_badalloc,U"u8c_stralloc: Unable to allocate resources (not enough memory?)."); - ret.stat = true; - return ret; - } - ret.str = arr; - return ret; -} diff --git a/src/u8c/str.h.d/strcat.c b/src/u8c/str.h.d/strcat.c deleted file mode 100644 index 5e5f693..0000000 --- a/src/u8c/str.h.d/strcat.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stdlib.h> -# include <u8c/SIZE_C.h> -# include <u8c/err.h> -# include <u8c/str.h> -# include <uchar.h> -struct u8c_strcat_tuple u8c_strcat(char32_t const * const restrict _lstr,char32_t const * const restrict _rstr) { - struct u8c_strcat_tuple ret = { - .stat = false, - }; - size_t lsz = u8c_strsz(_lstr).sz; - size_t rsz = u8c_strsz(_rstr).sz; - ret.strsz = lsz + rsz; - char32_t * out = NULL; - { - struct u8c_stralloc_tuple const tuple = u8c_stralloc(ret.strsz + SIZE_C(0x1)); - if(tuple.stat) { - ret.stat = true; - return ret; - } - out = tuple.str; - } - for(register size_t n = SIZE_C(0x0);n < lsz;n += SIZE_C(0x1)) { - out[n] = _lstr[n]; - } - for(register size_t n = SIZE_C(0x0);n < rsz;n += SIZE_C(0x1)) { - out[n + lsz] = _rstr[n]; - } - ret.str = out; - return ret; -} diff --git a/src/u8c/str.h.d/strcmp.c b/src/u8c/str.h.d/strcmp.c deleted file mode 100644 index 31654d0..0000000 --- a/src/u8c/str.h.d/strcmp.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/err.h> -# include <u8c/str.h> -struct u8c_strcmp_tuple u8c_strcmp(char32_t const * const restrict _lstr,char32_t const * const restrict _rstr) { - struct u8c_strcmp_tuple ret = { - .stat = false, - }; - for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { - register char32_t const lchr = _lstr[n]; - register char32_t const rchr = _rstr[n]; - if(lchr != rchr) { - if(lchr < rchr) { - ret.res = UINT8_C(0x0); - return ret; - } - ret.res = UINT8_C(0x2); - return ret; - } - if(lchr == U'\x0') { - ret.res = UINT8_C(0x1); - return ret; - } - } - u8c_seterr(u8c_errtyp_untermin,U"u8c_strcmp: Unterminated input."); - ret.stat = true; - return ret; -} diff --git a/src/u8c/str.h.d/strcp.c b/src/u8c/str.h.d/strcp.c deleted file mode 100644 index 1343bf1..0000000 --- a/src/u8c/str.h.d/strcp.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stdlib.h> -# include <u8c/SIZE_C.h> -# include <u8c/err.h> -# include <u8c/str.h> -struct u8c_strcp_tuple u8c_strcp(char32_t const * const restrict _in) { - struct u8c_strcp_tuple ret = { - .stat = false, - }; - ret.strsz = u8c_strsz(_in).sz; - uint_least32_t * out = NULL; - { - struct u8c_stralloc_tuple const tuple = u8c_stralloc(ret.strsz + SIZE_C(0x1)); - if(tuple.stat) { - ret.stat = true; - return ret; - } - out = tuple.str; - } - for(register size_t n = SIZE_C(0x0);n < ret.strsz;n += SIZE_C(0x1)) { - out[n] = _in[n]; - } - ret.str = out; - return ret; -} diff --git a/src/u8c/str.h.d/strfndchr.c b/src/u8c/str.h.d/strfndchr.c deleted file mode 100644 index 93bb77c..0000000 --- a/src/u8c/str.h.d/strfndchr.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/err.h> -# include <u8c/str.h> -struct u8c_strfndchr_tuple u8c_strfndchr(char32_t const * const restrict _in,char32_t const _chr) { - struct u8c_strfndchr_tuple ret = { - .stat = false, - }; - for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { - register uint_least32_t const tmp = _in[n]; - if(tmp == U'\x0') { - if(_chr == U'\x0') { - ret.pos = n; - return ret; - } - ret.pos = SIZE_C(-0x1); - return ret; - } - if(tmp == _chr) { - ret.pos = n; - return ret; - } - } - u8c_seterr(u8c_errtyp_untermin,U"u8c_strfndchr: Unterminated input."); - ret.pos = SIZE_C(-0x1); - ret.stat = true; - return ret; -} diff --git a/src/u8c/str.h.d/strfndpat.c b/src/u8c/str.h.d/strfndpat.c deleted file mode 100644 index 1091238..0000000 --- a/src/u8c/str.h.d/strfndpat.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/err.h> -# include <u8c/str.h> -struct u8c_strfndpat_tuple u8c_strfndpat(char32_t const * const restrict _in,char32_t const * const restrict _pat) { - struct u8c_strfndpat_tuple ret = { - .stat = false, - }; - size_t insz = u8c_strsz(_in).sz; - size_t patsz = u8c_strsz(_pat).sz; - if(insz == SIZE_C(0x1) || insz < patsz) { - ret.pos = SIZE_C(-0x1); - return ret; - } - for(register size_t n = SIZE_C(0x0);n < insz - patsz;n += SIZE_C(0x1)) { - char32_t const * str = u8c_strsubstr(n,patsz - SIZE_C(0x1),_in).str; - uint_least8_t const cmpres = u8c_strcmp(str,_pat).res; - u8c_strfree(str); - if(cmpres == UINT8_C(0x1)) { - ret.pos = n; - return ret; - } - } - ret.pos = SIZE_C(-0x1); - return ret; -} diff --git a/src/u8c/str.h.d/strfree.c b/src/u8c/str.h.d/strfree.c deleted file mode 100644 index bf6d477..0000000 --- a/src/u8c/str.h.d/strfree.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stdint.h> -# include <stdlib.h> -# include <u8c/str.h> -struct u8c_strfree_tuple u8c_strfree(char32_t const * const restrict _str) { - struct u8c_strfree_tuple ret = { - .stat = false, - }; - free((char32_t *)_str); /* This cast does indeed discard a const-qualifier, but it is not undefined behaviour, as the array must have been allocated by calloc or malloc, meaning it's original type is not const-qualified. */ - return ret; -} diff --git a/src/u8c/str.h.d/strins.c b/src/u8c/str.h.d/strins.c deleted file mode 100644 index 89173ae..0000000 --- a/src/u8c/str.h.d/strins.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <u8c/SIZE_C.h> -# include <u8c/str.h> -# include <uchar.h> -struct u8c_strins_tuple u8c_strins(size_t const _pos,char32_t const * const restrict _str0,char32_t const * const restrict _str1) { - struct u8c_strins_tuple ret = { - .stat = false, - }; - char32_t const * lstr = u8c_strsubstr(SIZE_C(0x0),_pos - SIZE_C(0x1),_str0).str; - char32_t const * rstr = u8c_strsubstr(_pos,SIZE_C(0x0),_str0).str; - ret.strsz = SIZE_C(0x0); - char32_t const * out = NULL; - { - char32_t const * tmp = u8c_strcat(lstr,_str1).str; - u8c_strfree(lstr); - out = u8c_strcat(tmp,rstr).str; - u8c_strfree(rstr); - u8c_strfree(tmp); - } - ret.str = out; - return ret; -} diff --git a/src/u8c/str.h.d/strsubstr.c b/src/u8c/str.h.d/strsubstr.c deleted file mode 100644 index b9daac5..0000000 --- a/src/u8c/str.h.d/strsubstr.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stdlib.h> -# include <u8c/SIZE_C.h> -# include <u8c/str.h> -# include <uchar.h> -struct u8c_strsubstr_tuple u8c_strsubstr(size_t const _start,size_t const _len,char32_t const * const restrict _in) { - struct u8c_strsubstr_tuple ret = { - .stat = false, - }; - size_t insz = u8c_strsz(_in).sz; - size_t len = _len; - if(_len == SIZE_C(0x0)) { - len = insz - _start; - } - if(insz < _start + len) { - return ret; - } - size_t const outsz = len + SIZE_C(0x2); - char32_t * out = NULL; - { - struct u8c_stralloc_tuple const tuple = u8c_stralloc(outsz); - if(tuple.stat) { - ret.stat = true; - return ret; - } - out = tuple.str; - } - for(register size_t n = SIZE_C(0x0);n <= len;n += SIZE_C(0x1)) { - out[n] = _in[n + _start]; - } - ret.str = out; - return ret; -} diff --git a/src/u8c/str.h.d/strsz.c b/src/u8c/str.h.d/strsz.c deleted file mode 100644 index f1b348a..0000000 --- a/src/u8c/str.h.d/strsz.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/str.h> -# include <uchar.h> -struct u8c_strsz_tuple u8c_strsz(char32_t const * const restrict _in) { - struct u8c_strsz_tuple ret = { - .stat = false, - }; - { - struct u8c_strfndchr_tuple const tuple = u8c_strfndchr(_in,UINT8_C(0x0)); - if(tuple.stat) { - ret.stat = true; - return ret; - } - ret.sz = tuple.pos; - } - return ret; -} diff --git a/src/u8c/u16.h.d/u16alloc.c b/src/u8c/u16.h.d/u16alloc.c deleted file mode 100644 index ce20ecb..0000000 --- a/src/u8c/u16.h.d/u16alloc.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stdlib.h> -# include <u8c/err.h> -# include <u8c/u16.h> -# include <uchar.h> -struct u8c_u16alloc_tuple u8c_u16alloc(size_t const _sz) { - struct u8c_u16alloc_tuple ret = { - .stat = false, - }; - char16_t * arr = NULL; - if((arr = calloc(sizeof *arr,_sz)) == NULL) { - u8c_seterr(u8c_errtyp_badalloc,U"u8c_u16alloc: Unable to allocate resources (not enough memory?)."); - ret.stat = true; - return ret; - } - ret.u16 = arr; - return ret; -} diff --git a/src/u8c/u16.h.d/u16free.c b/src/u8c/u16.h.d/u16free.c deleted file mode 100644 index 43e7503..0000000 --- a/src/u8c/u16.h.d/u16free.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdbool.h> -# include <stdint.h> -# include <stdlib.h> -# include <u8c/u16.h> -struct u8c_u16free_tuple u8c_u16free(char16_t const * const restrict _u16) { - struct u8c_u16free_tuple ret = { - .stat = false, - }; - free((char16_t *)_u16); - return ret; -} diff --git a/src/u8c/u8.h.d/u8alloc.c b/src/u8c/u8.h.d/u8alloc.c deleted file mode 100644 index ba28243..0000000 --- a/src/u8c/u8.h.d/u8alloc.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. 
- - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <stdbool.h> -# include <stdlib.h> -# include <u8c/err.h> -# include <u8c/u8.h> -struct u8c_u8alloc_tuple u8c_u8alloc(size_t const _sz) { - struct u8c_u8alloc_tuple ret = { - .stat = false, - }; - unsigned char * arr = NULL; - if((arr = calloc(sizeof *arr,_sz)) == NULL) { - u8c_seterr(u8c_errtyp_badalloc,U"u8c_u8alloc: Unable to allocate resources (not enough memory?)."); - ret.stat = true; - return ret; - } - ret.u8 = arr; - return ret; -} diff --git a/src/u8c/u8.h.d/u8dec.c b/src/u8c/u8.h.d/u8dec.c deleted file mode 100644 index 4cba14f..0000000 --- a/src/u8c/u8.h.d/u8dec.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <assert.h> -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/err.h> -# include <u8c/str.h> -# include <u8c/u8.h> -# include <uchar.h> -struct u8c_u8dec_tuple u8c_u8dec(unsigned char const * const restrict _in) { - struct u8c_u8dec_tuple ret = { - .stat = false, - }; - register size_t insz = SIZE_C(0x0); - for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;ret.strsz += SIZE_C(0x1)) { /* First pass: get size of input array and determine size of output array. */ - register unsigned char const tmp = _in[n]; - if(tmp == UINT8_C(0x0)) { /* Null-terminator: end of string has been reached. */ - insz = n; - goto nottoobig; - } - if(tmp >= UINT8_C(0b11111000)) { /* Too big. */ - u8c_seterr(u8c_errtyp_u8oor,U"u8c_u8dec: Character out of range (too big)."); - ret.stat = true; - return ret; - } - if(tmp >= UINT8_C(0b11110000)) { /* Four byte. */ - n += SIZE_C(0x4); - continue; - } - if(tmp >= UINT8_C(0b11100000)) { /* Three bytes. */ - n += SIZE_C(0x3); - continue; - } - if(tmp >= UINT8_C(0b11000000)) { /* Two bytes. */ - n += SIZE_C(0x2); - continue; - } - /* One byte. */ - n += SIZE_C(0x1); - } - /* Input is not null-terminated. */ - u8c_seterr(u8c_errtyp_untermin,U"u8c_u8dec: Unterminated input."); - ret.stat = true; - return ret; -nottoobig:; - uint_least32_t * out = NULL; - { - struct u8c_stralloc_tuple const tuple = u8c_stralloc(ret.strsz + SIZE_C(0x1)); - if(tuple.stat) { - ret.stat = true; - return ret; - } - out = tuple.str; - } - for(register size_t n = SIZE_C(0x0),outn = SIZE_C(0x0);n < insz;outn += SIZE_C(0x1)) { /* Second pass: decode UTF-8. */ - if(_in[n] >= UINT8_C(0b11110000)) { /* Four bytes. 
*/ - uint_least32_t codep = (_in[n] ^ UINT32_C(0b11110000)) << UINT32_C(0x12); - n += SIZE_C(0x1); - codep += (_in[n] ^ UINT32_C(0b10000000)) << UINT32_C(0xC); - n += SIZE_C(0x1); - codep += (_in[n] ^ UINT32_C(0b10000000)) << UINT32_C(0x6); - n += SIZE_C(0x1); - codep += (uint_least32_t)(_in[n]) ^ SIZE_C(0b10000000); - n += SIZE_C(0x1); - out[outn] = codep; - continue; - } - if(_in[n] >= UINT8_C(0b11100000)) { /* Three bytes. */ - uint_least32_t codep = (_in[n] ^ UINT32_C(0b11100000)) << UINT32_C(0xC); - n += SIZE_C(0x1); - codep += (_in[n] ^ UINT32_C(0b10000000)) << UINT32_C(0x6); - n += SIZE_C(0x1); - codep += _in[n] ^ UINT32_C(0b10000000); - n += SIZE_C(0x1); - out[outn] = codep; - continue; - } - if(_in[n] >= UINT8_C(0b11000000)) { /* Two bytes. */ - uint_least32_t codep = (_in[n] ^ UINT32_C(0b11000000)) << UINT32_C(0x6); - n += SIZE_C(0x1); - codep += _in[n] ^ UINT32_C(0b10000000); - n += SIZE_C(0x1); - out[outn] = codep; - continue; - } - /* One byte. */ - out[outn] = (uint_least32_t)(_in[n]); - n += SIZE_C(0x1); - continue; - } - ret.str = out; - return ret; -} diff --git a/src/u8c/u8.h.d/u8enc.c b/src/u8c/u8.h.d/u8enc.c deleted file mode 100644 index 2ac0007..0000000 --- a/src/u8c/u8.h.d/u8enc.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <assert.h> -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/err.h> -# include <u8c/main.h> -# include <u8c/str.h> -# include <u8c/u8.h> -# include <uchar.h> -struct u8c_u8enc_tuple u8c_u8enc(char32_t const * const restrict _in) { - struct u8c_u8enc_tuple ret = { - .stat = false, - }; - size_t insz = SIZE_C(0x0); /* Size of input array (bytes). */ - for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { /* First pass: get size of input array, and determine size of output array. */ - register char32_t const tmp = _in[n]; - if(tmp > u8c_unimax) { /* Codepoint out of range. */ - u8c_seterr(u8c_errtyp_stroor,U"u8c_u8enc: Codepoint out of range (too big)."); - ret.stat = true; - return ret; - } - if(tmp >= UINT32_C(0x10000)) { /* 4 bytes. */ - ret.u8sz += SIZE_C(0x4); - continue; - } - if(tmp >= UINT32_C(0x800)) { /* 3 bytes. */ - ret.u8sz += SIZE_C(0x3); - continue; - } - if(tmp >= UINT32_C(0x80)) { /* 2 bytes. */ - ret.u8sz += SIZE_C(0x2); - continue; - } - /* 1 byte. */ - ret.u8sz += SIZE_C(0x1); - if(tmp == UINT32_C(0x0)) { - insz = n + SIZE_C(0x1); - goto nottoobig; - } - } - u8c_seterr(u8c_errtyp_untermin,U"u8c_u8enc: Unterminated input."); - ret.stat = true; - return ret; -nottoobig:; - unsigned char * out = NULL; - { - struct u8c_u8alloc_tuple const tuple = u8c_u8alloc(ret.u8sz + SIZE_C(0x1)); - if(tuple.stat) { - ret.stat = true; - return ret; - } - out = tuple.u8; - } - for(register size_t n = SIZE_C(0x0), outn = SIZE_C(0x0);n < insz;n += SIZE_C(0x1),outn += SIZE_C(0x1)) { /* Second pass: encode each codepoint into UTF-8. */ - register char32_t const tmp = _in[n]; - if(tmp >= UINT32_C(0x10000)) { // Four bytes. 
- out[outn] = UINT8_C(0b11110000) + (uint_least8_t)(tmp >> UINT32_C(0x12)); - outn += SIZE_C(0x1); - out[outn] = UINT8_C(0b10000000) + (uint_least8_t)(tmp >> UINT32_C(0xC) & UINT8_C(0b00111111)); - outn += SIZE_C(0x1); - out[outn] = UINT8_C(0b10000000) + (uint_least8_t)(tmp >> UINT32_C(0x6) & UINT8_C(0b00111111)); - outn += SIZE_C(0x1); - out[outn] = UINT8_C(0b10000000) + (uint_least8_t)(tmp & UINT32_C(0b00111111)); - continue; - } - if(tmp >= UINT32_C(0x800)) { /* Three bytes. */ - out[outn] = UINT8_C(0xE0) + (uint_least8_t)(tmp >> UINT32_C(0xC)); - outn += SIZE_C(0x1); - out[outn] = UINT8_C(0x80) + (uint_least8_t)(tmp >> UINT32_C(0x6) & UINT8_C(0b00111111)); - outn += SIZE_C(0x1); - out[outn] = UINT8_C(0x80) + (uint_least8_t)(tmp & UINT32_C(0b00111111)); - continue; - } - if(tmp >= UINT32_C(0x80)) { /* Two bytes. */ - out[outn] = UINT8_C(0xC0) + (uint_least8_t)(tmp >> UINT8_C(0x6)); - outn += SIZE_C(0x1); - out[outn] = UINT8_C(0x80) + (uint_least8_t)(tmp & UINT8_C(0b00111111)); - continue; - } - /* One byte. */ - out[outn] = (uint_least8_t)tmp; - } - ret.u8 = out; - return ret; -} diff --git a/src/u8c/u8.h.d/u8free.c b/src/u8c/u8.h.d/u8free.c deleted file mode 100644 index a0b61a8..0000000 --- a/src/u8c/u8.h.d/u8free.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stdint.h> -# include <stdlib.h> -# include <u8c/u8.h> -struct u8c_u8free_tuple u8c_u8free(unsigned char const * const restrict _u8) { - struct u8c_u8free_tuple ret = { - .stat = false, - }; - free((unsigned char *)_u8); - return ret; -} @@ -1,392 +0,0 @@ -# include <inttypes.h> -# include <stdarg.h> -# include <stdint.h> -# include <stdio.h> -# include <stdlib.h> -# include <string.h> -# include <u8c/SIZE_C.h> -# include <u8c/chk.h> -# include <u8c/err.h> -# include <u8c/fmt.h> -# include <u8c/main.h> -# include <u8c/str.h> -# include <u8c/u8.h> -static void errhandl(enum u8c_errtyp errtyp); -static int hlpscrn( char const * const restrict nm); -static void test( int n); -static char const * testnm( int n); -static const int maxtest = 0x17; -static void errhandl(enum u8c_errtyp errtyp) { - printf(":: Error handler called with type %d.\n",(int)errtyp); -} -static int hlpscrn(char const * const restrict nm) { - printf("u8c-test: Test u8c\n"); - printf("Usage: %s [test number]\n",nm); - printf("\n"); - printf("Test numbers:\n"); - for(int n = 0x0;n <= maxtest;n += 0x1) { - printf("\t %i - \"%s\"\n",n,testnm(n)); - } - printf("\n"); - return EXIT_SUCCESS; -} -static void test(int n) { - char const * const restrict _testnm = testnm(n); - /* printf("\n+->\n| \x1b[38:2::169:225:61mTesting\x1b[0m \""%s"\"...\n+->\n\n",_testnm); */ /* This command works in all of the terminals I tested, except Konsole (whic is funny, because it's xterm-based (and sets the TERM and COLORTERM environemnt variable to "xterm-256color" and "truecolor"), and xterm supports it). 
*/ - printf("\n+->\n| \x1b[38;2;169;225;61mTesting\x1b[0m #%i \"%s\"...\n+->\n\n",n,_testnm); - switch(n) { - case 0x0: - { - char32_t const * err = u8c_geterr().err; - printf("default error message: "); - u8c_println(stdout,err); - u8c_seterr(u8c_errtyp_deferr,U"Gluchwein!"); - u8c_strfree(err); - err = u8c_geterr().err; - printf("set error message: "); - u8c_println(stdout,err); - u8c_strfree(err); - } - break; - case 0x1: - { - char32_t const * msg0 = U"¢,ह,𐍈,€,↊,👋"; - unsigned char const * msg1 = u8c_u8enc(msg0).u8; - printf("Encoded: %s\n",msg1); - u8c_u8free(msg1); - msg0 = u8c_u8dec(msg1).str; - msg1 = u8c_u8enc(msg0).u8; - printf("Encoded -> Decoded -> Encoded: %s\n",msg1); - u8c_strfree(msg0); - u8c_u8free(msg1); - } - break; - case 0x2: - { - u8c_print(stdout,U"Hello"); - u8c_print(stdout,U" ðere!\n"); - } - break; - case 0x3: - { - u8c_println(stdout,U"Hello"); - u8c_println(stdout,U" ðere!"); - } - break; - case 0x4: - { - u8c_println(stdout,U"The \uFFFCnumber\uFFFC is \uFFFC.",u8c_fmttyp_fgcol,u8c_col_mint,u8c_fmttyp_fgcol0,u8c_fmttyp_int,(int_least64_t){-0x10}); - } - break; - case 0x5: - { - u8c_println(stdout,U"\uFFFCred\uFFFCorange\uFFFCyellow\uFFFCchartreuse\uFFFCgreen\uFFFCmint\uFFFCcyan\uFFFCazure\uFFFCblue\uFFFCviolet\uFFFCmagenta\uFFFCrose\uFFFC",u8c_fmttyp_fgcol,u8c_col_red,u8c_fmttyp_fgcol,u8c_col_orange,u8c_fmttyp_fgcol,u8c_col_yellow,u8c_fmttyp_fgcol,u8c_col_chartreuse,u8c_fmttyp_fgcol,u8c_col_green,u8c_fmttyp_fgcol,u8c_col_mint,u8c_fmttyp_fgcol,u8c_col_cyan,u8c_fmttyp_fgcol,u8c_col_azure,u8c_fmttyp_fgcol,u8c_col_blue,u8c_fmttyp_fgcol,u8c_col_violet,u8c_fmttyp_fgcol,u8c_col_magenta,u8c_fmttyp_fgcol,u8c_col_rose,u8c_fmttyp_fgcol0); - } - break; - case 0x6: - { - for(register uint_least32_t n = UINT32_C(0x300);n <= UINT32_C(0x36F);n += UINT32_C(0x1)) { - u8c_print(stdout,(uint_least32_t[]){UINT32_C(0x61),n,UINT32_C(0x20),UINT32_C(0x0),}); - fflush(stdout); - } - u8c_print(stdout,U"\n"); - } - break; - case 0x7: - { - char32_t const * str0 = 
U"Hello"; - char32_t const * str1 = U"Hello"; - char32_t const * str2 = U"Goodbye"; - printf("str0: "); - u8c_println(stdout,str0); - printf("str1: "); - u8c_println(stdout,str1); - printf("str2: "); - u8c_println(stdout,str2); - uint_least8_t res = u8c_strcmp(str0,str1).res; - printf("str0,str1: %" PRIXLEAST8 ".\n",res); - res = u8c_strcmp(str1,str2).res; - printf("str0,str2: %" PRIXLEAST8 ".\n",res); - res = u8c_strcmp(str2,str1).res; - printf("str2,str1: %" PRIXLEAST8 ".\n",res); - } - break; - case 0x8: - { - for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - bool const res = u8c_isalnum(n).res; - if(res) { - u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); - } - } - u8c_println(stdout,U""); - } - break; - case 0x9: - { - for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - bool const res = u8c_isalpha(n).res; - if(res) { - u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); - } - } - u8c_println(stdout,U""); - } - break; - case 0xA: - { - for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - bool const res = u8c_isdigit(n).res; - if(res) { - u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); - } - } - u8c_println(stdout,U""); - } - break; - case 0xB: - { - for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - bool const res = u8c_ispunct(n).res; - if(res) { - u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); - } - } - u8c_println(stdout,U""); - } - break; - case 0xC: - { - u8c_println(stdout,U"Can you see ðis?"); - } - break; - case 0xD: - { - char32_t const * str0 = U"Free_as_in"; - char32_t const * str1 = U"_freedom!"; - char32_t const * str2 = u8c_strcat(str0,str1).str; - printf("string #0: "); - u8c_println(stdout,str0); - printf("string #1: "); - u8c_println(stdout,str1); - printf("string #2: "); - 
u8c_println(stdout,str2); - u8c_strfree(str2); - - } - break; - case 0xE: - { - char32_t const * str0 = U"I_wish_to_suck_big_duck."; - char32_t const * str1 = u8c_strsubstr(SIZE_C(0x0),SIZE_C(0xE),str0).str; - char32_t const * str2 = u8c_strsubstr(SIZE_C(0xF),SIZE_C(0x0),str0).str; - printf("string #0: "); - u8c_println(stdout,str0); - printf("string #1: "); - u8c_println(stdout,str1); - printf("string #2: "); - u8c_println(stdout,str2); - u8c_strfree(str1); - u8c_strfree(str2); - } - break; - case 0xF: - { - for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - bool const res = u8c_isxdigit(n).res; - if(res) { - u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); - } - } - u8c_println(stdout,U""); - } - break; - case 0x10: - { - u8c_dbgprint(U"Hello"); - u8c_dbgprint(U" ðere!"); - } - break; - case 0x11: - { - char32_t const * str = U"Proprietary as in Micro$oft."; - size_t pos0 = u8c_strfndchr(str,U'M').pos; - size_t pos1 = u8c_strfndchr(str,U'ŋ').pos; - printf("string: "); - u8c_println(stdout,str); - printf("Position of 'M': %zu\n",pos0); - printf("Position of 'ŋ': %zu\n",pos1); - } - break; - case 0x12: - { - char32_t const * str = U"Proprietary as in Micro$oft."; - size_t pos0 = u8c_strfndpat(str,U"as in").pos; - size_t pos1 = u8c_strfndpat(str,U"forever").pos; - printf("string: "); - u8c_println(stdout,str); - printf("Position of \"as in\": %zu\n",pos0); - printf("Position of \"forever\": %zu\n",pos1); - } - break; - case 0x13: - { - char32_t const * str0 = U"There_is_I_love."; - char32_t const * str1 = U"just_somebody_that_"; - char32_t const * str2 = u8c_strins(SIZE_C(0x9),str0,str1).str; - printf("String #0: "); - u8c_println(stdout,str0); - printf("String #1: "); - u8c_println(stdout,str1); - printf("String #2: "); - u8c_println(stdout,str2); - u8c_strfree(str2); - } - break; - case 0x14: - { - for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - bool 
const res = u8c_islower(n).res; - if(res) { - u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); - } - } - u8c_println(stdout,U""); - } - break; - case 0x15: - { - for(register uint_least32_t n = UINT32_C(0x1);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - bool const res = u8c_isupper(n).res; - if(res) { - u8c_print(stdout,(uint_least32_t[]){n,UINT32_C(0x20),UINT32_C(0x0),}); - } - } - u8c_println(stdout,U""); - } - break; - case 0x16: - { - register uint_least32_t num = UINT32_C(0x0); - for(register uint_least32_t n = UINT32_C(0x0);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1),num += UINT32_C(0x1)) { - { - bool const res = u8c_issurro(n).res; - if(res) { - num -= UINT32_C(0x1); - continue; - } - } - char32_t const * nm = u8c_uninm(n).nm; - uint_least8_t res = u8c_strcmp(nm,U"UNDEFINED IN UNICODE").res; - if(res == UINT8_C(0x1)) { - num -= UINT32_C(0x1); - u8c_strfree(nm); - continue; - } - printf("U+%" PRIXLEAST32 " ",n); - u8c_println(stdout,nm); - u8c_strfree(nm); - } - printf("\n:: The number of mapped (named) codepoints is %" PRIuLEAST32 ", which is %f%% of the total number of defined Unicode codepoints (143859).\n",num,((double)num) / (double)UINT32_C(0x231F3) * (double)UINT8_C(0x64)); - } - break; - case 0x17: - { - char32_t const * lastblk = u8c_strcp(U"").str; - for(register uint_least32_t n = UINT32_C(0x0);n <= UINT32_C(0x10FFFF);n += UINT32_C(0x1)) { - char32_t const * blk = u8c_uniblk(n).blk; - if(u8c_strcmp(blk,lastblk).res == UINT8_C(0x1) || u8c_strcmp(blk,U"UNDEFINED IN UNICODE").res == UINT8_C(0x1)) { - u8c_strfree(blk); - continue; - } - u8c_strfree(lastblk); - lastblk = u8c_strcp(blk).str; - printf("U+%" PRIXLEAST32 " = ",n); - u8c_println(stdout,blk); - u8c_strfree(blk); - } - u8c_strfree(lastblk); - } - break; - } - /* printf("\n+->\n| \x1b[38:2::61:225:169mDone\x1b[0m testing (%i) \"%s\"!\n+->\n",n,_testnm); */ - printf("\n+->\n| \x1b[38;2;61;225;169mDone\x1b[0m testing #%i \"%s\"!\n+->\n\n",n,_testnm); -} -static char const * 
testnm(int n) { - switch(n) { - default: - return "N/A"; - case 0x0: - return "Error messages"; - case 0x1: - return "UTF-8 encoding/decoding"; - case 0x2: - return "Printing (u8c_print)"; - case 0x3: - return "Printing (u8c_println)"; - case 0x4: - return "Text formatting"; - case 0x5: - return "Colour text"; - case 0x6: - return "Combining characters"; - case 0x7: - return "String comparison"; - case 0x8: - return "u8c_isalnum"; - case 0x9: - return "u8c_isalpha"; - case 0xA: - return "u8c_isdigit"; - case 0xB: - return "u8c_ispunct"; - case 0xC: - return "UTF-32 string literals"; - case 0xD: - return "String concatenation"; - case 0xE: - return "Sub-strings"; - case 0xF: - return "u8c_isxdigit"; - case 0x10: - return "Printing (u8c_dbgprint)"; - case 0x11: - return "u8c_strfndchr"; - case 0x12: - return "u8c_strfndpat"; - case 0x13: - return "String insertion"; - case 0x14: - return "u8c_islower"; - case 0x15: - return "u8c_isupper"; - case 0x16: - return "u8c_uninm"; - case 0x17: - return "u8c_uniblk"; - } -} -int main(int const argc,char const * * argv) { - if(argc == 0x2 && !strcmp(argv[SIZE_C(0x1)],"--help")) { - exit(hlpscrn(argv[SIZE_C(0x0)])); - } - if(u8c_init().stat) { - printf("Unable to initialise u8c!\n"); - exit(EXIT_FAILURE); - } - u8c_regerrhandl(u8c_errtyp_all,errhandl); - u8c_setfmt(UINT8_C(0xC),UINT8_C(0x1)); - printf("u8c version: %" PRIXLEAST64 "\n",u8c_ver); - printf("Debug build: %s\n",u8c_dbg ? "true" : "false"); - printf("Thread safe: %s\n",u8c_thrdsafe ? 
"true" : "false"); - if(argc == 0x2) { - test(atoi(argv[SIZE_C(0x1)])); - } - else { - for(int n = 0x0;n <= 0x17;n += 0x1) { - test(n); - } - } - u8c_end(); - exit(EXIT_SUCCESS); -} diff --git a/u8c-check/src/test.cc b/u8c-check/src/test.cc new file mode 100644 index 0000000..3f72a1e --- /dev/null +++ b/u8c-check/src/test.cc @@ -0,0 +1,118 @@ +#if defined(NDEBUG) +#undef NDEBUG +#endif + +#include <chrono> /* std::chrono::duration, std::chrono::high_resolution_clock */ +#include <cstdlib> /* EXIT_FAILURE, EXIT_SUCCESS, std::exit */ +#include <cstring> /* std::strcmp */ +#include <iostream> /* std::cerr, std::cout, std::endl */ +#include <limits> /* std::numeric_limits */ +#include <u8c/u8c> + +# include "test0.inl" +# include "test1.inl" + +static_assert(u8c::abs(-0x1) == 0x1); +static_assert(u8c::abs(-0x1p0) == 0x1p0); +static_assert(u8c::abs(-0x100p0) == 0x100p0); + +static_assert(u8c::fma(0x10,0x10,0x100) == 0x200); + +static_assert(!u8c::isinf(0x0)); +static_assert(u8c::isinf(std::numeric_limits<float>::infinity())); + +static_assert(!u8c::isnan(0x0)); +static_assert(u8c::isnan(std::numeric_limits<float>::quiet_NaN())); + +static_assert(u8c::isprime(0x2u)); +static_assert(u8c::isprime(0x3u)); +static_assert(!u8c::isprime(0x4u)); +static_assert(u8c::isprime(0x5u)); +static_assert(!u8c::isprime(0x6u)); +static_assert(u8c::isprime(0x7u)); +static_assert(!u8c::isprime(0x8u)); +static_assert(!u8c::isprime(0x9u)); +static_assert(!u8c::isprime(0xAu)); +static_assert(u8c::isprime(0xBu)); +static_assert(!u8c::isprime(0xCu)); +static_assert(u8c::isprime(0xDu)); +static_assert(!u8c::isprime(0xEu)); +static_assert(!u8c::isprime(0xFu)); + +static_assert(u8c::pow(0x1,0x10000) == 0x1); +static_assert(u8c::pow(0x2,0x2) == 0x4); +static_assert(u8c::pow(0x2,0x4) == 0x10); +static_assert(u8c::pow(0x2,0x10) == 0x10000); +static_assert(u8c::pow(0x3,0x3) == 0x1B); + +static_assert(u8c::quota(0x1,0x3) < u8c::quota<>::inf()); +static_assert(u8c::quota(0x1,0x3) == u8c::quota(0x2,0x6)); 
+static_assert(u8c::quota<>::inf() == u8c::quota<>::inf()); +static_assert(u8c::quota<>::nan() != u8c::quota<>::nan()); + +static_assert(u8c::trunc(static_cast<u8c::ubyte>(std::numeric_limits<u8c::byte>::max()) + u8c_uint16c(0x1),u8c_bytec(0x0)) == std::numeric_limits<u8c::byte>::min()); + +static_assert(u8c::cstrlen("This is a string!") == 0x11uz); +static_assert(u8c::cstrlen("Das war ein Befehl!") == 0x13uz); + +static_assert(u8c::cstrcmp("Clang","Clang") == u8c_bytec(0x0)); +static_assert(u8c::cstrcmp("Clang","GCC") == u8c_bytec(0x1)); +static_assert(u8c::cstrcmp("GCC","Clang") == u8c_bytec(-0x1)); +static_assert(u8c::cstrcmp("GCC","GCC") == u8c_bytec(0x0)); + +auto main(int const argc,char const * const * const argv) -> int { + int constexpr maxtestn = 0x1; + auto gettestnm = [](int const _n) { + switch (_n) { + [[unlikely]] default: + return "N/A"; + case 0x0: + return "Array Stress-testing"; + case 0x1: + return "Strings"; + } + }; + auto helpscrn = [&](char const * const _prognm) { + std::cout << "u8c-test: Test u8c" << std::endl; + std::cout << "Usage: " << _prognm << " [test number]" << std::endl; + std::cout << std::endl; + std::cout << "Test numbers:" << std::endl; + for (int n = 0x0;n <= maxtestn;n += 0x1) { + std::cout << "\t " << n << " - \"" << gettestnm(n) << "\"" << std::endl; + } + std::cout << std::endl; + std::cout << "u8c version: " << u8c::ver << std::endl; + }; + auto test = [&](int const _n) { + auto const testnm = gettestnm(_n); + std::cout << ":: \u001B[95mTesting\u001B[0m test #" << _n << " \u001B[3m\"" << testnm << "\"\u001B[0m..." 
<< std::endl << std::endl; + auto begin = std::chrono::high_resolution_clock::now(); + switch (_n) { + [[unlikely]] default: + std::exit(EXIT_FAILURE); + case 0x0: + ::test0(); + break; + case 0x1: + ::test1(); + break; + } + auto const end = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> const tmdiff = end - begin; + std::cout << std::endl << ":: \u001B[96mDone\u001B[0m testing test #" << _n << " \u001B[3m\"" << testnm << "\"\u001B[0m (took " << tmdiff.count() << " seconds)." << std::endl; + }; + if (argc > 0x1) { + if (!std::strcmp(argv[0x1uz],"--help")) { + helpscrn(argv[0x0uz]); + std::exit(EXIT_SUCCESS); + } + else { + std::cerr << "Invalid argument \"\u001B[3m" << argv[0x1uz] << "\"\u001B[0m." << std::endl; + std::exit(EXIT_FAILURE); + } + } + for (int n = 0x0;n <= maxtestn;n += 0x1) { + test(n); + } + std::exit(EXIT_SUCCESS); +} diff --git a/u8c-check/src/test0.inl b/u8c-check/src/test0.inl new file mode 100644 index 0000000..5b4db6c --- /dev/null +++ b/u8c-check/src/test0.inl @@ -0,0 +1,49 @@ +#include <iostream> /* std::cerr, std::endl */ +#include <limits> /* std::numeric_limits */ +#include <random> /* std::random_device, std::uniform_int_distribution */ +#include <u8c/arr> + +auto test0() -> void { + std::cerr << "Constructing array of 256 elements, each with a value of 15..."; + u8c::arr<int> arr(0x100uz,0xF); + u8c_assert(arr.sz() == 0x100uz); + u8c_assert(static_cast<u8c::size>(arr.end() - arr.begin()) == arr.sz()); + for (auto const elm : arr) { + u8c_assert(elm == 0xF); + } + std::cerr << " okay." << std::endl; + std::random_device rd; + { + std::uniform_int_distribution<int> distr(0x0,std::numeric_limits<int>::max()); + for (u8c::byte n = u8c_bytec(0x0);n <= u8c_bytec(0x10);n += u8c_ubytec(0x1)) { + auto const val = distr(rd); + std::cerr << "Filling array with the value of " << val << "..."; + arr.fill(val); + for (auto const elm : arr) { + u8c_assert(elm == val); + } + std::cerr << " okay." 
<< std::endl;
+		}
+	}
+	{
+		std::uniform_int_distribution<u8c::size> distr(0x1,0xFFF);
+		for (u8c::byte n = u8c_bytec(0x0);n <= u8c_bytec(0x4);n += u8c_ubytec(0x1)) {
+			auto const sz = distr(rd);
+			std::cerr << "Allocating the array to the size of " << sz << "...";
+			arr.alloc(sz);
+			u8c_assert(arr.sz() == sz);
+			std::cerr << " okay." << std::endl;
+		}
+	}
+	std::cerr << "Doing some additionel tests...";
+	arr.alloc(0x2uz);
+	u8c_assert(arr.sz() == 0x2uz);
+	arr.fill(0xF);
+	u8c_assert(arr[0x0uz] == 0xF);
+	u8c_assert(arr[0x1uz] == 0xF);
+	arr.realloc(0x4uz);
+	u8c_assert(arr.sz() == 0x4uz);
+	u8c_assert(arr[0x0uz] == 0xF);
+	u8c_assert(arr[0x1uz] == 0xF);
+	std::cerr << " okay." << std::endl;
+}
diff --git a/u8c-check/src/test1.inl b/u8c-check/src/test1.inl
new file mode 100644
index 0000000..4e2a18e
--- /dev/null
+++ b/u8c-check/src/test1.inl
@@ -0,0 +1,24 @@
+#include <cstdint> /* u8c_uint32c, u8c_ubytec, std::int_fast8_t, std::uint_least32_t, u8c::uint32 */
+#include <iomanip> /* std::hex */
+#include <iostream> /* std::cout, std::endl */
+#include <random> /* std::random_device, std::uniform_int_distribution */
+
+auto test1() -> void { /* Prints what u8c reports (name, block, character classes) for five random code points from U+0000 to U+0100. */
+	std::random_device rd;
+	std::uniform_int_distribution<std::uint_least32_t> distr(u8c_uint32c(0x0),u8c_uint32c(0x100)); /* char32_t is not an integer type the standard permits for this template, so a plain unsigned type is drawn instead. */
+	for(std::int_fast8_t n = u8c_bytec(0x0);n <= u8c_bytec(0x4);n += u8c_ubytec(0x1)) {
+		auto const chr = static_cast<char32_t>(distr(rd)); /* Back to char32_t, the type the u8c functions below were called with before. */
+		std::cout << "U+" << std::hex << static_cast<u8c::uint32>(chr) << " (\"" << u8c::uninm(chr) << "\" @ \"" << u8c::uniblk(chr) << "\")" << std::endl;
+		std::cout << "Is alphanumeric: " << u8c::isalnum(chr) << std::endl;
+		std::cout << "Is alphabetic: " << u8c::isalpha(chr) << std::endl;
+		std::cout << "Is control character: " << u8c::iscntrl(chr) << std::endl;
+		std::cout << "Is digit: " << u8c::isdigit(chr) << std::endl;
+		std::cout << "Is lowercase: " << u8c::islower(chr) << std::endl;
+		std::cout << "Is punctuation mark: " << u8c::ispunct(chr) << std::endl;
+		std::cout << "Is whitespace: " << u8c::isspace(chr) << std::endl;
+		std::cout << "Is surrogate
point: " << u8c::issurro(chr) << std::endl; + std::cout << "Is uppercase: " << u8c::isupper(chr) << std::endl; + std::cout << "Is hexadecimal digit: " << u8c::isxdigit(chr) << std::endl; + std::cout << std::endl; + } +} @@ -1,12 +1,28 @@ <?xml version="1.0" encoding="UTF-8" standalone="no"?> -<svg height="384" version="1.1" width="384" xmlns="http://www.w3.org/2000/svg"> - <rect fill="#444747" height="384" width="384" /> - <circle fill="#00000000" cx="192" cy="192" r="168" stroke="#A9E13D" stroke-width="16" /> - <circle fill="#F8F8F1" cx="192" cy="192" r="144"/> - <circle fill="#444747" cx="192" cy="240" r="96"/> - <circle fill="#F8F8F1" cx="192" cy="240" r="48"/> - <circle fill="#00000000" cx="192" cy="240" r="72" stroke="#A9E13D" stroke-width="16" /> - <circle fill="#444747" cx="192" cy="144" r="96"/> - <circle fill="#F8F8F1" cx="192" cy="144" r="48"/> - <circle fill="#00000000" cx="192" cy="144" r="72" stroke="#A9E13D" stroke-width="16" /> +<svg height="768" version="1.1" width="768" xmlns="http://www.w3.org/2000/svg"> + <clipPath id="clipPath61"> + <rect height="768" width="381" x="387" /> + </clipPath> + <clipPath id="clipPath62"> + <rect height="768" width="768" /> + <!--<rect height="768" rx="192" width="768" />--> <!-- Comment above line and uncomment this one (before this comment) for a background with rounded corners. --> + </clipPath> + <clipPath id="clipPath253"> + <rect height="768" width="384" x="384" /> + </clipPath> + <!-- Background --> + <rect clip-path="url(#clipPath62)" fill="#444747" height="768" width="768" /> + <!--<circle fill="#00000000" cx="384" cy="384" r="336" stroke="#444747" stroke-width="96" />--> <!-- Comment above line and uncomment this one (before this comment) for a transparent background. 
--> + <!-- White Middle Circle --> + <circle fill="#F8F8F1" cx="384" cy="384" r="288"/> + <!-- Digit Eight --> + <circle fill="#00000000" cx="384" cy="536" r="184" stroke="#444747" stroke-width="96" /> + <circle fill="#00000000" cx="384" cy="232" r="184" stroke="#444747" stroke-width="96" /> + <circle fill="#00000000" cx="384" cy="536" r="184" stroke="#A9E13D" stroke-width="32" /> + <circle fill="#00000000" cx="384" cy="232" r="184" stroke="#444747" stroke-width="64" /> + <circle fill="#00000000" cx="384" cy="232" r="184" stroke="#A9E13D" stroke-width="32" /> + <circle clip-path="url(#clipPath61)" fill="#00000000" cx="384" cy="536" r="184" stroke="#444747" stroke-width="64" /> + <circle clip-path="url(#clipPath253)" fill="#00000000" cx="384" cy="536" r="184" stroke="#A9E13D" stroke-width="32" /> + <!-- Greater Ring --> + <circle fill="#00000000" cx="384" cy="384" r="336" stroke="#A9E13D" stroke-width="32" /> </svg> diff --git a/u8c/include/u8c/arr b/u8c/include/u8c/arr new file mode 100644 index 0000000..81372f6 --- /dev/null +++ b/u8c/include/u8c/arr @@ -0,0 +1,62 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_RMLtbYyYWBICBIbq) +#define u8c_key_RMLtbYyYWBICBIbq + +#include <u8c/misc> + +namespace u8c { + template<typename T> class arr { + public: + constexpr auto alloc( u8c::size num) -> void; + constexpr auto app( u8c::arr<T> const & oth) -> u8c::arr<T> const &; + [[nodiscard]] constexpr arr() noexcept = default; + [[nodiscard]] constexpr arr( T const * begin, T const * end); + [[nodiscard]] constexpr arr( u8c::size num); + [[nodiscard]] constexpr arr( u8c::arr<T> const & oth); + template<u8c::size N> [[nodiscard]] constexpr arr( T const (& arr)[N]) noexcept; + template<typename T0> requires std::convertible_to<T0,T> [[nodiscard]] constexpr arr( T0 val); + template<typename T0> requires std::convertible_to<T0,T> [[nodiscard]] constexpr arr( u8c::size num, T0 val); + [[nodiscard]] constexpr auto begin() const noexcept -> T *; + [[nodiscard]] constexpr auto end() const noexcept -> T *; + template<typename T0> requires std::convertible_to<T0,T> constexpr auto fill( T0 val) -> void; + template<typename T0> requires std::convertible_to<T0,T> constexpr auto fill( T * begin, T * end,T0 val) -> void; + [[nodiscard]] constexpr auto isstatic() const noexcept -> bool; + constexpr auto operator = ( u8c::arr<T> const & oth) -> u8c::arr<T> const &; + [[nodiscard]] constexpr auto operator [] (u8c::size pos) const noexcept -> T &; + constexpr auto realloc( u8c::size num) -> void; + constexpr auto set( T const * begin, T const * end) -> void; + constexpr auto set( u8c::arr<T> const & oth) -> void; + template<u8c::size N> constexpr auto set( T const (& arr)[N]) noexcept -> void; + template<typename T0> requires std::convertible_to<T0,T> constexpr auto set( T0 val) -> void; + [[nodiscard]] constexpr auto sub( T const * begin, T const * end) const -> u8c::arr<T>; + [[nodiscard]] constexpr auto sz() const noexcept -> u8c::size; + constexpr ~arr() noexcept; + constexpr static auto npos = -0x1uz; + private: + bool _isstatic = false; + T * _ptr = nullptr; + 
u8c::size _sz = 0x0uz; + }; +} + +#include <u8c/arr.d/arr> + +#endif diff --git a/u8c/include/u8c/arr.d/arr b/u8c/include/u8c/arr.d/arr new file mode 100644 index 0000000..a0af8b7 --- /dev/null +++ b/u8c/include/u8c/arr.d/arr @@ -0,0 +1,159 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_zQ92KNf0pxkz48g2) +#define u8c_key_zQ92KNf0pxkz48g2 + +#include <algorithm> /* std::copy, std::fill */ +#include <cstdlib> /* std::abort */ +#include <iostream> /* std::cerr, std::endl */ +#include <stdexcept> /* std::invalid_argument, std::out_of_range */ +#include <type_traits> /* std::is_constant_evaluated */ + +template<typename T> constexpr auto u8c::arr<T>::alloc(u8c::size const _num) -> void { + if (this->isstatic()) [[unlikely]] { + this->_isstatic = false; + } + else { + ::delete[] this->_ptr; + } + this->_ptr = ::new T[_num]; + this->_sz = _num; +} +template<typename T> constexpr auto u8c::arr<T>::app(u8c::arr<T> const & _oth) -> u8c::arr<T> const & { + this->realloc(this->sz() + _oth.sz()); + std::copy(_oth.begin(),_oth.end(),this->begin() + this->sz() - _oth.sz()); + return *this; +} +template<typename T> constexpr u8c::arr<T>::arr(T const * const _begin,T const * const _end) { + this->set(_begin,_end); +} +template<typename T> constexpr u8c::arr<T>::arr(u8c::size const _num) { + this->alloc(_num); +} 
+template<typename T> constexpr u8c::arr<T>::arr(u8c::arr<T> const & _oth) { /* Copy constructor: takes over the contents of _oth through set. */
+	this->set(_oth);
+}
+template<typename T> template<u8c::size N> constexpr u8c::arr<T>::arr(T const (&_arr)[N]) noexcept { /* Refers to a built-in array without copying it. */
+	this->set(_arr);
+}
+template<typename T> template<typename T0> requires std::convertible_to<T0,T> constexpr u8c::arr<T>::arr(T0 const _val) { /* Single-element array holding _val. */
+	this->set(_val);
+}
+template<typename T> template<typename T0> requires std::convertible_to<T0,T> constexpr u8c::arr<T>::arr(u8c::size const _num,T0 const _val) { /* _num elements, each set to _val. */
+	this->alloc(_num);
+	this->fill(this->begin(),this->end(),_val);
+}
+template<typename T> constexpr auto u8c::arr<T>::begin() const noexcept -> T * { /* Pointer to the first element (nullptr while nothing is stored). */
+	return this->_ptr;
+}
+template<typename T> constexpr auto u8c::arr<T>::end() const noexcept -> T * { /* Pointer one past the last element. */
+	return this->begin() + this->_sz;
+}
+template<typename T> template<typename T0> requires std::convertible_to<T0,T> constexpr auto u8c::arr<T>::fill(T0 const _val) -> void { /* Sets every element to _val. */
+	this->fill(this->begin(),this->end(),_val);
+}
+template<typename T> template<typename T0> requires std::convertible_to<T0,T> constexpr auto u8c::arr<T>::fill(T * const u8c_restr _begin,T * const u8c_restr _end,T0 const _val) -> void { /* Throws std::out_of_range if the given range leaves this array. */
+	if (this->sz() == 0x0uz) [[unlikely]] {
+		return; /* slime incident */
+	}
+	if (_begin < this->begin() || _end > this->end()) [[unlikely]] {
+		throw std::out_of_range("Beginning or end are out of this array's range.");
+	}
+	if (this->isstatic()) [[unlikely]] { /* Static storage is replaced by a fresh allocation before anything is written. */
+		this->alloc(static_cast<u8c::size>(_end - _begin + 0x1uz)); /* NOTE(review): the new size is taken from the range, plus one - confirm this is intended. */
+	}
+	std::fill(this->begin(),this->end(),static_cast<T>(_val)); /* NOTE(review): fills the whole array; _begin and _end are only validated above - confirm. */
+}
+template<typename T> constexpr auto u8c::arr<T>::isstatic() const noexcept -> bool { /* True while the array refers to a built-in array passed to set rather than to storage from alloc. */
+	return this->_isstatic;
+}
+template<typename T> constexpr auto u8c::arr<T>::operator = (u8c::arr<T> const & _oth) -> u8c::arr<T> const & {
+	if (this != &_oth) [[likely]] { this->set(_oth); } /* Guard against self-assignment: set reallocates this array's storage before copying from _oth. */
+	return *this;
+}
+template<typename T> constexpr auto u8c::arr<T>::operator [] (u8c::size const _pos) const noexcept -> T & { /* Element access; bounds are only checked when u8c::dbg is set. */
+	if constexpr (u8c::dbg) {
+		if (_pos >= this->sz()) [[unlikely]] { /* Valid positions run from 0 to sz() - 1; sz() itself is already one past the end. */
+			//std::cerr << "u8c :: " << std::source_location::current().function_name() << " :: Input parameter is out of range." << std::endl;
+			std::cerr << "u8c :: " << __func__ << " :: Input parameter is out of range." << std::endl;
+			std::abort();
+		}
+	}
+	return this->begin()[_pos];
+}
+template<typename T> constexpr auto u8c::arr<T>::realloc(u8c::size const _num) -> void { /* Resizes to _num elements via u8c::renew; an empty array is simply allocated. */
+	if (this->sz() == 0x0uz) [[unlikely]] {
+		return this->alloc(_num);
+	}
+	if (this->isstatic()) [[unlikely]] {
+		this->_isstatic = false;
+	}
+	this->_ptr = u8c::renew(this->begin(),this->sz(),_num); /* NOTE(review): for a static array the built-in array's pointer is handed to renew (defined elsewhere) - verify renew does not release it. */
+	this->_sz = _num;
+}
+template<typename T> constexpr auto u8c::arr<T>::set(T const * const u8c_restr _begin,T const * const u8c_restr _end) -> void { /* Replaces the contents with a copy of the given range. */
+	if constexpr (u8c::dbg) {
+		if (_begin == nullptr || _end == nullptr) [[unlikely]] {
+			throw std::invalid_argument("Provided parameter has value of nullptr.");
+		}
+	}
+	this->alloc(static_cast<u8c::size>(_end - _begin + 0x1uz)); /* NOTE(review): allocates one element more than std::copy below writes - confirm whether _end is meant to be inclusive. */
+	std::copy(_begin,_end,this->begin());
+}
+template<typename T> constexpr auto u8c::arr<T>::set(u8c::arr<T> const & _oth) -> void { /* Copies the elements from _oth.begin() to _oth.end(). */
+	this->set(_oth.begin(),_oth.end());
+}
+template<typename T> template<u8c::size N> constexpr auto u8c::arr<T>::set(T const (&_arr)[N]) noexcept -> void { /* Points this array at _arr and marks it static; nothing is copied. */
+	this->~arr(); /* NOTE(review): the object keeps being used after this explicit destructor call - confirm this is safe. */
+	this->_isstatic = true;
+	this->_ptr = _arr; /* NOTE(review): _arr is an array of T const, while the begin() accessor hands out _ptr as T * - check constness. */
+	this->_sz = N;
+}
+template<typename T> template<typename T0> requires std::convertible_to<T0,T> constexpr auto u8c::arr<T>::set(T0 const _val) -> void { /* Replaces the contents with the single element _val. */
+	this->alloc(0x1uz);
+	*this->begin() = _val;
+}
+template<typename T> constexpr auto u8c::arr<T>::sub(T const * const u8c_restr _begin,T const * const u8c_restr _end) const -> u8c::arr<T> { /* Returns the given part of this array as a new array; throws std::out_of_range if the range leaves this array. */
+	if (_begin < this->begin() || _end > this->end()) [[unlikely]] {
+		throw std::out_of_range("Beginning or end are out of this array's range.");
+	}
+	u8c::size const sz = static_cast<u8c::size>(_end - _begin) + 0x1uz; /* NOTE(review): one more than the distance between the pointers, as in set - confirm. */
+	u8c::arr<T> arr;
+	if (this->isstatic()) [[unlikely]] {
+		arr._sz = sz;
+		arr._ptr =
_begin; + } + else { + arr.alloc(sz); + std::copy(_begin,_end,arr.begin()); + } + return arr; +} +template<typename T> constexpr auto u8c::arr<T>::sz() const noexcept -> u8c::size { + return this->_sz; +} +template<typename T> constexpr u8c::arr<T>::~arr<T>() noexcept { + if (this->isstatic()) { + return; + } + ::delete[] this->_ptr; +} + +#endif diff --git a/u8c/include/u8c/cstr b/u8c/include/u8c/cstr new file mode 100644 index 0000000..f8f5184 --- /dev/null +++ b/u8c/include/u8c/cstr @@ -0,0 +1,37 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_gM1GPEGwZN8BgcwU) +#define u8c_key_gM1GPEGwZN8BgcwU + +#include <u8c/misc> + +namespace u8c { + constexpr auto cstrcmp(char const * lstr,char const * rstr) noexcept -> u8c::byte; + constexpr auto cstrcpy(char * dest,char const * src) noexcept -> char *; + constexpr auto cstrdup(char const * str) -> char *; + constexpr auto cstrlen(char const * str) noexcept -> u8c::size; +} + +#include <u8c/cstr.d/cstrcmp> +#include <u8c/cstr.d/cstrcpy> +#include <u8c/cstr.d/cstrdup> +#include <u8c/cstr.d/cstrlen> + +#endif diff --git a/u8c/include/u8c/cstr.d/cstrcmp b/u8c/include/u8c/cstr.d/cstrcmp new file mode 100644 index 0000000..9615ad2 --- /dev/null +++ b/u8c/include/u8c/cstr.d/cstrcmp @@ -0,0 +1,43 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. 
+ + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_hQ3g8CRKOedpjvM7) +#define u8c_key_hQ3g8CRKOedpjvM7 + + +#include <algorithm> /* std::min */ + +constexpr auto u8c::cstrcmp(char const * const u8c_restr _lstr,char const * const u8c_restr _rstr) noexcept -> u8c::byte { + auto const maxn = std::min(u8c::cstrlen(_lstr),u8c::cstrlen(_rstr)); + for (auto n = 0x0uz;n <= maxn;n += 0x1uz) { + auto const lchr = _lstr[n]; + auto const rchr = _rstr[n]; + if (lchr != rchr) [[unlikely]] { + if (lchr > rchr) { + return u8c_bytec(-0x1); + } + if (lchr < rchr) { + return u8c_bytec(0x1); + } + } + } + return u8c_bytec(0x0); +} + +#endif diff --git a/u8c/include/u8c/cstr.d/cstrcpy b/u8c/include/u8c/cstr.d/cstrcpy new file mode 100644 index 0000000..37cad96 --- /dev/null +++ b/u8c/include/u8c/cstr.d/cstrcpy @@ -0,0 +1,34 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_MvAfxuZelp52mHk5) +#define u8c_key_MvAfxuZelp52mHk5 + +#include <algorithm> /* std::copy */ + +constexpr auto u8c::cstrcpy(char * const u8c_restr _dest,char const * const u8c_restr _src) noexcept -> char * { + auto const sz = u8c::cstrlen(_src); + if (sz == 0x0uz) [[unlikely]] { + return _dest; + } + std::copy(_src,_src + sz - 0x1uz,_dest); + return _dest; +} + +#endif diff --git a/u8c/include/u8c/cstr.d/cstrdup b/u8c/include/u8c/cstr.d/cstrdup new file mode 100644 index 0000000..d37f03b --- /dev/null +++ b/u8c/include/u8c/cstr.d/cstrdup @@ -0,0 +1,29 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_vf9vDNmIndanmgPg) +#define u8c_key_vf9vDNmIndanmgPg + +constexpr auto u8c::cstrdup(char const * const u8c_restr _str1) -> char * { + auto const sz = u8c::cstrlen(_str1); + auto * const u8c_restr str = ::new char[sz]; + return u8c::cstrcpy(str,_str1);; +} + +#endif diff --git a/u8c/include/u8c/cstr.d/cstrlen b/u8c/include/u8c/cstr.d/cstrlen new file mode 100644 index 0000000..2f7cb5a --- /dev/null +++ b/u8c/include/u8c/cstr.d/cstrlen @@ -0,0 +1,34 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. 
+ + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_2yXSG12yvwzu2XCt) +#define u8c_key_2yXSG12yvwzu2XCt + +constexpr auto u8c::cstrlen(char const * const u8c_restr _str) noexcept -> u8c::size { + auto sz = 0x0uz; + for (u8c::size n = 0x0uz;;n += 0x1uz) { + if (_str[n] == '\u0000') [[unlikely]] { + break; + } + sz += 0x1uz; + } + return sz; +} + +#endif diff --git a/u8c/include/u8c/impl b/u8c/include/u8c/impl new file mode 100644 index 0000000..af6a2c4 --- /dev/null +++ b/u8c/include/u8c/impl @@ -0,0 +1,33 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_9y1ZpGLV5Chmuy9U) +#define u8c_key_9y1ZpGLV5Chmuy9U + +#include <u8c/cstr> /* u8c::cstrcmp, which the included isarch/isos definitions call */ + +namespace u8c { /* Compile-time (consteval) platform queries keyed by a null-terminated name. */ + [[nodiscard]] consteval auto isarch(char const * arch) noexcept -> bool; /* defined in <u8c/impl.d/isarch>; names it does not handle yield false */ + [[nodiscard]] consteval auto isos( char const * os) noexcept -> bool; /* defined in <u8c/impl.d/isos>; names it does not handle yield false */ +} + +#include <u8c/impl.d/isarch> +#include <u8c/impl.d/isos> + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/impl.d/isarch b/u8c/include/u8c/impl.d/isarch new file mode 100644 index 0000000..33b0faa --- /dev/null +++ b/u8c/include/u8c/impl.d/isarch @@ -0,0 +1,90 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_HeaDXGqHoIcCdWWR) +#define u8c_key_HeaDXGqHoIcCdWWR + +consteval auto u8c::isarch([[maybe_unused]] char const * const u8c_restr _arch) noexcept -> bool { + if (!u8c::cstrcmp(_arch,"alpha")) { +#if defined(__alpha) || defined(__alpha__) || defined(_M_ALPHA) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_arch,"arm")) { +#if defined(__TARGET_ARCH_ARM) || defined(__TARGET_ARCH_THUMB) || defined(__arm) || defined(__arm__) || defined(__thumb__) || defined(_M_ARM) || defined(_M_ARMT) + return true; +#else + return false; + } +#endif + if (!u8c::cstrcmp(_arch,"itanium")) { +#if defined(__IA64__) || defined(__ia64__) || defined(__itanium__) || defined(_IA64) || defined(_M_IA64) + return true; +#else + return false; + } +#endif + if (!u8c::cstrcmp(_arch,"m68k")) { +#if defined(__MC68K__) || defined(__m68k__) || defined(M68000) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_arch,"mips")) { +#if defined(__MIPS__) || defined(__mips) || defined(__mips__) || defined(mips) + return true; +#else + return false; 
+#endif + } + if (!u8c::cstrcmp(_arch,"pa-risc")) { +#if defined(__HPPA__) || defined(__hppa) || defined(__hppa__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_arch,"ppc")) { +#if defined(__POWERPC__) || defined(__ppc) || defined(__PPC__) || defined(__powerpc) || defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(_M_PPC) || defined(_XENON) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_arch,"sparc")) { +#if defined(__sparc) || defined(__sparc__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_arch,"x86")) { +#if defined(__386) || defined(__I86__) || defined(__IA32__) || defined(__INTEL__) || defined(__THW_INTEL__) || defined(__X86__) || defined(__i386) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_I86) || defined(_M_IX86) || defined(_X86_) ||defined(i386) + return true; +#else + return false; +#endif + } + return false; +} + +#endif diff --git a/u8c/include/u8c/impl.d/isos b/u8c/include/u8c/impl.d/isos new file mode 100644 index 0000000..06a6934 --- /dev/null +++ b/u8c/include/u8c/impl.d/isos @@ -0,0 +1,258 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_ACd4FIGZ23h2QNrU) +#define u8c_key_ACd4FIGZ23h2QNrU + +#if __has_include(<sys/param.h>) +#include <sys/param.h> /* BSD */ +#endif +#if __has_include(<unistd.h>) +#include <unistd.h> /* _POSIX_VERSION */ +#endif + +consteval auto u8c::isos(char const * const u8c_restr _os) noexcept -> bool { + if (!u8c::cstrcmp(_os,"aix")) { +#if defined(__TOS_AIX__) || defined(_AIX) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"amigaos")) { +#if defined(__amigaos__) || defined(AMIGA) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"android")) { +#if defined(__ANDROID__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"beos")) { +#if defined(__BEOS__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"bluegene")) { +#if defined(__THW_BLUEGENE__) || defined(__TOS_BGQ__) || defined(__bg__) || defined(__bgq__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"bsd")) { +#if defined(_SYSTYPE_BSD) || defined(BSD) + return true; +#else + return u8c::isos("bsdos") || u8c::isos("dragonflybsd") || u8c::isos("freebsd") || u8c::isos("netbsd") || u8c::isos("openbsd"); +#endif + } + if (!u8c::cstrcmp(_os,"bsdos")) { +#if defined(__bsdi__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"dragonflybsd")) { +#if defined(__DragonFly__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"freebsd")) { +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"hpux")) { +#if defined(__hpux) || defined(_hpux) || defined(hpux) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"hurd")) { +#if defined(__GNU__) || defined(__gnu_hurd__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"ibmi")) { +#if defined(__OS400__) || defined(__OS400_TGTVRM__) + return true; +#else + return false; 
+#endif + } + if (!u8c::cstrcmp(_os,"integrity")) { +#if defined(__INTEGRITY) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"interix")) { +#if defined(__INTERIX) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"irix")) { +#if defined(__sgi) || defined(sgi) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"linux")) { +#if defined(__linux) || defined(__linux__) || defined(linux) || defined(u8c_os_android) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"mac")) { +#if (defined(__APPLE__) && defined(__MACH__)) || defined(Macintosh) || defined(macintosh) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"minix")) { +#if defined(__minix) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"msdos")) { +#if defined(__DOS__) || defined(__MSDOS__) || defined(_MSDOS) || defined(MSDOS) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"netbsd")) { +#if defined(__NetBSD__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"nonstop")) { +#if defined(__TANDEM) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"openbsd")) { +#if defined(__OpenBSD__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"os2")) { +#if defined(__OS2__) || defined(__TOS_OS2__) || defined(_OS2) || defined(OS2) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"palmos")) { +#if defined(__palmos__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"qnx")) { +#if defined(__QNX__) || defined(__QNXNTO__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"solaris")) { +#if (defined(__SVR4) || defined(__svr4__) || defined(__sysv__) || defined(_SYSTYPE_SVR4)) && (defined(__sun) || defined(sun)) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"sun")) { +#if defined(__sun) || 
defined(sun) + return !u8c::isos("solaris"); +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"unicos")) { +#if defined(_UNICOS) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"unix")) { +#if defined(__unix) || defined(__unix__) || defined(_POSIX_VERSION) || defined(u8c_os_aix) || defined(u8c_os_android)|| defined(u8c_os_bsd) || defined(u8c_os_hpux) || defined(u8c_os_hurd) || defined(u8c_os_linux) || defined(u8c_os_mac) || defined(u8c_os_minix) || defined(u8c_os_solaris) || defined(u8c_os_sun) || defined(u8c_os_unicos) || defined(u8c_os_unixware) || defined(unix) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"unixware")) { +#if defined(sco) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"win")) { +#if defined(__TOS_WIN__) || defined(__WIN32__) || defined(__WINDOWS__) || defined(_WIN16) || defined(_WIN32) || defined(_WIN32_CE) || defined(_WIN64) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_os,"zos")) { +#if defined(__HOS_MVS__) || defined(__MVS__) || defined(__TOS_MVS__) + return true; +#else + return false; +#endif + } + return false; +} + +#endif diff --git a/u8c/include/u8c/math b/u8c/include/u8c/math new file mode 100644 index 0000000..2b12906 --- /dev/null +++ b/u8c/include/u8c/math @@ -0,0 +1,80 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. 
If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_vm8mXaTP2bcUVL49) +#define u8c_key_vm8mXaTP2bcUVL49 + +#include <compare> /* std::partial_ordering */ +#include <concepts> /* std::convertible_to, std::floating_point, std::integral, std::signed_integral */ +#include <type_traits> /* std::is_arithmetic_v, std::is_same_v */ +#include <u8c/impl> + +namespace u8c { + template<std::signed_integral T = int> class quota { + public: + [[nodiscard]] constexpr static auto inf() noexcept -> u8c::quota<T>; + [[nodiscard]] constexpr auto isinf() const noexcept -> bool; + [[nodiscard]] constexpr auto isnan() const noexcept -> bool; + [[nodiscard]] constexpr auto lower() const noexcept -> T; + [[nodiscard]] constexpr static auto nan() noexcept -> u8c::quota<T>; + [[nodiscard]] constexpr auto upper() const noexcept -> T; + template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator + (u8c::quota<T0> const & oth) const noexcept -> u8c::quota<T>; + template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator - (u8c::quota<T0> const & oth) const noexcept -> u8c::quota<T>; + template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator <=> (u8c::quota<T0> const & oth) const noexcept -> std::partial_ordering; + template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator = ( u8c::quota<T0> const & oth) const noexcept -> bool; + template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator == ( u8c::quota<T0> const & oth) const noexcept -> bool; + template<std::integral T0> [[nodiscard]] constexpr operator T0 () const noexcept; + [[nodiscard]] constexpr quota() noexcept = default; + template<std::convertible_to<T> T0> [[nodiscard]] constexpr quota( T0 val) noexcept; + template<std::convertible_to<T> T0> [[nodiscard]] constexpr quota( u8c::quota<T0> const & oth) noexcept; + template<std::signed_integral T0> [[nodiscard]] constexpr quota( T0 upper,T0 lower) noexcept; + constexpr ~quota() noexcept = 
default; + private: + u8c::ubyte _flags = false; + T _lower = T{0x0}; + T _upper = T{0x0}; + + }; + template<typename T,typename T0 = void> concept arith = std::is_arithmetic_v<T> || std::is_same_v<T,u8c::quota<T0>>; + template<typename T> [[u8c_attr_const]] constexpr auto abs( u8c::quota<T> val) noexcept -> u8c::quota<T>; + template<u8c::arith T> [[u8c_attr_const]] constexpr auto abs( T val) noexcept -> T; + template<typename T> [[u8c_attr_const]] constexpr auto fma( u8c::quota<T> x, u8c::quota<T> y, u8c::quota<T> z) noexcept -> u8c::quota<T>; + template<u8c::arith T> [[u8c_attr_const]] constexpr auto fma( T x, T y, T z) noexcept -> T; + template<u8c::arith T> [[u8c_attr_const]] constexpr auto isinf( T val) noexcept -> bool; + template<u8c::arith T> [[u8c_attr_const]] constexpr auto isnan( T val) noexcept -> bool; + template<typename T> [[u8c_attr_const]] constexpr auto isprime(u8c::quota<T> val) noexcept -> bool; + template<u8c::arith T> [[u8c_attr_const]] constexpr auto isprime(T val) noexcept -> bool; + template<typename T> [[u8c_attr_const]] constexpr auto pow( u8c::quota<T> base,u8c::quota<T> exp) noexcept -> u8c::quota<T>; + template<u8c::arith T> [[u8c_attr_const]] constexpr auto pow( T base,T exp) noexcept -> T; + template<typename T> [[u8c_attr_const]] constexpr auto sqrt( u8c::quota<T> val) noexcept -> u8c::quota<T>; + template<u8c::arith T> [[u8c_attr_const]] constexpr auto sqrt( T val) noexcept -> T; + template<std::integral T,std::integral T0> [[u8c_attr_const]] constexpr auto trunc( T val, T0) noexcept -> T0; +} + +#include <u8c/math.d/abs> +#include <u8c/math.d/fma> +#include <u8c/math.d/isinf> +#include <u8c/math.d/isnan> +#include <u8c/math.d/isprime> +#include <u8c/math.d/pow> +#include <u8c/math.d/quota> +#include <u8c/math.d/sqrt> +#include <u8c/math.d/trunc> + +#endif diff --git a/u8c/include/u8c/math.d/abs b/u8c/include/u8c/math.d/abs new file mode 100644 index 0000000..53eaba3 --- /dev/null +++ b/u8c/include/u8c/math.d/abs @@ -0,0 +1,37 @@ 
+/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_DHlzI0Min44ZJ3qF) +#define u8c_key_DHlzI0Min44ZJ3qF + +#include <type_traits> /* std::is_unsigned_v */ + +template<u8c::arith T> constexpr auto u8c::abs(T const _val) noexcept -> T { + if constexpr (std::is_unsigned_v<T>) { + return _val; + } + else { + if (_val < T{0x0}) { + return -_val; + } + return _val; + } +} + +#endif diff --git a/u8c/include/u8c/math.d/fma b/u8c/include/u8c/math.d/fma new file mode 100644 index 0000000..e2b756d --- /dev/null +++ b/u8c/include/u8c/math.d/fma @@ -0,0 +1,27 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_3Xt4uPu34bdh53dB) +#define u8c_key_3Xt4uPu34bdh53dB + +template<u8c::arith T> constexpr auto u8c::fma(T const _x,T const _y,T const _z) noexcept -> T { + return _x * _y + _z; +} + +#endif diff --git a/u8c/include/u8c/math.d/isinf b/u8c/include/u8c/math.d/isinf new file mode 100644 index 0000000..e77793a --- /dev/null +++ b/u8c/include/u8c/math.d/isinf @@ -0,0 +1,32 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_aSzgnLkMAeJF6xFF) +#define u8c_key_aSzgnLkMAeJF6xFF + +#include <limits> /* std::numeric_limits */ + +template<u8c::arith T> constexpr auto u8c::isinf(T const _val) noexcept -> bool { + if (std::numeric_limits<T>::has_infinity) { + return _val == std::numeric_limits<T>::infinity(); + } + return false; +} + +#endif diff --git a/u8c/include/u8c/math.d/isnan b/u8c/include/u8c/math.d/isnan new file mode 100644 index 0000000..9e90d12 --- /dev/null +++ b/u8c/include/u8c/math.d/isnan @@ -0,0 +1,62 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. 
+ + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_0RXxS4xdwMkbkEi6) +#define u8c_key_0RXxS4xdwMkbkEi6 + +#include <limits> /* std::numeric_limits */ + +template<u8c::arith T> constexpr auto u8c::isnan(T const _val) noexcept -> bool { + if constexpr (std::numeric_limits<T>::has_quiet_NaN) { + if constexpr (std::numeric_limits<T>::is_iec559) { + return _val != _val; + } + else { +#if defined(__cpp_if_consteval) + if consteval { + return _val != _val; + } + else { + u8c::ubyte * const u8c_restr nanval = nullptr; + u8c::ubyte * const u8c_restr valval = nullptr; + { + auto const tmp = std::numeric_limits<T>::quiet_NaN(); + nanval = reinterpret_cast<unsigned char *>(&tmp); + } + { + auto const tmp = _val; + valval = reinterpret_cast<unsigned char *>(&tmp); + } + for (std::size_t n = 0x0uz;n < sizeof(T);n += 0x1uz) { + if (valval[n] != nanval[n]) { + return false; + } + } + } +#else + return _val != _val; +#endif + } + } + else { + return false; + } +} + +#endif diff --git a/u8c/include/u8c/math.d/isprime b/u8c/include/u8c/math.d/isprime new file mode 100644 index 0000000..5074d4b --- /dev/null +++ b/u8c/include/u8c/math.d/isprime @@ -0,0 +1,35 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. 
+ + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_rrAoPS9LdRWHCbsB) +#define u8c_key_rrAoPS9LdRWHCbsB + +template<u8c::arith T> constexpr auto u8c::isprime(T const _val) noexcept -> bool { + if (_val <= T{0x1}) [[unlikely]] { + return false; + } + for (T iter = T{0x2};iter < _val / T{0x2} + T{0x1};iter += T{0x1}) { + if (_val % iter == T{0x0}) [[unlikely]] { + return false; + } + } + return true; +} + +#endif diff --git a/u8c/include/u8c/math.d/pow b/u8c/include/u8c/math.d/pow new file mode 100644 index 0000000..53410b8 --- /dev/null +++ b/u8c/include/u8c/math.d/pow @@ -0,0 +1,40 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_JD8l4B23bwAqQ2HP) +#define u8c_key_JD8l4B23bwAqQ2HP + +#include <type_traits> /* std::is_floating_point_v */ + +template<u8c::arith T> constexpr auto u8c::pow(T const _base,T const _exp) noexcept -> T { + //if constexpr (std::is_floating_point_v<T>) { + //} + //else { + if (u8c::abs(_base) <= T{0x1}) [[unlikely]] { + return _base; + } + T res = _base; + for (T iter = T{0x1};iter < _exp;iter += T{0x1}) { + res *= _base; + } + return res; + //} +} + +#endif diff --git a/u8c/include/u8c/math.d/quota b/u8c/include/u8c/math.d/quota new file mode 100644 index 0000000..3ea9350 --- /dev/null +++ b/u8c/include/u8c/math.d/quota @@ -0,0 +1,94 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_k92caE4RpzrErnKD) +#define u8c_key_k92caE4RpzrErnKD + +#include <compare> /* std::partial_ordering */ +#include <concepts> /* std:convertible_to, std::integral, std::signed_integral */ + +template<std::signed_integral T> constexpr auto u8c::quota<T>::inf() noexcept -> u8c::quota<T> { + auto tmp = u8c::quota<T>(); + tmp._flags = u8c_ubytec(0b10); + return tmp; +} +template<std::signed_integral T> constexpr auto u8c::quota<T>::isinf() const noexcept -> bool { + return (this->_flags & u8c_ubytec(0b10)) == u8c_ubytec(0b10); +} +template<std::signed_integral T> constexpr auto u8c::quota<T>::isnan() const noexcept -> bool { + return (this->_flags & u8c_ubytec(0b1)) == u8c_ubytec(0b1); +} +template<std::signed_integral T> constexpr auto u8c::quota<T>::lower() const noexcept -> T { + return this->_lower; +} +template<std::signed_integral T> constexpr auto u8c::quota<T>::nan() noexcept -> u8c::quota<T> { + auto tmp = u8c::quota<T>(); + tmp._flags = u8c_ubytec(0b1); + return tmp; +} +template<std::signed_integral T> constexpr auto u8c::quota<T>::upper() const noexcept -> T { + return this->_upper; +} +template<std::signed_integral T> template<std::convertible_to<T> T0> constexpr auto u8c::quota<T>::operator <=> (u8c::quota<T0> const & _oth) const noexcept -> std::partial_ordering { + if (this->isinf()) { + return std::partial_ordering::greater; + } + if (_oth.isinf()) { + return std::partial_ordering::less; + } + if (this->isnan() || _oth.isnan()) [[unlikely]] { + return std::partial_ordering::unordered; + } + auto const tmp0 = this->upper() * _oth.lower(); + auto const tmp1 = _oth.upper() * this->lower(); + return tmp0 <=> tmp1; +} +template<std::signed_integral T> template<std::convertible_to<T> T0> constexpr auto u8c::quota<T>::operator == (u8c::quota<T0> const & _oth) const noexcept -> bool { + if (this->isinf() && _oth.isinf()) [[unlikely]] { + return true; + } + if (this->isnan() || _oth.isnan()) [[unlikely]] { + return false; + } + if 
(this->_flags != _oth._flags) [[unlikely]] { + return false; + } + return this->upper() * _oth.lower() == _oth.upper() * this->lower(); +} +template<std::signed_integral T> template<std::integral T0> constexpr u8c::quota<T>::operator T0 () const noexcept { + return u8c::trunc<T0>(this->_upper / this->_lower); +} +template<std::signed_integral T> template<std::convertible_to<T> T0> constexpr u8c::quota<T>::quota(T0 const _val) noexcept { + if (u8c::isnan(_val)) [[unlikely]] { + this->_flags |= u8c_ubytec(0b1); + } + else if (u8c::isinf(_val)) [[unlikely]] { + this->_flags |= u8c_ubytec(0b10); + } + else { + this->_upper = T{_val}; + this->_lower = T{0x1}; + } +} +template<std::signed_integral T> template<std::signed_integral T0> constexpr u8c::quota<T>::quota(T0 const _upper,T0 const _lower) noexcept { + this->_upper = _upper; + this->_lower = _lower; +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/math.d/sqrt b/u8c/include/u8c/math.d/sqrt new file mode 100644 index 0000000..d1eb5e1 --- /dev/null +++ b/u8c/include/u8c/math.d/sqrt @@ -0,0 +1,42 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_GPVreQYtljZ3JsPu) +#define u8c_key_GPVreQYtljZ3JsPu + +#include <limits> /* std::numeric_limits */ +#include <type_traits> /* std::is_integral_v */ + +template<u8c::arith T> constexpr auto u8c::sqrt(T const _val) noexcept -> T { + if (_val < T{0x0}) [[unlikely]] { + return std::numeric_limits<T>::quiet_NaN(); + } + if (_val == T{0x0}) [[unlikely]] { + return _val; + } + /*constexpr auto err = []() { + if constexpr(std::is_integral_v<T>) { + return T{0x1}; + } + return std::numeric_limits<T>::epsilon(); + }();*/ + return _val; +} + +#endif diff --git a/u8c/include/u8c/math.d/trunc b/u8c/include/u8c/math.d/trunc new file mode 100644 index 0000000..d4ff731 --- /dev/null +++ b/u8c/include/u8c/math.d/trunc @@ -0,0 +1,36 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. 
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_cQ6WEZj1q8fhHahe)
+#define u8c_key_cQ6WEZj1q8fhHahe
+
+#include <concepts> /* std::integral */
+#include <limits> /* std::numeric_limits */
+#include <type_traits> /* std::make_unsigned_t */
+
+/*
+	u8c::trunc: Converts the integer _val to the integral type T0. The unnamed
+	second parameter is never read; it only lets the caller select T0.
+
+	When T0 reports modulo arithmetic the value is cast directly; otherwise
+	it is first converted to the unsigned counterpart of T0 and then to T0.
+*/
+template<std::integral T,std::integral T0> constexpr auto u8c::trunc(T const _val,T0) noexcept -> T0 {
+	if constexpr (!std::numeric_limits<T0>::is_modulo) {
+		using wrapped = std::make_unsigned_t<T0>;
+		auto const bits = static_cast<wrapped>(_val);
+		return static_cast<T0>(bits);
+	}
+	else {
+		return static_cast<T0>(_val);
+	}
+}
+
+#endif
diff --git a/u8c/include/u8c/misc b/u8c/include/u8c/misc
new file mode 100644
index 0000000..f7deca1
--- /dev/null
+++ b/u8c/include/u8c/misc
@@ -0,0 +1,132 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_4grZQR1RdsRJL14e)
+#define u8c_key_4grZQR1RdsRJL14e
+
+#include <climits> /* CHAR_BIT */
+#include <cstddef> /* std::nullptr_t */
+#include <cstdint> /* std::int_least16_t, std::int_least32_t, std::int_least64_t, std::int_least8_t, std::intmax_t, std::uint_least16_t, std::uint_least32_t, std::uint_least64_t, std::uint_least8_t, std::uintmax_t */
+#include <cstdlib> /* std::abort */
+#include <iostream> /* std::cerr, std::endl */
+#include <type_traits> /* std::is_same_v */
+
+/*
+	u8c_assert: Debug-only assertion. When u8c::dbg is true and expr evaluates
+	to false, the failing file, line and expression are written to std::cerr
+	and the program aborts. When u8c::dbg is false the check is discarded.
+*/
+#define u8c_assert(expr) \
+	if constexpr (u8c::dbg) { \
+		/*if consteval { \
+			static_assert(expr); \
+		} \
+		else*/ if (!(expr)) [[unlikely]] { \
+			std::cerr << "u8c :: \"" << __FILE__ "\" @ " << __LINE__ << ": Assertion failed: expression \"" << #expr << "\" evaluates to false." << std::endl; \
+			std::abort(); \
+		} \
+	}
+
+/* Attribute names for GCC-compatible compilers; they expand to nothing elsewhere. */
+#if defined(__GNUC__) || defined(__clang__)
+#define u8c_attr_abitag(...) gnu::abi_tag(__VA_ARGS__)
+#define u8c_attr_allocsz(...) gnu::alloc_size(__VA_ARGS__)
+#define u8c_attr_artif gnu::artificial
+#define u8c_attr_cold gnu::cold
+#define u8c_attr_const gnu::const
+#define u8c_attr_fmt gnu::format
+#define u8c_attr_hot gnu::hot
+#define u8c_attr_inline gnu::always_inline
+#define u8c_attr_malloc gnu::malloc
+#define u8c_attr_nonnull(...) gnu::nonnull(__VA_ARGS__)
+#define u8c_attr_pure gnu::pure
+#define u8c_attr_retnonnull gnu::returns_nonnull
+#define u8c_attr_sect gnu::section
+#define u8c_attr_used gnu::used
+#else
+#define u8c_attr_abitag(...)
+#define u8c_attr_allocsz(...)
+#define u8c_attr_artif
+#define u8c_attr_cold
+#define u8c_attr_const
+#define u8c_attr_fmt
+#define u8c_attr_hot
+#define u8c_attr_inline
+#define u8c_attr_malloc
+#define u8c_attr_nonnull(...)
+#define u8c_attr_pure
+#define u8c_attr_retnonnull
+#define u8c_attr_sect
+#define u8c_attr_used
+#endif
+/* Clang-only attributes. */
+#if defined(__clang__)
+#define u8c_attr_noderef clang::noderef
+#define u8c_attr_nodup clang::noduplicate
+#define u8c_attr_noesc(...) clang::noescape(__VA_ARGS__)
+#else
+#define u8c_attr_noderef
+#define u8c_attr_nodup
+#define u8c_attr_noesc(...)
+#endif
+
+/* Typed integer constants: the <cstdint> constant macro followed by a cast to the matching u8c alias. */
+#define u8c_bytec(expr) (static_cast<u8c::byte>(INT8_C(expr)))
+#define u8c_int16c(expr) (static_cast<u8c::int16>(INT16_C(expr)))
+#define u8c_int32c(expr) (static_cast<u8c::int32>(INT32_C(expr)))
+#define u8c_int64c(expr) (static_cast<u8c::int64>(INT64_C(expr)))
+#define u8c_intmaxc(expr) (static_cast<u8c::intmax>(INTMAX_C(expr)))
+#define u8c_ubytec(expr) (static_cast<u8c::ubyte>(UINT8_C(expr)))
+#define u8c_uint16c(expr) (static_cast<u8c::uint16>(UINT16_C(expr)))
+#define u8c_uint32c(expr) (static_cast<u8c::uint32>(UINT32_C(expr)))
+#define u8c_uint64c(expr) (static_cast<u8c::uint64>(UINT64_C(expr)))
+#define u8c_uintmaxc(expr) (static_cast<u8c::uintmax>(UINTMAX_C(expr)))
+
+/* Compiler-specific spelling of the "restrict" pointer qualifier; empty when unsupported. */
+#if defined(__GNUC__) || defined(__clang__)
+#define u8c_restr __restrict__
+#elif defined(__INTEL_COMPILER) || defined(_MSC_VER)
+#define u8c_restr __restrict
+#else
+#define u8c_restr
+#endif
+
+namespace u8c {
+	using byte = signed char;
+	using int16 = std::int_least16_t;
+	using int32 = std::int_least32_t;
+	using int64 = std::int_least64_t;
+	using intmax = std::intmax_t;
+	using size = decltype(0x0uz);
+	using ssize = decltype(0x0z);
+	using ubyte = unsigned char;
+	using uint16 = std::uint_least16_t;
+	using uint32 = std::uint_least32_t;
+	using uint64 = std::uint_least64_t;
+	using uintmax = std::uintmax_t;
+	/* Satisfied by exactly the three Unicode code unit types. */
+	template<typename T> concept utf = std::is_same_v<T,char16_t> || std::is_same_v<T,char32_t> || std::is_same_v<T,char8_t>;
+	enum class endi : bool {
+		big = true,
+		little = false,
+	};
+	/*
+		renew: Moves an array to a new allocation of newsz elements (see misc.d/renew).
+		It is deliberately not marked alloc_size or malloc: newsz counts elements rather
+		than bytes, and the returned storage holds copies of the old elements.
+	*/
+	template<typename T> [[nodiscard,u8c_attr_nonnull(0x1)]] constexpr auto renew(T * ptr,u8c::size sz,u8c::size newsz) -> T *;
+	template<typename T> constexpr auto renew(std::nullptr_t, u8c::size sz,u8c::size newsz) -> T * = delete;
+	/* Number of bits in a byte. */
+	constexpr auto bytesz = static_cast<u8c::ubyte>(CHAR_BIT);
+	/* True only when _DEBUG is defined and NDEBUG is not. */
+	constexpr auto dbg =
+#if defined(NDEBUG) || !defined(_DEBUG)
+	false;
+#else
+	true;
+#endif
+	/* Highest Unicode code point, U+10FFFF. */
+	constexpr auto unimax = U'\U0010FFFF';
+	/* Library version number. */
+	constexpr auto ver = u8c_uint64c(0x1B);
+}
+
+#include <u8c/misc.d/renew>
+
+#endif
diff --git a/u8c/include/u8c/misc.d/renew b/u8c/include/u8c/misc.d/renew
new file mode 100644
index 0000000..8d67b78
--- /dev/null
+++ b/u8c/include/u8c/misc.d/renew
@@ -0,0 +1,32 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_PDOxHgtcAGQDjPjZ)
+#define u8c_key_PDOxHgtcAGQDjPjZ
+
+#include <algorithm> /* std::copy_n, std::min */
+
+/*
+	u8c::renew: Allocates a new array of _newsz elements, copies the first
+	min(_sz,_newsz) elements of _ptr into it, releases _ptr with delete[] and
+	returns the new array. If copying throws, the new array is released and
+	_ptr is left untouched.
+*/
+template<typename T> auto constexpr u8c::renew(T * const u8c_restr _ptr,u8c::size const _sz,u8c::size const _newsz) -> T * {
+	T * const u8c_restr ptr = ::new T[_newsz];
+	try {
+		std::copy_n(_ptr,std::min(_sz,_newsz),ptr);
+	}
+	catch (...) {
+		/* Do not leak the fresh allocation when an element copy fails. */
+		::delete[] ptr;
+		throw;
+	}
+	::delete[] _ptr;
+	return ptr;
+}
+
+#endif
diff --git a/u8c/include/u8c/str b/u8c/include/u8c/str
new file mode 100644
index 0000000..d8f5753
--- /dev/null
+++ b/u8c/include/u8c/str
@@ -0,0 +1,55 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_ywSpAOVLrorTYrkK)
+#define u8c_key_ywSpAOVLrorTYrkK
+
+#include <u8c/arr>
+#include <u8c/utf>
+
+namespace u8c {
+	/* String type whose contents are held as char32_t elements in a u8c::arr. */
+	class str {
+	public:
+		/* NOTE(review): presumably appends oth to this string; no definition is visible here. */
+		constexpr auto app( u8c::str const & oth) -> u8c::str const &;
+		/* Start and end of the stored char32_t elements. */
+		constexpr auto begin() const noexcept -> char32_t *;
+		constexpr auto end() const noexcept -> char32_t *;
+		/* Copy assignment; note that it returns a reference to const. */
+		constexpr auto operator = ( u8c::str const & oth) -> u8c::str const &;
+		/* NOTE(review): subscript that takes a string rather than an index; intent unclear, confirm. */
+		constexpr auto operator [] (u8c::str const & oth) const noexcept -> u8c::str const &;
+		[[nodiscard]] constexpr str() noexcept;
+		[[nodiscard]] constexpr str( u8c::str const & oth);
+		/* Construction from a single code unit and from an array of code units. */
+		template<u8c::utf T> [[nodiscard]] constexpr str( T chr);
+		template<u8c::utf T,u8c::size N> [[nodiscard]] constexpr str( T const (& strlit)[N]) noexcept;
+		/* The contents converted to an array of char8_t. */
+		[[nodiscard]] constexpr auto u8() const -> u8c::arr<char8_t>;
+	private:
+		u8c::arr<char32_t> _arr;
+	};
+	template<typename T> [[nodiscard,u8c_attr_hot]] constexpr auto fmt( T fmt) -> u8c::str;
+	/* Stream insertion for u8c::str; declared only, the definition is not part of this header. */
+	auto operator << (std::ostream & strm, u8c::str const & str) -> std::ostream &;
+	/* Disabled draft of global formatting settings. */
+	/*class {
+	public:
+		bool ascii = false;
+		u8c::ubyte base = u8c_ubytec(0xB);
+		u8c::endi endi = u8c::endi::little;
+	} inline fmtsets;*/
+}
+
+#include <u8c/str.d/dbgprint>
+#include <u8c/str.d/str>
+
+#endif
diff --git a/u8c/include/u8c/str.d/dbgprint b/u8c/include/u8c/str.d/dbgprint
new file mode 100644
index 0000000..4ecb6b9
--- /dev/null
+++ b/u8c/include/u8c/str.d/dbgprint
@@ -0,0 +1,32 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+#if !defined(u8c_key_WQGQeKhIxLI16CtT)
+#define u8c_key_WQGQeKhIxLI16CtT
+
+/* Everything below is compiled out by the "#if 0x0" guard. */
+#if 0x0
+
+/*
+	u8c::dbgprint: Forwards _msg to u8c::println on stderr when u8c::dbg is
+	true, and does nothing otherwise.
+*/
+auto u8c::dbgprint(u8c::str const _msg) -> void {
+	if constexpr (u8c::dbg) {
+		return u8c::println(stderr,_msg);
+	}
+}
+
+#endif
+
+#endif
diff --git a/u8c/include/u8c/str.d/fmt b/u8c/include/u8c/str.d/fmt
new file mode 100644
index 0000000..3c602f0
--- /dev/null
+++ b/u8c/include/u8c/str.d/fmt
@@ -0,0 +1,59 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_6cyujV0FoSmPeQWl)
+#define u8c_key_6cyujV0FoSmPeQWl
+
+/* Everything below is a draft that is compiled out by the "#if 0x0" guard. */
+#if 0x0
+/*
+	u8c::dbgprint: Prints _msg on stderr in debug builds only. NDEBUG marks a
+	non-debug build, so the message is suppressed when it is defined.
+*/
+auto u8c::dbgprint(u8c::str const _msg) -> void {
+#if defined(NDEBUG)
+	constexpr auto dbg = false;
+#else
+	constexpr auto dbg = true;
+#endif
+	if constexpr(dbg) {
+		return u8c::println(stderr,_msg);
+	}
+}
+/* u8c::fmt without arguments: returns a copy of _str. */
+auto u8c::fmt(u8c::str const _str) -> u8c::str {
+	return u8c::str(_str);
+}
+/*
+	u8c::fmt with one argument: copies _str up to the first U+FFFC placeholder
+	and appends the formatted _fmt.
+	NOTE(review): the text following the placeholder is dropped.
+*/
+template<typename T> auto u8c::fmt(u8c::str const _str,T const _fmt) -> u8c::str {
+	u8c::str str;
+	for(auto chr : _str) {
+		if(chr == U'\uFFFC') [[unlikely]] {
+			return str + u8c::fmter::fmt(_fmt);
+		}
+		str += chr;
+	}
+	return str;
+}
+/*
+	u8c::fmt with several arguments.
+	NOTE(review): the recursive call passes the already copied prefix "str"
+	and the same _fmt again, instead of the remainder of _str and the next
+	argument; this needs to be reworked before the draft is enabled.
+*/
+template<typename T,typename... TArgs> auto u8c::fmt(u8c::str const _str,T const _fmt,TArgs... _args) -> u8c::str {
+	u8c::str str;
+	for(auto chr : _str) {
+		if(chr == U'\uFFFC') [[unlikely]] {
+			return str + u8c::fmt(str,_fmt,_args...);
+		}
+		str += chr;
+	}
+	return str;
+}
+#endif
+
+#endif
diff --git a/u8c/include/u8c/str.d/str b/u8c/include/u8c/str.d/str
new file mode 100644
index 0000000..f708f8a
--- /dev/null
+++ b/u8c/include/u8c/str.d/str
@@ -0,0 +1,50 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_3zvMYqWFDYnlTEyW)
+#define u8c_key_3zvMYqWFDYnlTEyW
+
+#include <type_traits> /* std::is_same_v */
+
+/* Start of the stored elements; forwards to the backing array. */
+constexpr auto u8c::str::begin() const noexcept -> char32_t * {
+	return this->_arr.begin();
+}
+/* End of the stored elements; forwards to the backing array. */
+constexpr auto u8c::str::end() const noexcept -> char32_t * {
+	return this->_arr.end();
+}
+/* Copy assignment: assigns the backing array of _oth and returns *this. */
+constexpr auto u8c::str::operator = (u8c::str const & _oth) -> u8c::str const & {
+	this->_arr = _oth._arr;
+	return *this;
+}
+/* Copy construction, implemented through copy assignment. */
+constexpr u8c::str::str(u8c::str const & _oth) {
+	*this = _oth;
+}
+/*
+	Construction from a single code unit.
+	NOTE(review): only char32_t is stored; a char8_t or char16_t argument
+	leaves the backing array untouched. Confirm that this is intended.
+*/
+template<u8c::utf T> constexpr u8c::str::str(T const _chr) {
+	if constexpr (std::is_same_v<T,char32_t>) {
+		this->_arr.set(_chr);
+	}
+}
+/*
+	Construction from an array of N code units.
+	NOTE(review): the range passed on covers all N elements, which for a
+	string literal includes the terminating null character. Confirm against
+	u8c::arr::set and the users of u8() whether that is relied upon.
+*/
+template<u8c::utf T,u8c::size N> constexpr u8c::str::str(T const (& _strlit)[N]) noexcept {
+	this->_arr.set(_strlit,_strlit + N);
+}
+/* The contents converted to char8_t through u8c::cnv. */
+constexpr auto u8c::str::u8() const -> u8c::arr<char8_t> {
+	return u8c::cnv<char8_t>(this->begin(),this->end());
+}
+
+#endif
diff --git a/u8c/include/u8c/u8c b/u8c/include/u8c/u8c
new file mode 100644
index 0000000..f9591c5
--- /dev/null
+++ b/u8c/include/u8c/u8c
@@ -0,0 +1,50 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+/*
+	Greater Header Dependencies:
+
+	misc →┬─────────────────────────→┬→ u8c
+	      ├→ arr ─→┬────────────────→┤
+	      │        └→ utf ─→┬───────→┤
+	      │                 └→ str ─→┤
+	      └→ cstr →┬────────────────→┤
+	               └→ impl →┬───────→┤
+	                        └→ math →┘
+*/
+
+#if !defined(u8c_key_piDyeERQmK9By1n3)
+#define u8c_key_piDyeERQmK9By1n3
+
+#include <concepts> /* std::convertible_to */
+#include <ostream> /* std::ostream */
+#include <type_traits> /* std::is_same_v */
+
+#include <u8c/math>
+#include <u8c/str>
+
+namespace u8c {
+	/* Name of the Unicode block that contains chr; throws std::domain_error above u8c::unimax. */
+	[[nodiscard]] constexpr auto uniblk(char32_t chr) -> u8c::str;
+	/* Name of the character chr; throws std::out_of_range above u8c::unimax. */
+	[[nodiscard]] constexpr auto uninm( char32_t chr) -> u8c::str;
+}
+
+#include <u8c/u8c.d/uniblk>
+#include <u8c/u8c.d/uninm>
+
+#endif
diff --git a/u8c/include/u8c/u8c.d/uniblk b/u8c/include/u8c/u8c.d/uniblk
new file mode 100644
index 0000000..e216833
--- /dev/null
+++ b/u8c/include/u8c/u8c.d/uniblk
@@ -0,0 +1,497 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_Z0dBX6z5KYfITIHo)
+#define u8c_key_Z0dBX6z5KYfITIHo
+
+#include <stdexcept> /* std::domain_error */
+
+/*
+	u8c::uniblk: Returns the upper-case name of the Unicode block that
+	contains _chr.
+
+	Throws std::domain_error when _chr is greater than u8c::unimax. Code
+	points that fall outside every range listed below yield
+	"UNDEFINED IN UNICODE"; the table does not list any block between
+	U+2200 and U+119FF.
+*/
+constexpr auto u8c::uniblk(char32_t const _chr) -> u8c::str {
+	if(_chr > u8c::unimax) [[unlikely]] {
+		throw std::domain_error("Unicode codepoint too big!");
+	}
+	if(_chr <= U'\u007F') {
+		return U"BASIC LATIN";
+	}
+	if(_chr >= U'\u0080' && _chr <= U'\u00FF') {
+		return U"LATIN-1 SUPPLEMENT";
+	}
+	if(_chr >= U'\u0100' && _chr <= U'\u017F') {
+		return U"LATIN EXTENDED-A";
+	}
+	if(_chr >= U'\u0180' && _chr <= U'\u024F') {
+		return U"LATIN EXTENDED-B";
+	}
+	if(_chr >= U'\u0250' && _chr <= U'\u02AF') {
+		return U"IPA EXTENSIONS";
+	}
+	if(_chr >= U'\u02B0' && _chr <= U'\u02FF') {
+		return U"SPACING MODIFIER LETTERS";
+	}
+	if(_chr >= U'\u0300' && _chr <= U'\u036F') {
+		return U"COMBINING DIACRITICAL MARKS";
+	}
+	if(_chr >= U'\u0370' && _chr <= U'\u03FF') {
+		return U"GREEK AND COPTIC";
+	}
+	if(_chr >= U'\u0400' && _chr <= U'\u04FF') {
+		return U"CYRILLIC";
+	}
+	if(_chr >= U'\u0500' && _chr <= U'\u052F') {
+		return U"CYRILLIC SUPPLEMENT";
+	}
+	if(_chr >= U'\u0530' && _chr <= U'\u058F') {
+		return U"ARMENIAN";
+	}
+	if(_chr >= U'\u0590' && _chr <= U'\u05FF') {
+		return U"HEBREW";
+	}
+	if(_chr >= U'\u0600' && _chr <= U'\u06FF') {
+		return U"ARABIC";
+	}
+	if(_chr >= U'\u0700' && _chr <= U'\u074F') {
+		return U"SYRIAC";
+	}
+	if(_chr >= U'\u0750' && _chr <= U'\u077F') {
+		return U"ARABIC SUPPLEMENT";
+	}
+	if(_chr >= U'\u0780' && _chr <= U'\u07BF') {
+		return U"THAANA";
+	}
+	if(_chr >= U'\u07C0' && _chr <= U'\u07FF') {
+		return U"NKO";
+	}
+	if(_chr >= U'\u0800' && _chr <= U'\u083F') {
+		return U"SAMARITAN";
+	}
+	if(_chr >= U'\u0840' && _chr <= U'\u085F') {
+		return U"MANDAIC";
+	}
+	if(_chr >= U'\u0860' && _chr <= U'\u086F') {
+		return U"SYRIAC SUPPLEMENT";
+	}
+	if(_chr >= U'\u08A0' && _chr <= U'\u08FF') {
+		return U"ARABIC EXTENDED-A";
+	}
+	if(_chr >= U'\u0900' && _chr <= U'\u097F') {
+		return U"DEVANAGARI";
+	}
+	if(_chr >= U'\u0980' && _chr <= U'\u09FF') {
+		return U"BENGALI";
+	}
+	if(_chr >= U'\u0A00' && _chr <= U'\u0A7F') {
+		return U"GURMUKHI";
+	}
+	if(_chr >= U'\u0A80' && _chr <= U'\u0AFF') {
+		return U"GUJARATI";
+	}
+	if(_chr >= U'\u0B00' && _chr <= U'\u0B7F') {
+		return U"ORIYA";
+	}
+	if(_chr >= U'\u0B80' && _chr <= U'\u0BFF') {
+		return U"TAMIL";
+	}
+	if(_chr >= U'\u0C00' && _chr <= U'\u0C7F') {
+		return U"TELUGU";
+	}
+	if(_chr >= U'\u0C80' && _chr <= U'\u0CFF') {
+		return U"KANNADA";
+	}
+	if(_chr >= U'\u0D00' && _chr <= U'\u0D7F') {
+		return U"MALAYALAM";
+	}
+	if(_chr >= U'\u0D80' && _chr <= U'\u0DFF') {
+		return U"SINHALA";
+	}
+	if(_chr >= U'\u0E00' && _chr <= U'\u0E7F') {
+		return U"THAI";
+	}
+	if(_chr >= U'\u0E80' && _chr <= U'\u0EFF') {
+		return U"LAO";
+	}
+	if(_chr >= U'\u0F00' && _chr <= U'\u0FFF') {
+		return U"TIBETAN";
+	}
+	if(_chr >= U'\u1000' && _chr <= U'\u109F') {
+		return U"MYANMAR";
+	}
+	if(_chr >= U'\u10A0' && _chr <= U'\u10FF') {
+		return U"GEORGIAN";
+	}
+	if(_chr >= U'\u1100' && _chr <= U'\u11FF') {
+		return U"HANGUL JAMO";
+	}
+	if(_chr >= U'\u1200' && _chr <= U'\u137F') {
+		return U"ETHIOPIC";
+	}
+	if(_chr >= U'\u1380' && _chr <= U'\u139F') {
+		return U"ETHIOPIC SUPPLEMENT";
+	}
+	if(_chr >= U'\u13A0' && _chr <= U'\u13FF') {
+		return U"CHEROKEE";
+	}
+	if(_chr >= U'\u1400' && _chr <= U'\u167F') {
+		return U"UNIFIED CANADIAN ABORIGINAL SYLLABICS";
+	}
+	if(_chr >= U'\u1680' && _chr <= U'\u169F') {
+		return U"OGHAM";
+	}
+	if(_chr >= U'\u16A0' && _chr <= U'\u16FF') {
+		return U"RUNIC";
+	}
+	if(_chr >= U'\u1700' && _chr <= U'\u171F') {
+		return U"TAGALOG";
+	}
+	if(_chr >= U'\u1720' && _chr <= U'\u173F') {
+		return U"HANUNOO";
+	}
+	if(_chr >= U'\u1740' && _chr <= U'\u175F') {
+		return U"BUHID";
+	}
+	if(_chr >= U'\u1760' && _chr <= U'\u177F') {
+		return U"TAGBANWA";
+	}
+	if(_chr >= U'\u1780' && _chr <= U'\u17FF') {
+		return U"KHMER";
+	}
+	if(_chr >= U'\u1800' && _chr <= U'\u18AF') {
+		return U"MONGOLIAN";
+	}
+	if(_chr >= U'\u18B0' && _chr <= U'\u18FF') {
+		return U"UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED";
+	}
+	if(_chr >= U'\u1900' && _chr <= U'\u194F') {
+		return U"LIMBU";
+	}
+	if(_chr >= U'\u1950' && _chr <= U'\u197F') {
+		return U"TAI LE";
+	}
+	if(_chr >= U'\u1980' && _chr <= U'\u19DF') {
+		return U"NEW TAI LUE";
+	}
+	if(_chr >= U'\u19E0' && _chr <= U'\u19FF') {
+		return U"KHMER SYMBOLS";
+	}
+	if(_chr >= U'\u1A00' && _chr <= U'\u1A1F') {
+		return U"BUGINESE";
+	}
+	if(_chr >= U'\u1A20' && _chr <= U'\u1AAF') {
+		return U"TAI THAM";
+	}
+	if(_chr >= U'\u1AB0' && _chr <= U'\u1AFF') {
+		return U"COMBINING DIACRITICAL MARKS EXTENDED";
+	}
+	if(_chr >= U'\u1B00' && _chr <= U'\u1B7F') {
+		return U"BALINESE";
+	}
+	if(_chr >= U'\u1B80' && _chr <= U'\u1BBF') {
+		return U"SUNDANESE";
+	}
+	if(_chr >= U'\u1BC0' && _chr <= U'\u1BFF') {
+		return U"BATAK";
+	}
+	if(_chr >= U'\u1C00' && _chr <= U'\u1C4F') {
+		return U"LEPCHA";
+	}
+	if(_chr >= U'\u1C50' && _chr <= U'\u1C7F') {
+		return U"OL CHIKI";
+	}
+	if(_chr >= U'\u1C80' && _chr <= U'\u1C8F') {
+		return U"CYRILLIC EXTENDED-C";
+	}
+	if(_chr >= U'\u1C90' && _chr <= U'\u1CBF') {
+		return U"GEORGIAN EXTENDED";
+	}
+	if(_chr >= U'\u1CC0' && _chr <= U'\u1CCF') {
+		return U"SUNDANESE SUPPLEMENT";
+	}
+	if(_chr >= U'\u1CD0' && _chr <= U'\u1CFF') {
+		return U"VEDIC EXTENSIONS";
+	}
+	if(_chr >= U'\u1D00' && _chr <= U'\u1D7F') {
+		return U"PHONETIC EXTENSIONS";
+	}
+	if(_chr >= U'\u1D80' && _chr <= U'\u1DBF') {
+		return U"PHONETIC EXTENSIONS SUPPLEMENT";
+	}
+	if(_chr >= U'\u1DC0' && _chr <= U'\u1DFF') {
+		return U"COMBINING DIACRITICAL MARKS SUPPLEMENT";
+	}
+	if(_chr >= U'\u1E00' && _chr <= U'\u1EFF') {
+		return U"LATIN EXTENDED ADDITIONAL";
+	}
+	if(_chr >= U'\u1F00' && _chr <= U'\u1FFF') {
+		return U"GREEK EXTENDED";
+	}
+	if(_chr >= U'\u2000' && _chr <= U'\u206F') {
+		return U"GENERAL PUNCTUATION";
+	}
+	if(_chr >= U'\u2070' && _chr <= U'\u209F') {
+		return U"SUPERSCRIPTS AND SUBSCRIPTS";
+	}
+	if(_chr >= U'\u20A0' && _chr <= U'\u20CF') {
+		return U"CURRENCY SYMBOLS";
+	}
+	if(_chr >= U'\u20D0' && _chr <= U'\u20FF') {
+		return U"COMBINING DIACRITICAL MARKS FOR SYMBOLS";
+	}
+	if(_chr >= U'\u2100' && _chr <= U'\u214F') {
+		return U"LETTERLIKE SYMBOLS";
+	}
+	if(_chr >= U'\u2150' && _chr <= U'\u218F') {
+		return U"NUMBER FORMS";
+	}
+	if(_chr >= U'\u2190' && _chr <= U'\u21FF') {
+		return U"ARROWS";
+	}
+	if(_chr >= U'\U00011A00' && _chr <= U'\U00011A4F') {
+		return U"ZANABAZAR SQUARE";
+	}
+	if(_chr >= U'\U00011A50' && _chr <= U'\U00011AAF') {
+		return U"SOYOMBO";
+	}
+	if(_chr >= U'\U00011AC0' && _chr <= U'\U00011AFF') {
+		return U"PAU CIN HAU";
+	}
+	if(_chr >= U'\U00011C00' && _chr <= U'\U00011C6F') {
+		return U"BHAIKSUKI";
+	}
+	if(_chr >= U'\U00011C70' && _chr <= U'\U00011CBF') {
+		return U"MARCHEN";
+	}
+	if(_chr >= U'\U00011D00' && _chr <= U'\U00011D5F') {
+		return U"MASARAM GONDI";
+	}
+	if(_chr >= U'\U00011D60' && _chr <= U'\U00011DAF') {
+		return U"GUNJALA GONDI";
+	}
+	if(_chr >= U'\U00011EE0' && _chr <= U'\U00011EFF') {
+		return U"MAKASAR";
+	}
+	if(_chr >= U'\U00011FB0' && _chr <= U'\U00011FBF') {
+		return U"LISU SUPPLEMENT";
+	}
+	if(_chr >= U'\U00011FC0' && _chr <= U'\U00011FFF') {
+		return U"TAMIL SUPPLEMENT";
+	}
+	if(_chr >= U'\U00012000' && _chr <= U'\U000123FF') {
+		return U"CUNEIFORM";
+	}
+	if(_chr >= U'\U00012400' && _chr <= U'\U0001247F') {
+		return U"CUNEIFORM NUMBERS AND PUNCTUATION";
+	}
+	if(_chr >= U'\U00012480' && _chr <= U'\U0001254F') {
+		return U"EARLY DYNASTIC CUNEIFORM";
+	}
+	if(_chr >= U'\U00013000' && _chr <= U'\U0001342F') {
+		return U"EGYPTIAN HIEROGLYPHS";
+	}
+	if(_chr >= U'\U00013430' && _chr <= U'\U0001343F') {
+		return U"EGYPTIAN HIEROGLYPH FORMAT CONTROLS";
+	}
+	if(_chr >= U'\U00014400' && _chr <= U'\U0001467F') {
+		return U"ANATOLIAN HIEROGLYPHS";
+	}
+	if(_chr >= U'\U00016800' && _chr <= U'\U00016A3F') {
+		return U"BAMUM SUPPLEMENT";
+	}
+	if(_chr >= U'\U00016A40' && _chr <= U'\U00016A6F') {
+		return U"MRO";
+	}
+	if(_chr >= U'\U00016AD0' && _chr <= U'\U00016AFF') {
+		return U"BASSA VAH";
+	}
+	if(_chr >= U'\U00016B00' && _chr <= U'\U00016B8F') {
+		return U"PAHAWH HMONG";
+	}
+	if(_chr >= U'\U00016E40' && _chr <= U'\U00016E9F') {
+		return U"MEDEFAIDRIN";
+	}
+	if(_chr >= U'\U00016F00' && _chr <= U'\U00016F9F') {
+		return U"MIAO";
+	}
+	if(_chr >= U'\U00016FE0' && _chr <= U'\U00016FFF') {
+		return U"IDEOGRAPHIC SYMBOLS AND PUNCTUATION";
+	}
+	if(_chr >= U'\U00017000' && _chr <= U'\U000187FF') {
+		return U"TANGUT";
+	}
+	if(_chr >= U'\U00018800' && _chr <= U'\U00018AFF') {
+		return U"TANGUT COMPONENTS";
+	}
+	if(_chr >= U'\U00018B00' && _chr <= U'\U00018CFF') {
+		return U"KHITAN SMALL SCRIPT";
+	}
+	if(_chr >= U'\U00018D00' && _chr <= U'\U00018D8F') {
+		return U"TANGUT SUPPLEMENT";
+	}
+	if(_chr >= U'\U0001B000' && _chr <= U'\U0001B0FF') {
+		return U"KANA SUPPLEMENT";
+	}
+	if(_chr >= U'\U0001B100' && _chr <= U'\U0001B12F') {
+		return U"KANA EXTENDED-A";
+	}
+	if(_chr >= U'\U0001B130' && _chr <= U'\U0001B16F') {
+		return U"SMALL KANA EXTENSION";
+	}
+	if(_chr >= U'\U0001B170' && _chr <= U'\U0001B2FF') {
+		return U"NUSHU";
+	}
+	if(_chr >= U'\U0001BC00' && _chr <= U'\U0001BC9F') {
+		return U"DUPLOYAN";
+	}
+	if(_chr >= U'\U0001BCA0' && _chr <= U'\U0001BCAF') {
+		return U"SHORTHAND FORMAT CONTROLS";
+	}
+	if(_chr >= U'\U0001D000' && _chr <= U'\U0001D0FF') {
+		return U"BYZANTINE MUSICAL SYMBOLS";
+	}
+	if(_chr >= U'\U0001D100' && _chr <= U'\U0001D1FF') {
+		return U"MUSICAL SYMBOLS";
+	}
+	if(_chr >= U'\U0001D200' && _chr <= U'\U0001D24F') {
+		return U"ANCIENT GREEK MUSICAL NOTATION";
+	}
+	if(_chr >= U'\U0001D2E0' && _chr <= U'\U0001D2FF') {
+		return U"MAYAN NUMERALS";
+	}
+	if(_chr >= U'\U0001D300' && _chr <= U'\U0001D35F') {
+		return U"TAI XUAN JING SYMBOLS";
+	}
+	if(_chr >= U'\U0001D360' && _chr <= U'\U0001D37F') {
+		return U"COUNTING ROD NUMERALS";
+	}
+	if(_chr >= U'\U0001D400' && _chr <= U'\U0001D7FF') {
+		return U"MATHEMATICAL ALPHANUMERIC SYMBOLS";
+	}
+	if(_chr >= U'\U0001D800' && _chr <= U'\U0001DAAF') {
+		return U"SUTTON SIGNWRITING";
+	}
+	if(_chr >= U'\U0001E000' && _chr <= U'\U0001E02F') {
+		return U"GLAGOLITIC SUPPLEMENT";
+	}
+	if(_chr >= U'\U0001E100' && _chr <= U'\U0001E14F') {
+		return U"NYIAKENG PUACHUE HMONG";
+	}
+	if(_chr >= U'\U0001E2C0' && _chr <= U'\U0001E2FF') {
+		return U"WANCHO";
+	}
+	if(_chr >= U'\U0001E800' && _chr <= U'\U0001E8DF') {
+		return U"MENDE KIKAKUI";
+	}
+	if(_chr >= U'\U0001E900' && _chr <= U'\U0001E95F') {
+		return U"ADLAM";
+	}
+	if(_chr >= U'\U0001EC70' && _chr <= U'\U0001ECBF') {
+		return U"INDIC SIYAQ NUMBERS";
+	}
+	if(_chr >= U'\U0001ED00' && _chr <= U'\U0001ED4F') {
+		return U"OTTOMAN SIYAQ NUMBERS";
+	}
+	if(_chr >= U'\U0001EE00' && _chr <= U'\U0001EEFF') {
+		return U"ARABIC MATHEMATICAL ALPHABETIC SYMBOLS";
+	}
+	if(_chr >= U'\U0001F000' && _chr <= U'\U0001F02F') {
+		return U"MAHJONG TILES";
+	}
+	if(_chr >= U'\U0001F030' && _chr <= U'\U0001F09F') {
+		return U"DOMINO TILES";
+	}
+	if(_chr >= U'\U0001F0A0' && _chr <= U'\U0001F0FF') {
+		return U"PLAYING CARDS";
+	}
+	if(_chr >= U'\U0001F100' && _chr <= U'\U0001F1FF') {
+		return U"ENCLOSED ALPHANUMERIC SUPPLEMENT";
+	}
+	if(_chr >= U'\U0001F200' && _chr <= U'\U0001F2FF') {
+		return U"ENCLOSED IDEOGRAPHIC SUPPLEMENT";
+	}
+	if(_chr >= U'\U0001F300' && _chr <= U'\U0001F5FF') {
+		return U"MISCELLANEOUS SYMBOLS AND PICTOGRAPHS";
+	}
+	if(_chr >= U'\U0001F600' && _chr <= U'\U0001F64F') {
+		return U"EMOTICONS";
+	}
+	if(_chr >= U'\U0001F650' && _chr <= U'\U0001F67F') {
+		return U"ORNAMENTAL DINGBATS";
+	}
+	if(_chr >= U'\U0001F680' && _chr <= U'\U0001F6FF') {
+		return U"TRANSPORT AND MAP SYMBOLS";
+	}
+	if(_chr >= U'\U0001F700' && _chr <= U'\U0001F77F') {
+		return U"ALCHEMICAL SYMBOLS";
+	}
+	if(_chr >= U'\U0001F780' && _chr <= U'\U0001F7FF') {
+		return U"GEOMETRIC SHAPES EXTENDED";
+	}
+	if(_chr >= U'\U0001F800' && _chr <= U'\U0001F8FF') {
+		return U"SUPPLEMENTAL ARROWS-C";
+	}
+	if(_chr >= U'\U0001F900' && _chr <= U'\U0001F9FF') {
+		return U"SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS";
+	}
+	if(_chr >= U'\U0001FA00' && _chr <= U'\U0001FA6F') {
+		return U"CHESS SYMBOLS";
+	}
+	if(_chr >= U'\U0001FA70' && _chr <= U'\U0001FAFF') {
+		return U"SYMBOLS AND PICTOGRAPHS EXTENDED-A";
+	}
+	if(_chr >= U'\U0001FB00' && _chr <= U'\U0001FBFF') {
+		return U"SYMBOLS FOR LEGACY COMPUTING";
+	}
+	if(_chr >= U'\U00020000' && _chr <= U'\U0002A6DF') {
+		return U"CJK UNIFIED IDEOGRAPHS EXTENSION B";
+	}
+	if(_chr >= U'\U0002A700' && _chr <= U'\U0002B73F') {
+		return U"CJK UNIFIED IDEOGRAPHS EXTENSION C";
+	}
+	if(_chr >= U'\U0002B740' && _chr <= U'\U0002B81F') {
+		return U"CJK UNIFIED IDEOGRAPHS EXTENSION D";
+	}
+	if(_chr >= U'\U0002B820' && _chr <= U'\U0002CEAF') {
+		return U"CJK UNIFIED IDEOGRAPHS EXTENSION E";
+	}
+	if(_chr >= U'\U0002CEB0' && _chr <= U'\U0002EBEF') {
+		return U"CJK UNIFIED IDEOGRAPHS EXTENSION F";
+	}
+	if(_chr >= U'\U0002F800' && _chr <= U'\U0002FA1F') {
+		return U"CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT";
+	}
+	if(_chr >= U'\U00030000' && _chr <= U'\U0003134F') {
+		return U"CJK UNIFIED IDEOGRAPHS EXTENSION G";
+	}
+	if(_chr >= U'\U000E0000' && _chr <= U'\U000E007F') {
+		return U"TAGS";
+	}
+	if(_chr >= U'\U000E0100' && _chr <= U'\U000E01EF') {
+		return U"VARIATION SELECTORS SUPPLEMENT";
+	}
+	if(_chr >= U'\U000F0000' && _chr <= U'\U000FFFFF') {
+		return U"SUPPLEMENTARY PRIVATE USE AREA-A";
+	}
+	if(_chr >= U'\U00100000' && _chr <= U'\U0010FFFF') {
+		return U"SUPPLEMENTARY PRIVATE USE AREA-B";
+	}
+	return U"UNDEFINED IN UNICODE";
+}
+
+#endif
\ No newline at end of file diff --git a/u8c/include/u8c/u8c.d/uninm b/u8c/include/u8c/u8c.d/uninm new file mode 100644 index 0000000..3fc67f3 --- /dev/null +++ b/u8c/include/u8c/u8c.d/uninm @@ -0,0 +1,2697 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_EW3CUEOMiNBCpImA) +#define u8c_key_EW3CUEOMiNBCpImA + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::uninm(char32_t const _chr) -> u8c::str{ + switch(_chr) { + [[unlikely]] default: + if(_chr > u8c::unimax) [[unlikely]] { + throw std::out_of_range("Character out of range."); + } + return U"UNDEFINED IN UNICODE"; + /* BASIC LATIN: */ + case U'\u0000': + return U"NULL"; + case U'\u0001': + return U"START OF HEADING"; + case U'\u0002': + return U"START OF TEXT"; + case U'\u0003': + return U"END OF TEXT"; + case U'\u0004': + return U"END OF TRANSMISSION"; + case U'\u0005': + return U"ENQUIRY"; + case U'\u0006': + return U"ACKNOWLEDGE"; + case U'\u0007': + return U"BELL"; + case U'\u0008': + return U"BACKSPACE"; + case U'\u0009': + return U"HORIZONTAL TABULATION"; + case U'\u000A': + return U"NEW LINE"; + case U'\u000B': + return U"VERTICAL TABULATION"; + case U'\u000C': + return U"FORM FEED"; + case U'\u000D': + return U"CARRIAGE RETURN"; + case U'\u000E': + return U"SHIFT OUT"; + case U'\u000F': + return U"SHIFT IN"; + case 
U'\u0010': + return U"DATA LINK ESCAPE"; + case U'\u0011': + return U"DEVICE CONTROL ONE"; + case U'\u0012': + return U"DEVICE CONTROL TWO"; + case U'\u0013': + return U"DEVICE CONTROL THREE"; + case U'\u0014': + return U"DEVICE CONTROL FOUR"; + case U'\u0015': + return U"NEGATIVE ACKNOWLEDGE"; + case U'\u0016': + return U"SYNCHRONOUS IDLE"; + case U'\u0017': + return U"END OF TRANSMISSION BLOCk"; + case U'\u0018': + return U"CANCEL"; + case U'\u0019': + return U"END OF MEDIUM"; + case U'\u001A': + return U"SUBSTITUTE"; + case U'\u001B': + return U"ESCAPE"; + case U'\u001C': + return U"FILE SEPERATOR"; + case U'\u001D': + return U"GROUP SEPERATOR"; + case U'\u001E': + return U"RECORD SEPERATOR"; + case U'\u001F': + return U"UNIT SEPERATOR"; + case U'\u0020': + return U"SPACE"; + case U'\u0021': + return U"EXCLAMATION MARK"; + case U'\u0022': + return U"QUOTATION MARK"; + case U'\u0023': + return U"NUMBER SIGN"; + case U'\u0024': + return U"DOLLAR SIGN"; + case U'\u0025': + return U"PERCENT SIGN"; + case U'\u0026': + return U"AMPERSAND"; + case U'\u0027': + return U"APOSTROPHE"; + case U'\u0028': + return U"LEFT PARANTHESIS"; + case U'\u0029': + return U"RIGHT PARANTHESIS"; + case U'\u002A': + return U"ASTERISK"; + case U'\u002B': + return U"PLUS SIGN"; + case U'\u002C': + return U"COMMA"; + case U'\u002D': + return U"HYPHEN-MINUS"; + case U'\u002E': + return U"FULL STOP"; + case U'\u002F': + return U"SOLIDUS"; + case U'\u0030': + return U"DIGIT ZERO"; + case U'\u0031': + return U"DIGIT ONE"; + case U'\u0032': + return U"DIGIT TWO"; + case U'\u0033': + return U"DIGIT THREE"; + case U'\u0034': + return U"DIGIT FOUR"; + case U'\u0035': + return U"DIGIT FIVE"; + case U'\u0036': + return U"DIGIT SIX"; + case U'\u0037': + return U"DIGIT SEVEN"; + case U'\u0038': + return U"DIGIT EIGHT"; + case U'\u0039': + return U"DIGIT NINE"; + case U'\u003A': + return U"COLON"; + case U'\u003B': + return U"SEMICOLON"; + case U'\u003C': + return U"LESS-THAN SIGN"; + case U'\u003D': + 
return U"EQUALS SIGN"; + case U'\u003E': + return U"GREATER-THAN SIGN"; + case U'\u003F': + return U"QUESTION MARK"; + case U'\u0040': + return U"COMMERCIAL AT"; + case U'\u0041': + return U"LATIN CAPITAL LETTER A"; + case U'\u0042': + return U"LATIN CAPITAL LETTER B"; + case U'\u0043': + return U"LATIN CAPITAL LETTER C"; + case U'\u0044': + return U"LATIN CAPITAL LETTER D"; + case U'\u0045': + return U"LATIN CAPITAL LETTER E"; + case U'\u0046': + return U"LATIN CAPITAL LETTER F"; + case U'\u0047': + return U"LATIN CAPITAL LETTER G"; + case U'\u0048': + return U"LATIN CAPITAL LETTER H"; + case U'\u0049': + return U"LATIN CAPITAL LETTER I"; + case U'\u004A': + return U"LATIN CAPITAL LETTER J"; + case U'\u004B': + return U"LATIN CAPITAL LETTER K"; + case U'\u004C': + return U"LATIN CAPITAL LETTER L"; + case U'\u004D': + return U"LATIN CAPITAL LETTER M"; + case U'\u004E': + return U"LATIN CAPITAL LETTER N"; + case U'\u004F': + return U"LATIN CAPITAL LETTER O"; + case U'\u0050': + return U"LATIN CAPITAL LETTER P"; + case U'\u0051': + return U"LATIN CAPITAL LETTER Q"; + case U'\u0052': + return U"LATIN CAPITAL LETTER R"; + case U'\u0053': + return U"LATIN CAPITAL LETTER S"; + case U'\u0054': + return U"LATIN CAPITAL LETTER T"; + case U'\u0055': + return U"LATIN CAPITAL LETTER U"; + case U'\u0056': + return U"LATIN CAPITAL LETTER V"; + case U'\u0057': + return U"LATIN CAPITAL LETTER W"; + case U'\u0058': + return U"LATIN CAPITAL LETTER X"; + case U'\u0059': + return U"LATIN CAPITAL LETTER Y"; + case U'\u005A': + return U"LATIN CAPITAL LETTER Z"; + case U'\u005B': + return U"LEFT SQUARE BRACKET"; + case U'\u005C': + return U"REVERSE SOLIDUS"; + case U'\u005D': + return U"RIGHT SQUARE BRACKET"; + case U'\u005E': + return U"CIRCUMFLEX ACCENT"; + case U'\u005F': + return U"LOW LINE"; + case U'\u0060': + return U"GRAVE ACCENT"; + case U'\u0061': + return U"LATIN SMALL LETTER A"; + case U'\u0062': + return U"LATIN SMALL LETTER B"; + case U'\u0063': + return U"LATIN SMALL 
LETTER C"; + case U'\u0064': + return U"LATIN SMALL LETTER D"; + case U'\u0065': + return U"LATIN SMALL LETTER E"; + case U'\u0066': + return U"LATIN SMALL LETTER F"; + case U'\u0067': + return U"LATIN SMALL LETTER G"; + case U'\u0068': + return U"LATIN SMALL LETTER H"; + case U'\u0069': + return U"LATIN SMALL LETTER I"; + case U'\u006A': + return U"LATIN SMALL LETTER J"; + case U'\u006B': + return U"LATIN SMALL LETTER K"; + case U'\u006C': + return U"LATIN SMALL LETTER L"; + case U'\u006D': + return U"LATIN SMALL LETTER M"; + case U'\u006E': + return U"LATIN SMALL LETTER N"; + case U'\u006F': + return U"LATIN SMALL LETTER O"; + case U'\u0070': + return U"LATIN SMALL LETTER P"; + case U'\u0071': + return U"LATIN SMALL LETTER Q"; + case U'\u0072': + return U"LATIN SMALL LETTER R"; + case U'\u0073': + return U"LATIN SMALL LETTER S"; + case U'\u0074': + return U"LATIN SMALL LETTER T"; + case U'\u0075': + return U"LATIN SMALL LETTER U"; + case U'\u0076': + return U"LATIN SMALL LETTER V"; + case U'\u0077': + return U"LATIN SMALL LETTER W"; + case U'\u0078': + return U"LATIN SMALL LETTER X"; + case U'\u0079': + return U"LATIN SMALL LETTER Y"; + case U'\u007A': + return U"LATIN SMALL LETTER Z"; + case U'\u007B': + return U"LEFT CURLY BRACKET"; + case U'\u007C': + return U"VERTICAL LINE"; + case U'\u007D': + return U"RIGHT CURLY BRACKET"; + case U'\u007E': + return U"TILDE"; + case U'\u007F': + return U"DELETE"; + /* LATIN-1 SUPPLEMENT: */ + case U'\u0080': + return U"PADDING CHARACTER"; + case U'\u0081': + return U"HIGH OCTET PRESET"; + case U'\u0082': + return U"BREAK PERMITTED HERE"; + case U'\u0083': + return U"NO BREAK HERE"; + case U'\u0084': + return U"INDEX"; + case U'\u0085': + return U"NEXT LINE"; + case U'\u0086': + return U"START OF SELECTED AREA"; + case U'\u0087': + return U"END OF SELECTED AREA"; + case U'\u0088': + return U"CHARACTER TABULATION SET"; + case U'\u0089': + return U"CHARACTER TABULATION WITH JUSTIFICATION"; + case U'\u008A': + return U"LINE 
TABULATION SET"; + case U'\u008B': + return U"PARTIAL LINE FORWARD"; + case U'\u008C': + return U"PARTIAL LINE BACKWARD"; + case U'\u008D': + return U"REVERSE LINE FEED"; + case U'\u008E': + return U"SINGLE SHIFT TWO"; + case U'\u008F': + return U"SINGLE SHIFT THREE"; + case U'\u0090': + return U"DEVICE CONTROL STRING"; + case U'\u0091': + return U"PRIVATE USE ONE"; + case U'\u0092': + return U"PRIVATE USE TWO"; + case U'\u0093': + return U"SET TRANSMIT STATE"; + case U'\u0094': + return U"CANCEL CHARACTER"; + case U'\u0095': + return U"MESSAGE WAITING"; + case U'\u0096': + return U"START OF GUARDED AREA"; + case U'\u0097': + return U"END OF GUARDED AREA"; + case U'\u0098': + return U"START OF STRING"; + case U'\u0099': + return U"SINGLE GRAPHIC CHARACTER INTRODUCER"; + case U'\u009A': + return U"SINGLE CHARACTER INTRODUCER"; + case U'\u009B': + return U"CONTROL SEQUENCE INTRODUCER"; + case U'\u009C': + return U"STRING TERMINATOR"; + case U'\u009D': + return U"OPERATING SYSTEM COMMAND"; + case U'\u009E': + return U"PRIVACY MESSAGE"; + case U'\u009F': + return U"APPLICATION PROGRAM COMMAND"; + case U'\u00A0': + return U"NO-BREAK SPACE"; + case U'\u00A1': + return U"INVERTED EXCLAMATION MARK"; + case U'\u00A2': + return U"CENT SIGN"; + case U'\u00A3': + return U"POUND SIGN"; + case U'\u00A4': + return U"CURRENCY SIGN"; + case U'\u00A5': + return U"YEN SIGN"; + case U'\u00A6': + return U"BROKEN BAR"; + case U'\u00A7': + return U"SECTION SIGN"; + case U'\u00A8': + return U"DIAERESIS"; + case U'\u00A9': + return U"COPYRIGHT SIGN"; + case U'\u00AA': + return U"FEMININE ORDINAL INDICATOR"; + case U'\u00AB': + return U"LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"; + case U'\u00AC': + return U"NOT SIGN"; + case U'\u00AD': + return U"SOFT HYPHEN"; + case U'\u00AE': + return U"REGISTERED SIGN"; + case U'\u00AF': + return U"MACRON"; + case U'\u00B0': + return U"DEGREE SIGN"; + case U'\u00B1': + return U"PLUS MINUS SYMBOL"; + case U'\u00B2': + return U"SUPERSCRIPT TWO"; + case 
U'\u00B3': + return U"SUPERSCRIPT THREE"; + case U'\u00B4': + return U"ACUTE ACCENT"; + case U'\u00B5': + return U"MICRO SIGN"; + case U'\u00B6': + return U"PILCROW SIGN"; + case U'\u00B7': + return U"MIDDLE DOT"; + case U'\u00B8': + return U"CEDILLA"; + case U'\u00B9': + return U"SUPERSCRIPT ONE"; + case U'\u00BA': + return U"MASCULINE ORDINAL INDICATOR"; + case U'\u00BB': + return U"RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"; + case U'\u00BC': + return U"VULGAR FRACTION ONE QUARTER"; + case U'\u00BD': + return U"VULGAR FRACTION ONE HALF"; + case U'\u00BE': + return U"VULGAR FRACTION THREE QUARTERS"; + case U'\u00BF': + return U"INVERTED QUESTION MARK"; + case U'\u00C0': + return U"LATIN CAPITAL LETTER A WITH GRAVE"; + case U'\u00C1': + return U"LATIN CAPITAL LETTER A WITH ACUTE"; + case U'\u00C2': + return U"LATIN CAPITAL LETTER A WITH CIRCUMFLEX"; + case U'\u00C3': + return U"LATIN CAPITAL LETTER A WITH TILDE"; + case U'\u00C4': + return U"LATIN CAPITAL LETTER A WITH DIAERESIS"; + case U'\u00C5': + return U"LATIN CAPITAL LETTER A WITH RING ABOVE"; + case U'\u00C6': + return U"LATIN CAPITAL LETTER AE"; + case U'\u00C7': + return U"LATIN CAPITAL LETTER C WITH CEDILLA"; + case U'\u00C8': + return U"LATIN CAPITAL LETTER E WITH GRAVE"; + case U'\u00C9': + return U"LATIN CAPITAL LETTER E WITH ACUTE"; + case U'\u00CA': + return U"LATIN CAPITAL LETTER E WITH CIRCUMFLEX"; + case U'\u00CB': + return U"LATIN CAPITAL LETTER E WITH DIAERESIS"; + case U'\u00CC': + return U"LATIN CAPITAL LETTER I WITH GRAVE"; + case U'\u00CD': + return U"LATIN CAPITAL LETTER I WITH ACUTE"; + case U'\u00CE': + return U"LATIN CAPITAL LETTER I WITH CIRCUMFLEX"; + case U'\u00CF': + return U"LATIN CAPITAL LETTER I WITH DIAERESIS"; + case U'\u00D0': + return U"LATIN CAPITAL LETTER ETH"; + case U'\u00D1': + return U"LATIN CAPITAL LETTER N WITH TILDE"; + case U'\u00D2': + return U"LATIN CAPITAL LETTER O WITH GRAVE"; + case U'\u00D3': + return U"LATIN CAPITAL LETTER O WITH ACUTE"; + case U'\u00D4': + 
return U"LATIN CAPITAL LETTER O WITH CIRCUMFLEX"; + case U'\u00D5': + return U"LATIN CAPITAL LETTER O WITH TILDE"; + case U'\u00D6': + return U"LATIN CAPITAL LETTER O WITH DIAERESIS"; + case U'\u00D7': + return U"MULTIPLICATION SIGN"; + case U'\u00D8': + return U"LATIN CAPITAL LETTER O WITH STROKE"; + case U'\u00D9': + return U"LATIN CAPITAL LETTER U WITH GRAVE"; + case U'\u00DA': + return U"LATIN CAPITAL LETTER U WITH ACUTE"; + case U'\u00DB': + return U"LATIN CAPITAL LETTER U WITH CIRCUMFLEX"; + case U'\u00DC': + return U"LATIN CAPITAL LETTER U WITH DIAERESIS"; + case U'\u00DD': + return U"LATIN CAPITAL LETTER Y WITH ACUTE"; + case U'\u00DE': + return U"LATIN CAPITAL LETTER THORN"; + case U'\u00DF': + return U"LATIN SMALL LETTER SHARP S"; + case U'\u00E0': + return U"LATIN SMALL LETTER A WITH GRAVE"; + case U'\u00E1': + return U"LATIN SMALL LETTER A WITH ACUTE"; + case U'\u00E2': + return U"LATIN SMALL LETTER A WITH CIRCUMFLEX"; + case U'\u00E3': + return U"LATIN SMALL LETTER A WITH TILDE"; + case U'\u00E4': + return U"LATIN SMALL LETTER A WITH DIAERESIS"; + case U'\u00E5': + return U"LATIN SMALL LETTER A WITH RING ABOVE"; + case U'\u00E6': + return U"LATIN SMALL LETTER AE"; + case U'\u00E7': + return U"LATIN SMALL LETTER C WITH CEDILLA"; + case U'\u00E8': + return U"LATIN SMALL LETTER E WITH GRAVE"; + case U'\u00E9': + return U"LATIN SMALL LETTER E WITH ACUTE"; + case U'\u00EA': + return U"LATIN SMALL LETTER E WITH CIRCUMFLEX"; + case U'\u00EB': + return U"LATIN SMALL LETTER E WITH DIAERESIS"; + case U'\u00EC': + return U"LATIN SMALL LETTER I WITH GRAVE"; + case U'\u00ED': + return U"LATIN SMALL LETTER I WITH ACUTE"; + case U'\u00EE': + return U"LATIN SMALL LETTER I WITH CIRCUMFLEX"; + case U'\u00EF': + return U"LATIN SMALL LETTER I WITH DIAERESIS"; + case U'\u00F0': + return U"LATIN SMALL LETTER ETH"; + case U'\u00F1': + return U"LATIN SMALL LETTER N WITH TILDE"; + case U'\u00F2': + return U"LATIN SMALL LETTER O WITH GRAVE"; + case U'\u00F3': + return U"LATIN 
SMALL LETTER O WITH ACUTE"; + case U'\u00F4': + return U"LATIN SMALL LETTER O WITH CIRCUMFLEX"; + case U'\u00F5': + return U"LATIN SMALL LETTER O WITH TILDE"; + case U'\u00F6': + return U"LATIN SMALL LETTER O WITH DIAERESIS"; + case U'\u00F7': + return U"DIVISION SIGN"; + case U'\u00F8': + return U"LATIN SMALL LETTER O WITH STROKE"; + case U'\u00F9': + return U"LATIN SMALL LETTER U WITH GRAVE"; + case U'\u00FA': + return U"LATIN SMALL LETTER U WITH ACUTE"; + case U'\u00FB': + return U"LATIN SMALL LETTER U WITH CIRCUMFLEX"; + case U'\u00FC': + return U"U WITH TWO DOTS"; + case U'\u00FD': + return U"LATIN SMALL LETTER Y WITH ACUTE"; + case U'\u00FE': + return U"LATIN SMALL LETTER THORN"; + case U'\u00FF': + return U"LATIN SMALL LETTER Y WITH DIAERESIS"; + /* LATIN EXTENDED-A: */ + case U'\u0100': + return U"LATIN CAPITAL LETTER A WITH MACRON"; + case U'\u0101': + return U"LATIN SMALL LETTER A WITH MACRON"; + case U'\u0102': + return U"LATIN CAPITAL LETTER A WITH BREVE"; + case U'\u0103': + return U"LATIN SMALL LETTER A WITH BREVE"; + case U'\u0104': + return U"LATIN CAPITAL LETTER A WITH OGONEK"; + case U'\u0105': + return U"LATIN SMALL LETTER A WITH OGONEK"; + case U'\u0106': + return U"LATIN CAPITAL LETTER C WITH ACUTE"; + case U'\u0107': + return U"LATIN SMALL LETTER C WITH ACUTE"; + case U'\u0108': + return U"LATIN CAPITAL LETTER C WITH CIRCUMFLEX"; + case U'\u0109': + return U"LATIN SMALL LETTER C WITH CIRCUMFLEX"; + case U'\u010A': + return U"LATIN CAPITAL LETTER C WITH DOT ABOVE"; + case U'\u010B': + return U"LATIN SMALL LETTER C WITH DOT ABOVE"; + case U'\u010C': + return U"LATIN CAPITAL LETTER C WITH CARON"; + case U'\u010D': + return U"LATIN SMALL LETTER C WITH CARON"; + case U'\u010E': + return U"LATIN CAPITAL LETTER D WITH CARON"; + case U'\u010F': + return U"LATIN SMALL LETTER D WITH CARON"; + case U'\u0110': + return U"LATIN CAPITAL LETTER D WITH STROKE"; + case U'\u0111': + return U"LATIN SMALL LETTER D WITH STROKE"; + case U'\u0112': + return U"LATIN 
CAPITAL LETTER E WITH MACRON"; + case U'\u0113': + return U"LATIN SMALL LETTER E WITH MACRON"; + case U'\u0114': + return U"LATIN CAPITAL LETTER E WITH BREVE"; + case U'\u0115': + return U"LATIN SMALL LETTER E WITH BREVE"; + case U'\u0116': + return U"LATIN CAPITAL LETTER E WITH DOT ABOVE"; + case U'\u0117': + return U"LATIN SMALL LETTER E WITH DOT ABOVE"; + case U'\u0118': + return U"LATIN CAPITAL LETTER E WITH OGONEK"; + case U'\u0119': + return U"LATIN SMALL LETTER E WITH OGONEK"; + case U'\u011A': + return U"LATIN CAPITAL LETTER E WITH CARON"; + case U'\u011B': + return U"LATIN SMALL LETTER E WITH CARON"; + case U'\u011C': + return U"LATIN CAPITAL LETTER G WITH CIRCUMFLEX"; + case U'\u011D': + return U"LATIN SMALL LETTER G WITH CIRCUMFLEX"; + case U'\u011E': + return U"LATIN CAPITAL LETTER G WITH BREVE"; + case U'\u011F': + return U"LATIN SMALL LETTER G WITH BREVE"; + case U'\u0120': + return U"LATIN CAPITAL LETTER G WITH DOT ABOVE"; + case U'\u0121': + return U"LATIN SMALL LETTER G WITH DOT ABOVE"; + case U'\u0122': + return U"LATIN CAPITAL LETTER G WITH CEDILLA"; + case U'\u0123': + return U"LATIN SMALL LETTER G WITH CEDILLA"; + case U'\u0124': + return U"LATIN CAPITAL LETTER H WITH CIRCUMFLEX"; + case U'\u0125': + return U"LATIN SMALL LETTER H WITH CIRCUMFLEX"; + case U'\u0126': + return U"LATIN CAPITAL LETTER H WITH STROKE"; + case U'\u0127': + return U"LATIN SMALL LETTER H WITH STROKE"; + case U'\u0128': + return U"LATIN CAPITAL LETTER I WITH TILDE"; + case U'\u0129': + return U"LATIN SMALL LETTER I WITH TILDE"; + case U'\u012A': + return U"LATIN CAPITAL LETTER I WITH MACRON"; + case U'\u012B': + return U"LATIN SMALL LETTER I WITH MACRON"; + case U'\u012C': + return U"LATIN CAPITAL LETTER I WITH BREVE"; + case U'\u012D': + return U"LATIN SMALL LETTER I WITH BREVE"; + case U'\u012E': + return U"LATIN CAPITAL LETTER I WITH OGONEK"; + case U'\u012F': + return U"LATIN SMALL LETTER I WITH OGONEK"; + case U'\u0130': + return U"LATIN CAPITAL LETTER I WITH DOT 
ABOVE"; + case U'\u0131': + return U"LATIN SMALL LETTER DOTLESS I"; + case U'\u0132': + return U"LATIN CAPITAL LIGATURE IJ"; + case U'\u0133': + return U"LATIN SMALL LIGATURE IJ"; + case U'\u0134': + return U"LATIN CAPITAL LETTER J WITH CIRCUMFLEX"; + case U'\u0135': + return U"LATIN SMALL LETTER J WITH CIRCUMFLEX"; + case U'\u0136': + return U"LATIN CAPITAL LETTER K WITH CEDILLA"; + case U'\u0137': + return U"LATIN SMALL LETTER K WITH CEDILLA"; + case U'\u0138': + return U"LATIN SMALL LETTER KRA"; + case U'\u0139': + return U"LATIN CAPITAL LETTER L WITH ACUTE"; + case U'\u013A': + return U"LATIN SMALL LETTER L WITH ACUTE"; + case U'\u013B': + return U"LATIN CAPITAL LETTER L WITH CEDILLA"; + case U'\u013C': + return U"LATIN SMALL LETTER L WITH CEDILLA"; + case U'\u013D': + return U"LATIN CAPITAL LETTER L WITH CARON"; + case U'\u013E': + return U"LATIN SMALL LETTER L WITH CARON"; + case U'\u013F': + return U"LATIN CAPITAL LETTER L WITH MDDLE DOT"; + case U'\u0140': + return U"LATIN SMALL LETTER L WITH MIDDLE DOT"; + case U'\u0150': + return U"LATIN CAPITAL LETTER O WITH DOUBLE ACUTE"; + case U'\u0160': + return U"LATIN CAPITAL LETTER S WITH CARON"; + case U'\u0170': + return U"LATIN CAPITAL LETTER U WITH DOUBLE ACUTE"; + /* LATIN EXTENDED-B: */ + case U'\u0180': + return U"LATIN SMALL LETTER B WITH STROKE"; + case U'\u0190': + return U"LATIN CAPITAL LETTER OPEN E"; + case U'\u01A0': + return U"LATIN CAPITAL LETTER O WITH HORN"; + case U'\u01B0': + return U"LATIN SMALL LETTER U WITH HORN"; + case U'\u01C0': + return U"LATIN LETTER DENTAL CLICK"; + case U'\u01D0': + return U"LATIN SMALL LETTER I WITH CARON"; + case U'\u01E0': + return U"LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON"; + case U'\u01F0': + return U"LATIN SMALL LETTER J WITH CARON"; + case U'\u0200': + return U"LATIN CAPITAL LETTER A WITH DOUBLE GRAVE"; + case U'\u0210': + return U"LATIN CAPITAL LETTER R WITH DOUBLE GRAVE"; + case U'\u0220': + return U"LATIN CAPITAL LETTER N WITH LONG RIGHT LEG"; + 
case U'\u0230': + return U"LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON"; + case U'\u0240': + return U"LATIN SMALL LETTER Z WITH SWASH TAIL"; + /* IPA EXTENSIONS: */ + case U'\u0250': + return U"LATIN SMALL LETTER TURNED A"; + case U'\u0251': + return U"LATIN SMALL LETTER ALPHA"; + case U'\u0252': + return U"LATIN SMALL LETTER TURNED ALPHA"; + case U'\u0253': + return U"LATIN SMALL LETTER B WITH HOOK"; + case U'\u0254': + return U"LATIN SMALL LETTER OPEN O"; + case U'\u0255': + return U"LATIN SMALL LETTER C WITH CURL"; + case U'\u0256': + return U"LATIN SMALL LETTER D WITH TAIL"; + case U'\u0257': + return U"LATIN SMALL LETTER D WITH HOOK"; + case U'\u0258': + return U"LATIN SMALL LETTER REVERSED E"; + case U'\u0259': + return U"LATIN SMALL LETTER SCHWA"; + case U'\u025A': + return U"LATIN SMALL LETTER SCHWA WITH HOOK"; + case U'\u025B': + return U"LATIN SMALL LETTER OPEN E"; + case U'\u025C': + return U"LATIN SMALL LETTER REVERSED OPEN E"; + case U'\u025D': + return U"LATIN SMALL LETTER REVERSED OPEN E WITH HOOK"; + case U'\u025E': + return U"LATIN SMALL LETTER CLOSED REVERSED OPEN E"; + case U'\u025F': + return U"LATIN SMALL LETTER DOTLESS J WITH STROKE"; + case U'\u0260': + return U"LATIN SMALL LETTER G WITH HOOK"; + case U'\u0261': + return U"LATIN SMALL LETTER SCRIPT G"; + case U'\u0262': + return U"LATIN LETTER SMALL CAPITAL G"; + case U'\u0263': + return U"LATIN SMALL LETTER GAMMA"; + case U'\u0264': + return U"LATIN SMALL LETTER RAMS HORN"; + case U'\u0265': + return U"LATIN SMALL LETTER TURNED H"; + case U'\u0266': + return U"LATIN SMALL LETTER H WITH HOOK"; + case U'\u0267': + return U"LATIN SMALL LETTER HENG WITH HOOK"; + case U'\u0268': + return U"LATIN SMALL LETTER I WITH STROKE"; + case U'\u0269': + return U"LATIN SMALL LETTER IOTA"; + case U'\u026A': + return U"LATIN LETTER SMALL CAPITAL I"; + case U'\u026B': + return U"LATIN SMALL LETTER L WITH MIDDLE TILDE"; + case U'\u026C': + return U"LATIN SMALL LETTER L WITH BELT"; + case U'\u026D': + 
return U"LATIN SMALL LETTER L WITH RETROFLEX HOOK"; + case U'\u026E': + return U"LATIN SMALL LETTER LEZH"; + case U'\u026F': + return U"LATIN SMALL LETTER TURNED M"; + case U'\u0270': + return U"LATIN SMALL LETTER TURNED M WITH LONG LEG"; + case U'\u0271': + return U"LATIN SMALL LETTER M WITH HOOK"; + case U'\u0272': + return U"LATIN SMALL LETTER N WITH LEFT HOOK"; + case U'\u0273': + return U"LATIN SMALL LETTER N WITH RETROFLEX HOOK"; + case U'\u0274': + return U"LATIN LETTER SMALL CAPITAL N"; + case U'\u0275': + return U"LATIN SMALL LETTER BARRED O"; + case U'\u0276': + return U"LATIN LETTER SMALL CAPITAL OE"; + case U'\u0277': + return U"LATIN SMALL LETTER CLOSED OMEGA"; + case U'\u0278': + return U"LATIN SMALL LETTER PHI"; + case U'\u0279': + return U"LATIN SMALL LETTER TURNED R"; + case U'\u027A': + return U"LATIN SMALL LETTER TURNED R WITH LONG LEG"; + case U'\u027B': + return U"LATIN SMALL LETTER TURNED R WITH HOOK"; + case U'\u027C': + return U"LATIN SMALL LETTER R WITH LONG LEG"; + case U'\u027D': + return U"LATIN SMALL LETTER R WITH TAIL"; + case U'\u027E': + return U"LATIN SMALL LETTER R WITH FISHHOOK"; + case U'\u027F': + return U"LATIN SMALL LETTER REVERSED R WITH FISHHOOK"; + case U'\u0280': + return U"LATIN LETTER SMALL CAPITAL R"; + case U'\u0281': + return U"LATIN LETTER SMALL CAPITAL INVERTED R"; + case U'\u0282': + return U"LATIN SMALL LETTER S WITH HOOK"; + case U'\u0283': + return U"LATIN SMALL LETTER ESH"; + case U'\u0284': + return U"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK"; + case U'\u0285': + return U"LATIN SMALL LETTER SQUAT REVERSED ESH"; + case U'\u0286': + return U"LATIN SMALL LETTER SH WITH CURL"; + case U'\u0287': + return U"LATIN SMALL LETTER TURNED T"; + case U'\u0288': + return U"LATIN SMALL LETTER T WITH RETROFLEX HOOK"; + case U'\u0289': + return U"LATIN SMALL LETTER U BAR"; + case U'\u028A': + return U"LATIN SMALL LETTER UPSILON"; + case U'\u028B': + return U"LATIN SMALL LETTER V WTIH HOOK"; + case U'\u028C': + return 
U"LATIN SMALL LETTER TURNED V"; + case U'\u028D': + return U"LATIN SMALL LETTER TURNED W"; + case U'\u028E': + return U"LATIN SMALL LETTER TURNED Y"; + case U'\u028F': + return U"LATIN LETTER SMALL CAPITAL Y"; + case U'\u0290': + return U"LATIN SMALL LETTER Z WITH RETROFLEX HOOK"; + case U'\u0291': + return U"LATIN SMALL LETTER Z WITH RETROFLEX"; + case U'\u0292': + return U"LATIN SMALL LETTER EZH"; + case U'\u0293': + return U"LATIN SMALL LETTER EZH WITH CURL"; + case U'\u0294': + return U"LATIN LETTER GLOTTAL STOP"; + case U'\u0295': + return U"LATIN LETTER PHARYNGEAL VOICED FRICATIVE"; + case U'\u0296': + return U"LATIN LETTER INVERTED GLOTTAL STOP"; + case U'\u0297': + return U"LATIN LETTER STRETCHED C"; + case U'\u0298': + return U"LATIN LETTER BILABIAL CLICK"; + case U'\u0299': + return U"LATIN LETTER SMALL CAPITAL B"; + case U'\u029A': + return U"LATIN SMALL LETTER CLOSED OPEN E"; + case U'\u029B': + return U"LATIN LETTER SMALL CAPITAL G WITH HOOK"; + case U'\u029C': + return U"LATIN LETTER SMALL CAPITAL H"; + case U'\u029D': + return U"LATIN SMALL LETTER J WITH CROSSED-TAIL"; + case U'\u029E': + return U"LATIN SMALL LETTER TURNED K"; + case U'\u029F': + return U"LATIN LETTER SMALL CAPITAL L"; + case U'\u02A0': + return U"LATIN SMALL LETTER Q WITH HOOK"; + case U'\u02A1': + return U"LATIN LETTER GLOTTAL STOP WITH STROKE"; + case U'\u02A2': + return U"LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE"; + case U'\u02A3': + return U"LATIN SMALL LETTER DZ DIGRAPH"; + case U'\u02A4': + return U"LATIN SMALL LETTER DEZH DIGRAPH"; + case U'\u02A5': + return U"LATIN SMALL LETTER DZ DIGRAPH WITH CURL"; + case U'\u02A6': + return U"LATIN SMALL LETTER TS DIGRAPH"; + case U'\u02A7': + return U"LATIN SMALL LETTER TESH DIGRAPH"; + case U'\u02A8': + return U"LATIN SMALL LETTER TC DIGRAPH WITH CURL"; + case U'\u02A9': + return U"LATIN SMALL LETTER FENG DIGRAPH"; + case U'\u02AA': + return U"LATIN SMALL LETTER LS DIGRAPH"; + case U'\u02AB': + return U"LATIN SMALL LETTER LZ 
DIGRAPH"; + case U'\u02AC': + return U"LATIN LETTER BILABIAL PERCUSSIVE"; + case U'\u02AD': + return U"LATIN LETTER BIDENTAL PERCUSSIVE"; + case U'\u02AE': + return U"LATIN SMALL LETTER TURNED H WITH FISHHOOK"; + case U'\u02AF': + return U"LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL"; + /* SPACING MODIFIER LETTERS: */ + case U'\u02B0': + return U"MODIFIER LETTER SMALL H"; + case U'\u02B1': + return U"MODIFIER LETTER SMALL H WITH HOOK"; + case U'\u02B2': + return U"MODIFIER LETTER SMALL J"; + case U'\u02B3': + return U"MODIFIER LETTER SMALL R"; + case U'\u02B4': + return U"MODIFIER LETTER SMALL TURNED R"; + case U'\u02B5': + return U"MODIFIER LETTER SMALL TURNED R WITH HOOK"; + case U'\u02B6': + return U"MODIFIER LETTER SMALL CAPITAL INVERTED R"; + case U'\u02B7': + return U"MODIFIER LETTER SMALL W"; + case U'\u02B8': + return U"MODIFIER LETTER SMALL Y"; + case U'\u02B9': + return U"MODIFIER LETTER PRIME"; + case U'\u02BA': + return U"MODIFIER LETTER DOUBLE PRIME"; + case U'\u02BB': + return U"MODIFIER LETTER TURNED COMMA"; + case U'\u02BC': + return U"MODIFIER LETTER APOSTROPHE"; + case U'\u02BD': + return U"MODIFIER LETTER REVERSED COMMA"; + case U'\u02BE': + return U"MODIFIER LETTER RIGHT HALF RING"; + case U'\u02BF': + return U"MODIFIER LETTER LEFT HALF RING"; + case U'\u02C0': + return U"MODIFIER LETTER GLOTTAL STOP"; + case U'\u02C1': + return U"MODIFIER LETTER REVERSED GLOTTAL STOP"; + case U'\u02C2': + return U"MODIFIER LETTER LEFT ARROWHEAD"; + case U'\u02C3': + return U"MODIFIER LETTER RIGHT ARROWHEAD"; + case U'\u02C4': + return U"MODIFIER LETTER UP ARROWHEAD"; + case U'\u02C5': + return U"MODIFIER LETTER DOWN ARROWHEAD"; + case U'\u02C6': + return U"MODIFIER LETTER CIRCUMFLEX"; + case U'\u02C7': + return U"CARON"; + case U'\u02C8': + return U"MODIFIER LETTER VERTICAL LINE"; + case U'\u02C9': + return U"MODIFIER LETTER MACRON"; + case U'\u02CA': + return U"MODIFIER LETTER ACUTE ACCENT"; + case U'\u02CB': + return U"MODIFIER LETTER GRAVE ACCENT"; + 
case U'\u02CC': + return U"MODIFIER LETTER LOW VERTICAL LINE"; + case U'\u02CD': + return U"MODIFIER LETTER LOW MACRON"; + case U'\u02CE': + return U"MODIFIER LETTER LOW GRAVE ACCENT"; + case U'\u02CF': + return U"MODIFIER LETTER LOW ACUTE ACCENT"; + case U'\u02D0': + return U"MODIFIER LETTER TRIANGULAR COLON"; + case U'\u02D1': + return U"MODIFIER LETTER HALF TRIANGULAR COLON"; + case U'\u02D2': + return U"MODIFIER LETTER CENTRED RIGHT HALF RING"; + case U'\u02D3': + return U"MODIFIER LETTER CENTRED LEFT HALF RING"; + case U'\u02D4': + return U"MODIFIER LETTER UP TACK"; + case U'\u02D5': + return U"MODIFIER LETTER DOWN TACK"; + case U'\u02D6': + return U"MODIFIER LETTER PLUS SIGN"; + case U'\u02D7': + return U"MODIFIER LETTER MINUS SIGN"; + case U'\u02D8': + return U"BREVE"; + case U'\u02D9': + return U"DOT ABOVE"; + case U'\u02DA': + return U"RING ABOVE"; + case U'\u02DB': + return U"OGONEK"; + case U'\u02DC': + return U"SMALL TILDE"; + case U'\u02DD': + return U"DOUBLE ACUTE ACCENT"; + case U'\u02DE': + return U"MODIFIER LETTER RHOTIC HOOK"; + case U'\u02DF': + return U"MODIFIER LETTER CROSS ACCENT"; + case U'\u02E0': + return U"MODIFIER LETTER SMALL GAMMA"; + case U'\u02E1': + return U"MODIFIER LETTER SMALL L"; + case U'\u02E2': + return U"MODIFIER LETTER SMALL S"; + case U'\u02E3': + return U"MODIFIER LETTER SMALL X"; + case U'\u02E4': + return U"MODIFIER LETTER SMALL REVERSED GLOTTAL STOP"; + case U'\u02E5': + return U"MODIFIER LETTER EXTRA-HIGH TONE BAR"; + case U'\u02E6': + return U"MODIFIER LETTER HIGH TONE BAR"; + case U'\u02E7': + return U"MODIFIER LETTER MID TONE BAR"; + case U'\u02E8': + return U"MODIFIER LETTER LOW TONE BAR"; + case U'\u02E9': + return U"MODIFIER LETTER EXTRA-LOW TONE BAR"; + case U'\u02EA': + return U"MODIFIER LETTER YIN DEPARTING TONE MARK"; + case U'\u02EB': + return U"MODIFIER LETTER YANG DEPARTING TONE MARK"; + case U'\u02EC': + return U"MODIFIER LETTER VOICING"; + case U'\u02ED': + return U"MODIFIER LETTER UNASPIRATED"; + case 
U'\u02EE': + return U"MODIFIER LETTER DOUBLE APOSTROPHE"; + case U'\u02EF': + return U"MODIFIER LETTER LOW DOWN ARROWHEAD"; + case U'\u02F0': + return U"MODIFIER LETTER LOW UP ARROWHEAD"; + case U'\u02F1': + return U"MODIFIER LETTER LOW LEFT ARROWHEAD"; + case U'\u02F2': + return U"MODIFIER LETTER LOW RIGHT ARROWHEAD"; + case U'\u02F3': + return U"MODIFIER LETTER LOW RING"; + case U'\u02F4': + return U"MODIFIER LETTER MIDDLE GRAVE ACCENT"; + case U'\u02F5': + return U"MODIFIER LETTER MIDDLE DOUBLE GRAVE ACCENT"; + case U'\u02F6': + return U"MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT"; + case U'\u02F7': + return U"MODIFIER LETTER LOW TILDE"; + case U'\u02F8': + return U"MODIFIER LETTER RAISED COLON"; + case U'\u02F9': + return U"MODIFIER LETTER BEGIN HIGH TONE"; + case U'\u02FA': + return U"MODIFIER LETTER END HIGH TONE"; + case U'\u02FB': + return U"MODIFIER LETTER BEGIN LOW TONE"; + case U'\u02FC': + return U"MODIFIER LETTER END LOW TONE"; + case U'\u02FD': + return U"MODIFIER LETTER SHELF"; + case U'\u02FE': + return U"MODIFIER LETTER OPEN SHELF"; + case U'\u02FF': + return U"MODIFIER LETTER LOW LEFT ARROWHEAD"; + /* COMBINING DIACRITICAL MARKS: */ + case U'\u0300': + return U"COMBINING GRAVE ACCENT"; + case U'\u0301': + return U"COMBINING ACUTE ACCENT"; + case U'\u0302': + return U"COMBINING CIRCUMFLEX ACCENT"; + case U'\u0303': + return U"COMBINING TILDE"; + case U'\u0304': + return U"COMBINING MACRON"; + case U'\u0305': + return U"COMBINING OVERLINE"; + case U'\u0306': + return U"COMBINING BREVE"; + case U'\u0307': + return U"COMBINING DOT ABOVE"; + case U'\u0308': + return U"COMBINING DIAERESIS"; + case U'\u0309': + return U"COMBINING HOOK ABOVE"; + case U'\u030A': + return U"COMBINING RING ABOVE"; + case U'\u030B': + return U"COMBINING DOUBLE ACUTE ACCENT"; + case U'\u030C': + return U"COMBINING CARON"; + case U'\u030D': + return U"COMBINING VERTICAL LINE ABOVE"; + case U'\u030E': + return U"COMBINING DOUBLE VERTICAL LINE ABOVE"; + case U'\u030F': + return 
U"COMBINING DOUBLE GRAVE ACCENT"; + case U'\u0310': + return U"COMBINING CANDRABINDU"; + case U'\u0311': + return U"COMBINING INVERTED BREVE"; + case U'\u0312': + return U"COMBINING TURNED COMMA ABOVE"; + case U'\u0313': + return U"COMBINING COMMA ABOVE"; + case U'\u0314': + return U"COMBINING REVERSED COMMA ABOVE"; + case U'\u0315': + return U"COMBINING COMMA ABOVE RIGHT"; + case U'\u0316': + return U"COMBINING GRAVE ACCENT BELOW"; + case U'\u0317': + return U"COMBINING ACUTE ACCENT BELOW"; + case U'\u0318': + return U"COMBINING LEFT TACK BELOW"; + case U'\u0319': + return U"COMBINING RIGHT TACK BELOW"; + case U'\u031A': + return U"COMBINING LEFT ANGLE ABOVE"; + case U'\u031B': + return U"COMBINING HORN"; + case U'\u031C': + return U"COMBINING LEFT HALF RING BELOW"; + case U'\u031D': + return U"COMBINING UP TACK BELOW"; + case U'\u031E': + return U"COMBINING DOWN TACK BELOW"; + case U'\u031F': + return U"COMBINING PLUS SIGN BELOW"; + case U'\u0320': + return U"COMBINING MINUS SIGN BELOW"; + case U'\u0321': + return U"COMBINING PALATALIZED HOOK BELOW"; + case U'\u0322': + return U"COMBINING RETROFLEX HOOK BELOW"; + case U'\u0323': + return U"COMBINING DOT BELOW"; + case U'\u0324': + return U"COMBINING DIAERESIS BELOW"; + case U'\u0325': + return U"COMBINING RING BELOW"; + case U'\u0326': + return U"COMBINING COMMA BELOW"; + case U'\u0327': + return U"COMBINING CEDILLA"; + case U'\u0328': + return U"COMBINING OGONEK"; + case U'\u0329': + return U"COMBINING VERTICAL LINE BELOW"; + case U'\u032A': + return U"COMBINING BRDIGE BELOW"; + case U'\u032B': + return U"COMBINING INVERTED DOUBLE ARCH BELOW"; + case U'\u032C': + return U"COMBINING CARON BELOW"; + case U'\u032D': + return U"COMBINING CIRCUMFLEX ACCENT BELOW"; + case U'\u032E': + return U"COMBINING BREVE BELOW"; + case U'\u032F': + return U"COMBINING INVERTED BREVE BELOW"; + case U'\u0330': + return U"COMBINING TILDE BELOW"; + case U'\u0331': + return U"COMBINING MACRON BELOW"; + case U'\u0332': + return 
U"COMBINING LOW LINE"; + case U'\u0333': + return U"COMBINING DOUBLE LOW LINE"; + case U'\u0334': + return U"COMBINING TILDE OVERLAY"; + case U'\u0335': + return U"COMBINING SHORT STROKE OVERLAY"; + case U'\u0336': + return U"COMBINING LONG STROKE OVERLAY"; + case U'\u0337': + return U"COMBINING SHORT SOLIDUS OVERLAY"; + case U'\u0338': + return U"COMBINING LONG SOLIDUS OVERLAY"; + case U'\u0339': + return U"COMBINING RIGHT HALF RING BELOW"; + case U'\u033A': + return U"COMBINING INVERTED BRIDGE BELOW"; + case U'\u033B': + return U"COMBINING SQUARE BELOW"; + case U'\u033C': + return U"COMBINING SEAGULL BELOW"; + case U'\u033D': + return U"COMBINING X ABOVE"; + case U'\u033E': + return U"COMBINING VERTICAL TILDE"; + case U'\u033F': + return U"COMBINING DOUBLE OVERLINE"; + case U'\u0340': + return U"COMBINING GRAVE TONE MARK"; + case U'\u0341': + return U"COMBINING ACUTE TONE MARK"; + case U'\u0342': + return U"COMBINING GREEK PERISPOMENI"; + case U'\u0343': + return U"COMBINING GREEK KORONIS"; + case U'\u0344': + return U"COMBINING GREEK DIALYTIKA TONOS"; + case U'\u0345': + return U"COMBINING GREEK YPOGEGRAMMENI"; + case U'\u0346': + return U"COMBINING BRIDGE ABOVE"; + case U'\u0347': + return U"COMBINING EQUALS SIGN BELOW"; + case U'\u0348': + return U"COMBINING DOUBLE VERTICAL LINE BELOW"; + case U'\u0349': + return U"COMBINING LEFT ANGLE BELOW"; + case U'\u034A': + return U"COMBINING NOT TILDE ABOVE"; + case U'\u034B': + return U"COMBINING HOMOTHETIC ABOVE"; + case U'\u034C': + return U"COMBINING ALMOST EQUAL TO ABOVE"; + case U'\u034D': + return U"COMBINING LEFT RIGHT ARROW BELOW"; + case U'\u034E': + return U"COMBINING UPWARDS ARROW BELOW"; + case U'\u034F': + return U"COMBINING GRAPHEME JOINER"; + case U'\u0350': + return U"COMBINING RIGHT ARROWHEAD ABOVE"; + case U'\u0351': + return U"COMBINING LEFT HALF RING ABOVE"; + case U'\u0352': + return U"COMBINING FERMATA"; + case U'\u0353': + return U"COMBINING X BELOW"; + case U'\u0354': + return U"COMBINING LEFT 
ARROWHEAD BELOW"; + case U'\u0355': + return U"COMBINING RIGHT ARROWHEAD BELOW"; + case U'\u0356': + return U"COMBINING RIGHT ARROWHEAD AND UP ARROWHEAD BELOW"; + case U'\u0357': + return U"COMBINING RIGHT HALF RING ABOVE"; + case U'\u0358': + return U"COMBINING DOT ABOVE RIGHT"; + case U'\u0359': + return U"COMBINING ASTERISK BELOW"; + case U'\u035A': + return U"COMBINING DOUBLE RING BELOW"; + case U'\u035B': + return U"COMBINING ZIGZAG ABOVE"; + case U'\u035C': + return U"COMBINING DOUBLE BREVE BELOW"; + case U'\u035D': + return U"COMBINING DOUBLE BREVE"; + case U'\u035E': + return U"COMBINING DOUBLE MACRON"; + case U'\u035F': + return U"COMBINING DOUBLE MACRON BELOW"; + case U'\u0360': + return U"COMBINING DOUBLE TILDE"; + case U'\u0361': + return U"COMBINING DOUBLE INVERTED BREVE"; + case U'\u0362': + return U"COMBINING DOUBLE RIGHTWARDS ARROW BELOW"; + case U'\u0363': + return U"COMBINING LATIN SMALL LETTER A"; + case U'\u0364': + return U"COMBINING LATIN SMALL LETTER E"; + case U'\u0365': + return U"COMBINING LATIN SMALL LETTER I"; + case U'\u0366': + return U"COMBINING LATIN SMALL LETTER O"; + case U'\u0367': + return U"COMBINING LATIN SMALL LETTER U"; + case U'\u0368': + return U"COMBINING LATIN SMALL LETTER C"; + case U'\u0369': + return U"COMBINING LATIN SMALL LETTER D"; + case U'\u036A': + return U"COMBINING LATIN SMALL LETTER H"; + case U'\u036B': + return U"COMBINING LATIN SMALL LETTER M"; + case U'\u036C': + return U"COMBINING LATIN SMALL LETTER R"; + case U'\u036D': + return U"COMBINING LATIN SMALL LETTER T"; + case U'\u036E': + return U"COMBINING LATIN SMALL LETTER V"; + case U'\u036F': + return U"COMBINING LATIN SMALL LETTER X"; + /* GREEK AND COPTIC: */ + case U'\u0370': + return U"GREEK CAPITAL LETTER HETA"; + case U'\u0371': + return U"GREEK SMALL LETTER HETA"; + case U'\u0372': + return U"GREEK CAPITAL LETTER ARCHAIC SAMPI"; + case U'\u0373': + return U"GREEK SMALL LETTER ARCHAIC SAMPI"; + case U'\u0374': + return U"GREEK NUMERAL SIGN"; + case 
U'\u0375': + return U"GREEK LOWER NUMERAL SIGN"; + case U'\u0376': + return U"GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA"; + case U'\u0377': + return U"GREEK SMALL LETTER PAMPHYLIAN DIGAMMA"; + case U'\u037A': + return U"GREEK YPOGEGRAMMENI"; + case U'\u037B': + return U"GREEK SMALL REVERSED LUNATE SIGMA SYMBOL"; + case U'\u037C': + return U"GREEK SMALL DOTTED LUNATE SIGMA SYMBOL"; + case U'\u037D': + return U"GREEK SMALL REVERSED DOTTED LUNATE SIGMAL SYMBOL"; + case U'\u037E': + return U"GREEK QUESTION MARK"; + case U'\u037F': + return U"GREEK CAPITAL LETTER YOT"; + case U'\u0384': + return U"GREEK TONOS"; + case U'\u0385': + return U"GREEK DIALYTIKA TONOS"; + case U'\u0386': + return U"GREEK CAPITAL LETTER ALPHA WITH TONOS"; + case U'\u0387': + return U"GREEK ANO TELEIA"; + case U'\u0388': + return U"GREEK CAPITAL LETTER EPSILON WITH TONOS"; + case U'\u0389': + return U"GREEK CAPITAL LETTER ETA WITH TONOS"; + case U'\u038A': + return U"GREEK CAPITAL LETTER IOTA WITH TONOS"; + case U'\u038C': + return U"GREEK CAPITAL LETTER OMICRON WITH TONOS"; + case U'\u038E': + return U"GREEK CAPITAL LETTER USPILON WITH TONOS"; + case U'\u038F': + return U"GREEK CAPITAL LETTER OMEGA WITH TONOS"; + case U'\u0390': + return U"GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"; + case U'\u0391': + return U"GREEK CAPITAL LETTER ALPHA"; + case U'\u0392': + return U"GREEK CAPITAL LETTER BETA"; + case U'\u0393': + return U"GREEK CAPITAL LETTER GAMMA"; + case U'\u0394': + return U"GREEK CAPITAL LETTER DELTA"; + case U'\u0395': + return U"GREEK CAPITAL LETTER EPSILON"; + case U'\u0396': + return U"GREEK CAPITAL LETTER ZETA"; + case U'\u0397': + return U"GREEK CAPITAL LETTER ETA"; + case U'\u0398': + return U"GREEK CAPITAL LETTER THETA"; + case U'\u0399': + return U"GREEK CAPITAL LETTER IOTA"; + case U'\u039A': + return U"GREEK CAPITAL LETTER KAPPA"; + case U'\u039B': + return U"GREEK CAPITAL LETTER LAMBDA"; + case U'\u039C': + return U"GREEK CAPITAL LETTER MU"; + case U'\u039D': + return 
U"GREEK CAPITAL LETTER NU"; + case U'\u039E': + return U"GREEK CAPITAL LETTER XI"; + case U'\u039F': + return U"GREEK CAPITAL LETTER OMICRON"; + case U'\u03A0': + return U"GREEK CAPITAL LETTER PI"; + case U'\u03A1': + return U"GREEK CAPITAL LETTER RHO"; + case U'\u03A3': + return U"GREEK CAPITAL LETTER SIGMA"; + case U'\u03A4': + return U"GREEK CAPITAL LETTER TAU"; + case U'\u03A5': + return U"GREEK CAPITAL LETTER UPSILON"; + case U'\u03A6': + return U"GREEK CAPITAL LETTER PHI"; + case U'\u03A7': + return U"GREEK CAPITAL LETTER CHI"; + case U'\u03A8': + return U"GREEK CAPITAL LETTER PSI"; + case U'\u03A9': + return U"GREEK CAPITAL LETTER OMEGA"; + case U'\u03AA': + return U"GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"; + case U'\u03AB': + return U"GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"; + case U'\u03AC': + return U"GREEK SMALL LETTER ALPHA WITH TONOS"; + case U'\u03AD': + return U"GREEK SMALL LETTER EPSILON WITH TONOS"; + case U'\u03AE': + return U"GREEK SMALL LETTER ETA WITH TONOS"; + case U'\u03AF': + return U"GREEK SMALL LETTER IOTA WITH TONOS"; + case U'\u03B0': + return U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"; + case U'\u03B1': + return U"GREEK SMALL LETTER ALPHA"; + case U'\u03B2': + return U"GREEK SMALL LETTER BETA"; + case U'\u03B3': + return U"GREEK SMALL LETTER GAMMA"; + case U'\u03B4': + return U"GREEK SMALL LETTER DELTA"; + case U'\u03B5': + return U"GREEK SMALL LETTER EPSILON"; + case U'\u03B6': + return U"GREEK SMALL LETTER ZETA"; + case U'\u03B7': + return U"GREEK SMALL LETTER ETA"; + case U'\u03B8': + return U"GREEK SMALL LETTER THETA"; + case U'\u03B9': + return U"GREEK SMALL LETTER IOTA"; + case U'\u03BA': + return U"GREEK SMALL LETTER KAPPA"; + case U'\u03BB': + return U"GREEK SMALL LETTER LAMBDA"; + case U'\u03BC': + return U"GREEK SMALL LETTER MU"; + case U'\u03BD': + return U"GREEK SMALL LETTER NU"; + case U'\u03BE': + return U"GREEK SMALL LETTER XI"; + case U'\u03BF': + return U"GREEK SMALL LETTER OMICRON"; + case U'\u03C0': 
+ return U"GREEK SMALL LETTER PI"; + case U'\u03C1': + return U"GREEK SMALL LETTER RHO"; + case U'\u03C2': + return U"GREEK SMALL LETTER FINAL SIGMA"; + case U'\u03C3': + return U"GREEK SMALL LETTER SIGMA"; + case U'\u03C4': + return U"GREEK SMALL LETTER TAU"; + case U'\u03C5': + return U"GREEK SMALL LETTER UPSILON"; + case U'\u03C6': + return U"GREEK SMALL LETTER PHI"; + case U'\u03C7': + return U"GREEK SMALL LETTER CHI"; + case U'\u03C8': + return U"GREEK SMALL LETTER PSI"; + case U'\u03C9': + return U"GREEK SMALL LETTER OMEGA"; + case U'\u03CA': + return U"GREEK SMALL LETTER IOTA WITH DIALYTIKA"; + case U'\u03CB': + return U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA"; + case U'\u03CC': + return U"GREEK SMALL LETTER OMICRON WITH TONOS"; + case U'\u03CD': + return U"GREEK SMALL LETTER UPSILON WITH TONOS"; + case U'\u03CE': + return U"GREEK SMALL LETTER OMEGA WITH TONOS"; + case U'\u03CF': + return U"GREEK CAPITAL KAI SYMBOL"; + case U'\u03D0': + return U"GREEK BETA SYMBOL"; + case U'\u03D1': + return U"GREEK THETA SYMBOL"; + case U'\u03D2': + return U"GREEK UPSILON WITH HOOK SYMBOL"; + case U'\u03D3': + return U"GREEK UPSILON WITH ACUTE AND HOOK SYMBOL"; + case U'\u03D4': + return U"GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL"; + case U'\u03D5': + return U"GREEK PHI SYMBOL"; + case U'\u03D6': + return U"GREEK PI SYMBOL"; + case U'\u03D7': + return U"GREEK KAI SYMBOL"; + case U'\u03D8': + return U"GREEK LETTER ARCHAIC KOPPA"; + case U'\u03D9': + return U"GREEK SMALL LETTER ARCHAIC KOPPA"; + case U'\u03DA': + return U"GREEK LETTER STIGMA"; + case U'\u03DB': + return U"GREEK SMALL LETTER STIGMA"; + case U'\u03DC': + return U"GREEK LETTER DIGAMMA"; + case U'\u03DD': + return U"GREEK SMALL LETTER DIGAMMA"; + case U'\u03DE': + return U"GREEK LETTER KOPPA"; + case U'\u03DF': + return U"GREEK SMALL LETTER KOPPA"; + case U'\u03E0': + return U"GREEK LETTER SAMPI"; + case U'\u03F0': + return U"GREEK KAPPA SYMBOL"; + /* HEBREW: */ + case U'\u05D0': + return U"HEBREW LETTER 
ALEF"; + case U'\u05D1': + return U"HEBREW LETTER BET"; + case U'\u05D2': + return U"HEBREW LETTER GIMEL"; + case U'\u05D3': + return U"HEBREW LETTER DALET"; + case U'\u05D4': + return U"HEBREW LETTER HE"; + case U'\u05D5': + return U"HEBREW LETTER VAV"; + case U'\u05D6': + return U"HEBREW LETTER ZAYIN"; + case U'\u05D7': + return U"HEBREW LETTER HET"; + case U'\u05D8': + return U"HEBREW LETTER TET"; + case U'\u05D9': + return U"HEBREW LETTER YOD"; + case U'\u05DA': + return U"HEBREW LETTER FINAL KAF"; + case U'\u05DB': + return U"HEBREW LETTER KAF"; + case U'\u05DC': + return U"HEBREW LETTER LAMED"; + case U'\u05DD': + return U"HEBREW LETTER FINAL MEM"; + case U'\u05DE': + return U"HEBREW LETTER MEM"; + case U'\u05DF': + return U"HEBREW LETTER FINAL NUN"; + case U'\u05E0': + return U"HEBREW LETTER NUN"; + case U'\u05E1': + return U"HEBREW LETTER SAMEKH"; + case U'\u05E2': + return U"HEBREW LETTER AYIN"; + case U'\u05E3': + return U"HEBREW LETTER FINAL PE"; + case U'\u05E4': + return U"HEBREW LETTER PE"; + case U'\u05E5': + return U"HEBREW LETTER FINAL TSADI"; + case U'\u05E6': + return U"HEBREW LETTER TSADI"; + case U'\u05E7': + return U"HEBREW LETTER QOF"; + case U'\u05E8': + return U"HEBREW LETTER RESH"; + case U'\u05E9': + return U"HEBREW LETTER SHIN"; + case U'\u05EA': + return U"HEBREW LETTER TAV"; + case U'\u05EF': + return U"HEBREW YOD TRIANGLE"; + /* CYRILLIC: */ + case U'\u0400': + return U"CYRILLIC CAPITAL LETTER LE WITH GRAVE"; + case U'\u0401': + return U"CYRILLIC CAPITAL LETTER LO"; + case U'\u0402': + return U"CYRILLIC CAPITAL LETTER DJE"; + case U'\u0403': + return U"CYRILLIC CAPITAL LETTER GJE"; + case U'\u0404': + return U"CYRILLIC CAPITAL LETTER UKRAINIAN LE"; + case U'\u0405': + return U"CYRILLIC CAPITAL LETTER DZE"; + case U'\u0406': + return U"CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I"; + case U'\u0407': + return U"CYRILLIC CAPITAL LETTER YI"; + case U'\u0408': + return U"CYRILLIC CAPITAL LETTER JE"; + case U'\u0409': + return 
U"CYRILLIC CAPITAL LETTER LJE"; + case U'\u040A': + return U"CYRILLIC CAPITAL LETTER NJE"; + case U'\u040B': + return U"CYRILLIC CAPITAL LETTER TSHE"; + case U'\u040C': + return U"CYRILLIC CAPITAL LETTER KJE"; + case U'\u040D': + return U"CYRILLIC CAPITAL LETTER I WITH GRAVE"; + case U'\u040E': + return U"CYRILLIC CAPITAL LETTER SHORT U"; + case U'\u040F': + return U"CYRILLIC CAPITAL LETTER DZHE"; + case U'\u0410': + return U"CYRILLIC CAPITAL LETTER A"; + case U'\u0420': + return U"CYRILLIC CAPITAL LETTER ER"; + case U'\u0430': + return U"CYRILLIC SMALL LETTER A"; + case U'\u0440': + return U"CYRILLIC SMALL LETTER ER"; + case U'\u0450': + return U"CYRILLIC SMALL LETTER LE WITH GRAVE"; + case U'\u0460': + return U"CYRILLIC CAPITAL LETTER OMEGA"; + case U'\u0470': + return U"CYRILLIC CAPITAL LETTER PSI"; + case U'\u0480': + return U"CYRILLIC CAPITAL LETTER KOPPA"; + case U'\u0490': + return U"CYRILLIC CAPITAL LETTER GHE WITH UPTURN"; + case U'\u04A0': + return U"CYRILLIC CAPITAL LETTER BASHKIR KA"; + case U'\u04B0': + return U"CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE"; + case U'\u04C0': + return U"CYRILLIC LETTER PALOCHKA"; + case U'\u04D0': + return U"CYRILLIC CAPITAL LETTER A WITH BREVE"; + case U'\u04E0': + return U"CYRILLIC CAPITAL LETTER ABKHASIAN DZE"; + case U'\u04F0': + return U"CYRILLIC CAPITAL LETTER U WITH DIAERESIS"; + /* SYRIAC SUPPLEMENT: */ + case U'\u0860': + return U"SYRIAC LETTER MALAYALAM NGA"; + case U'\u0861': + return U"SYRIAC LETTER MALAYALAM JA"; + case U'\u0862': + return U"SYRIAC LETTER MALAYALAM NYA"; + case U'\u0863': + return U"SYRIAC LETTER MALAYALAM TTA"; + case U'\u0864': + return U"SYRIAC LETTER MALAYALAM NNA"; + case U'\u0865': + return U"SYRIAC LETTER MALAYALAM NNNA"; + case U'\u0866': + return U"SYRIAC LETTER MALAYALAM BHA"; + case U'\u0867': + return U"SYRIAC LETTER MALAYALAM RA"; + case U'\u0868': + return U"SYRIAC LETTER MALAYALAM LLA"; + case U'\u0869': + return U"SYRIAC LETTER MALAYALAM LLLA"; + case U'\u086A': + return 
U"SYRIAC LETTER MALAYALAM SSA"; + /* RUNIC: */ + case U'\u16A0': + return U"RUNIC LETTER FEHU FEOH FE F"; + case U'\u16A1': + return U"RUNIC LETTER V"; + case U'\u16A2': + return U"RUNIC LETTER URUZ UR U"; + case U'\u16A3': + return U"RUNIC LETTER YR"; + case U'\u16A4': + return U"RUNIC LETTER Y"; + case U'\u16A5': + return U"RUNIC LETTER W"; + case U'\u16A6': + return U"RUNIC LETTER THURISAZ THURS THORN"; + case U'\u16A7': + return U"RUNIC LETTER ETH"; + case U'\u16A8': + return U"RUNIC LETTER ANSUZ A"; + case U'\u16A9': + return U"RUNIC LETTER OS O"; + case U'\u16AA': + return U"RUNIC LETTER AC A"; + case U'\u16AB': + return U"RUNIC LETTER AESC"; + case U'\u16AC': + return U"RUNIC LETTER LONG-BRANCHED-OSS O"; + case U'\u16AD': + return U"RUNIC LETTER SHORT-TWIG-OSS O"; + case U'\u16AE': + return U"RUNIC LETTER O"; + case U'\u16AF': + return U"RUNIC LETTER OE"; + case U'\u16B0': + return U"RUNIC LETTER ON"; + case U'\u16C0': + return U"RUNIC LETTER DOTTED-N"; + case U'\u16D0': + return U"RUNIC LETTER SHORT-TWIG-TYR T"; + case U'\u16E0': + return U"RUNIC LETTER EAR"; + case U'\u16F0': + return U"RUNIC BELGTHOR SYMBOL"; + /* CYRILLIC EXTENDED C: */ + case U'\u1C80': + return U"CYRILLIC SMALL LETTER ROUNDED VE"; + case U'\u1C81': + return U"CYRILLIC SMALL LETTER LONG-LEGGED DE"; + case U'\u1C82': + return U"CYRILLIC SMALL LETTER NARROW O"; + case U'\u1C83': + return U"CYRILLIC SMALL LETTER WIDE ES"; + case U'\u1C84': + return U"CYRILLIC SMALL LETTER TALL TE"; + case U'\u1C85': + return U"CYRILLIC SMALL LETTER THREE-LEGGED TE"; + case U'\u1C86': + return U"CYRILLIC SMALL LETTER TALL HARD SIGN"; + case U'\u1C87': + return U"CYRILLIC SMALL LETTER TALL YAT"; + case U'\u1C88': + return U"CYRILLIC SMALL LETTER UNBLENDED UK"; + /* GENERAL PUNCTUATION: */ + case U'\u2000': + return U"EN QUAD"; + case U'\u2001': + return U"EM QUAD"; + case U'\u2002': + return U"EN SPACE"; + case U'\u2003': + return U"EM SPACE"; + case U'\u2004': + return U"THREE-PER-EM SPACE"; + case 
U'\u2005': + return U"FOUR-PER-EM SPACE"; + case U'\u2006': + return U"SIX-PER-EM SPACE"; + case U'\u2007': + return U"FIGURE SPACE"; + case U'\u2008': + return U"PUNCTUATION SPACE"; + case U'\u2009': + return U"THIN SPACE"; + case U'\u200A': + return U"HAIR SPACE"; + case U'\u203C': + return U"DOUBLE EXCLAMATION MARK"; + case U'\u2047': + return U"DOUBLE QUOTATION MARK"; + case U'\u2048': + return U"QUESTION EXCLAMATION MARK"; + case U'\u2049': + return U"EXCLAMATION QUESTION MARK"; + /* CURRENCY SYMBOLS: */ + case U'\u20A0': + return U"EURO-CURRENCY SIGN"; + case U'\u20A1': + return U"COLON SIGN"; + case U'\u20A2': + return U"CRUZEIRO SIGN"; + case U'\u20A3': + return U"FRENCH FRANC SIGN"; + case U'\u20A4': + return U"LIRA SIGN"; + case U'\u20A5': + return U"MILL SIGN"; + case U'\u20A6': + return U"NAIRA SIGN"; + case U'\u20A7': + return U"PESETA SIGN"; + case U'\u20A8': + return U"RUPEE SIGN"; + case U'\u20A9': + return U"WON SIGN"; + case U'\u20AA': + return U"NEW SHEQEL SIGN"; + case U'\u20AB': + return U"DONG SIGN"; + case U'\u20AC': + return U"EURO SIGN"; + case U'\u20AD': + return U"KIP SIGN"; + case U'\u20AE': + return U"TUGRIK SIGN"; + case U'\u20AF': + return U"DRACHMA SIGN"; + case U'\u20B0': + return U"GERMAN PENNY SIGN"; + case U'\u20B1': + return U"PESO SIGN"; + case U'\u20B2': + return U"GUARANI SIGN"; + case U'\u20B3': + return U"AUSTRAL SIGN"; + case U'\u20B4': + return U"HRYVNIA SIGN"; + case U'\u20B5': + return U"CEDI SIGN"; + case U'\u20B6': + return U"LIVRE TOURNOIS SIGN"; + case U'\u20B7': + return U"SPESMILO SIGN"; + case U'\u20B8': + return U"TENGE SIGN"; + case U'\u20BA': + return U"TURKISH LIRA SIGN"; + case U'\u20BB': + return U"NORDIC MARK SIGN"; + case U'\u20BC': + return U"MANAT SIGN"; + case U'\u20BD': + return U"RUBLE SYMBOL"; + case U'\u20BE': + return U"LARI SIGN"; + case U'\u20BF': + return U"BITCOIN SIGN"; + /* LETTERLIKE SYMBOLS: */ + case U'\u2100': + return U"ACCOUNT OF"; + case U'\u2101': + return U"ADRESSED TO THE SUBJECT"; 
+ case U'\u2102': + return U"DOUBLE-STRUCK CAPITAL C"; + case U'\u2103': + return U"DEGREE CELSIUS"; + case U'\u2104': + return U"CENTRE LINE SYMBOL"; + case U'\u2105': + return U"CARE OF"; + case U'\u2106': + return U"CADA UNA"; + case U'\u2107': + return U"EULER CONSTANT"; + case U'\u2108': + return U"SCRUPLE"; + case U'\u2109': + return U"DEGREE FAHRENHEIT"; + case U'\u210A': + return U"SCRIPT SMALL G"; + case U'\u210B': + return U"SCRIPT CAPITAL H"; + case U'\u210C': + return U"BLACK-LETTER CAPITAL H"; + case U'\u210D': + return U"DOUBLE-STRUCK CAPITAL H"; + case U'\u210E': + return U"PLANCK CONSTANT"; + case U'\u210F': + return U"PLANCK CONSTANT OVER TWO PI"; + case U'\u2110': + return U"SCRIPT CAPITAL I"; + case U'\u2111': + return U"BLACK-LETTER CAPITAL I"; + case U'\u2112': + return U"SCRIPT CAPITAL L"; + case U'\u2113': + return U"SCRIPT SMALL L"; + case U'\u2114': + return U"L B BAR SYMBOL"; + case U'\u2115': + return U"DOUBLE-STRUCK CAPITAL N"; + case U'\u2116': + return U"NUMERO SIGN"; + case U'\u2117': + return U"SOUND RECORDING COPYRIGHT"; + case U'\u2118': + return U"SCRIPT CAPITAL P"; + case U'\u2119': + return U"DOUBLE-STRUCK CAPITAL P"; + case U'\u211A': + return U"DOUBLE-STRUCK CAPITAL Q"; + case U'\u211B': + return U"SCRIPT CAPITAL R"; + case U'\u211C': + return U"BLACK-LETTER CAPITAL R"; + case U'\u211D': + return U"DOUBLE-STRUCK CAPITAL R"; + case U'\u211E': + return U"PRESCRIPTION TAKE"; + case U'\u211F': + return U"RESPONSE"; + case U'\u2120': + return U"SERVICE MARK"; + case U'\u2121': + return U"TELEPHONE SIGN"; + case U'\u2122': + return U"TRADE MARK SIGN"; + case U'\u2123': + return U"VERSICLE"; + case U'\u2124': + return U"DOUBLE-STRUCK CAPITAL Z"; + case U'\u2125': + return U"OUNCE SIGN"; + case U'\u2126': + return U"OHM SIGN"; + case U'\u2127': + return U"INVERTED OHM SIGN"; + case U'\u2128': + return U"BLACK-LETTER CAPITAL Z"; + case U'\u2129': + return U"TURNED GREEK SMALL LETTER IOTA"; + case U'\u212A': + return U"KELVIN SIGN"; + 
case U'\u212B': + return U"ANGSTROM SIGN"; + case U'\u212C': + return U"SCRIPT CAPITAL B"; + case U'\u212D': + return U"BLACK-LETTER CAPITAL C"; + case U'\u212E': + return U"ESTIMATED SYMBOL"; + case U'\u212F': + return U"SCRIPT SMALL E"; + case U'\u2130': + return U"SCRIPT CAPITAL E"; + case U'\u2131': + return U"SCRIPT CAPITAL F"; + case U'\u2132': + return U"TURNED CAPITAL F"; + case U'\u2133': + return U"SCRIPT CAPITAL M"; + case U'\u2134': + return U"SCRIPT SMALL O"; + case U'\u2135': + return U"ALEF SYMBOL"; + case U'\u2136': + return U"BET SYMBOL"; + case U'\u2137': + return U"GIMEL SYMBOL"; + case U'\u2138': + return U"DALET SYMBOL"; + case U'\u2139': + return U"INFORMATION SOURCE"; + case U'\u213A': + return U"ROTATED CAPITAL Q"; + case U'\u213B': + return U"FACSIMILE SIGN"; + case U'\u213C': + return U"DOUBLE-STRUCK SMALL PI"; + case U'\u213D': + return U"DOUBLE-STRUCK SMALL GAMMA"; + case U'\u213E': + return U"DOUBLE-STRUCK CAPITAL GAMMA"; + case U'\u213F': + return U"DOUBLE-STRUCK CAPITAL PI"; + case U'\u2140': + return U"DOUBLE-STRUCK N-ARY SUMMATION"; + case U'\u2141': + return U"TURNED SANS-SERIF CAPITAL G"; + case U'\u2142': + return U"TURNED SANS-SERIF CAPITAL L"; + case U'\u2143': + return U"REVERSED SANS-SERIF CAPITAL L"; + case U'\u2144': + return U"TURNED SANS-SERIF CAPITAL Y"; + case U'\u2145': + return U"DOUBLE-STRUCK ITALIC CAPITAL D"; + case U'\u2146': + return U"DOUBLE-STRUCK ITALIC SMALL D"; + case U'\u2147': + return U"DOUBLE-STRUCK ITALIC SMALL E"; + case U'\u2148': + return U"DOUBLE-STRUCK ITALIC SMALL I"; + case U'\u2149': + return U"DOUBLE-STRUCK ITALIC SMALL J"; + case U'\u214A': + return U"PROPERTY LINE"; + case U'\u214B': + return U"TURNED AMPERSAND"; + case U'\u214C': + return U"PER SIGN"; + case U'\u214D': + return U"AKTIESELSKAB"; + case U'\u214E': + return U"TURNED SMALL F"; + case U'\u214F': + return U"SYMBOL FOR SAMARITAN SOURCE"; + /* NUMBER FORMS: */ + case U'\u2150': + return U"VULGAR FRACTION ONE SEVENTH"; + case 
U'\u2151': + return U"VULGAR FRACTION ONE NINTH"; + case U'\u2152': + return U"VULGAR FRACTION ONE TENTH"; + case U'\u2153': + return U"VULGAR FRACTION ONE THIRD"; + case U'\u2154': + return U"VULGAR FRACTION TWO THIRDS"; + case U'\u2155': + return U"VULGAR FRACTION ONE FIFTH"; + case U'\u2156': + return U"VULGAR FRACTION TWO FIFTHS"; + case U'\u2157': + return U"VULGAR FRACTION THREE FIFTHS"; + case U'\u2158': + return U"VULGAR FRACTION FOUR FIFTHS"; + case U'\u2159': + return U"VULGAR FRACTION ONE SIXTH"; + case U'\u215A': + return U"VULGAR FRACTION FIVE SIXTHS"; + case U'\u215B': + return U"VULGAR FRACTION ONE EIGTH"; + case U'\u215C': + return U"VULGAR FRACTION THREE EIGTHS"; + case U'\u215D': + return U"VULGAR FRACTION FIVE EIGHTS"; + case U'\u215E': + return U"VULGAR FRACTION SEVEN EIGTHS"; + case U'\u215F': + return U"FRACTION NUMERATOR ONE"; + case U'\u2160': + return U"ROMAN NUMERAL ONE"; + case U'\u2161': + return U"ROMAN NUMERAL TWO"; + case U'\u2162': + return U"ROMAN NUMERAL THREE"; + case U'\u2163': + return U"ROMAN NUMERAL FOUR"; + case U'\u2164': + return U"ROMAN NUMERAL FIVE"; + case U'\u2165': + return U"ROMAN NUMERAL SIX"; + case U'\u2166': + return U"ROMAN NUMERAL SEVEN"; + case U'\u2167': + return U"ROMAN NUMERAL EIGHT"; + case U'\u2168': + return U"ROMAN NUMERAL NINE"; + case U'\u2169': + return U"ROMAN NUMERAL TEN"; + case U'\u216A': + return U"ROMAN NUMERAL ELEVEN"; + case U'\u216B': + return U"ROMAN NUMERAL TWELVE"; + case U'\u216C': + return U"ROMAN NUMERAL FIFTY"; + case U'\u216D': + return U"ROMAN NUMERAL ONE HUNDRED"; + case U'\u216E': + return U"ROMAN NUMERAL FIVE HUNDRED"; + case U'\u216F': + return U"ROMAN NUMERAL ONE THOUSAND"; + case U'\u2170': + return U"SMALL ROMAN NUMERAL ONE"; + case U'\u2171': + return U"SMALL ROMAN NUMERAL TWO"; + case U'\u2172': + return U"SMALL ROMAN NUMERAL THREE"; + case U'\u2173': + return U"SMALL ROMAN NUMERAL FOUR"; + case U'\u2174': + return U"SMALL ROMAN NUMERAL FIVE"; + case U'\u2175': + return 
U"SMALL ROMAN NUMERAL SIX"; + case U'\u2176': + return U"SMALL ROMAN NUMERAL SEVEN"; + case U'\u2177': + return U"SMALL ROMAN NUMERAL EIGHT"; + case U'\u2178': + return U"SMALL ROMAN NUMERAL NINE"; + case U'\u2179': + return U"SMALL ROMAN NUMERAL TEN"; + case U'\u217A': + return U"SMALL ROMAN NUMERAL ELEVEN"; + case U'\u217B': + return U"SMALL ROMAN NUMERAL TWELVE"; + case U'\u217C': + return U"SMALL ROMAN NUMERAL FIFTY"; + case U'\u217D': + return U"SMALL ROMAN NUMERAL ONE HUNDRED"; + case U'\u217E': + return U"SMALL ROMAN NUMERAL FIVE HUNDRED"; + case U'\u217F': + return U"SMALL ROMAN NUMERAL ONE THOUSAND"; + case U'\u2180': + return U"ROMAN NUMERAL ONE THOUSAND C D"; + case U'\u2181': + return U"ROMAN NUMERAL FIVE THOUSAND"; + case U'\u2182': + return U"ROMAN NUMERAL TEN THOUSAND"; + case U'\u2183': + return U"ROMAN NUMERAL REVERSED ONE HUNDRED"; + case U'\u2184': + return U"LATIN SMALL LETTER REVERSED C"; + case U'\u2185': + return U"ROMAN NUMERAL SIX LATE FORM"; + case U'\u2186': + return U"ROMAN NUMERAL FIFTY EARLY FORM"; + case U'\u2187': + return U"ROMAN NUMERAL FIFTY THOUSAND"; + case U'\u2188': + return U"ROMAN NUMERAL ONE HUNDRED THOUSAND"; + case U'\u2189': + return U"VULGAR FRACTION ZERO THIRDS"; + case U'\u218A': + return U"TURNED DIGIT TWO"; + case U'\u218B': + return U"TURNED DIGIT THREE"; + /* MISCELLANEOUS SYMBOLS: */ + case U'\u2630': + return U"TRIGRAM FOR HEAVEN"; + case U'\u2631': + return U"TRIGRAM FOR LAKE"; + case U'\u2632': + return U"TRIGRAM FOR FIRE"; + case U'\u2633': + return U"TRIGRAM FOR THUNDER"; + case U'\u2634': + return U"TRIGRAM FOR WIND"; + case U'\u2635': + return U"TRIGRAM FOR WATER"; + case U'\u2636': + return U"TRIGRAM FOR MOUNTAIN"; + case U'\u2637': + return U"TRIGRAM FOR EARTH"; + case U'\u2638': + return U"WHEEL OF DHARMA"; + case U'\u2639': + return U"WHITE FROWNING FACE"; + case U'\u263A': + return U"WHITE SMILING FACE"; + case U'\u263B': + return U"BLACK SMILING FACE"; + case U'\u263C': + return U"WHITE SUN WITH 
RAYS"; + case U'\u263D': + return U"FIRST QUARTER MOON"; + case U'\u263E': + return U"LAST QUARTER MOON"; + case U'\u263F': + return U"MERCURY"; + case U'\u2640': + return U"FEMALE SIGN"; + case U'\u2641': + return U"EARTH"; + case U'\u2642': + return U"MALE SIGN"; + case U'\u2643': + return U"JUPITER"; + case U'\u2644': + return U"SATURN"; + case U'\u2645': + return U"URANUS"; + case U'\u2646': + return U"NEPTUNE"; + case U'\u2647': + return U"PLUTO"; + case U'\u2648': + return U"ARIES"; + case U'\u2649': + return U"TAURUS"; + case U'\u264A': + return U"GEMNINI"; + case U'\u264B': + return U"CANCER"; + case U'\u264C': + return U"LEO"; + case U'\u264D': + return U"VIRGO"; + case U'\u264E': + return U"LIBRA"; + case U'\u264F': + return U"SCORPIUS"; + case U'\u2650': + return U"SAGITTARIUS"; + case U'\u2651': + return U"CAPRICORN"; + case U'\u2652': + return U"AQUARIUS"; + case U'\u2653': + return U"PISCES"; + case U'\u2654': + return U"WHITE CHESS KING"; + case U'\u2655': + return U"WHITE CHESS QUEEN"; + case U'\u2656': + return U"WHITE CHESS ROOK"; + case U'\u2657': + return U"WHITE CHESS BISHOP"; + case U'\u2658': + return U"WHITE CHESS KNIGHT"; + case U'\u2659': + return U"WHITE CHESS PAWN"; + case U'\u265A': + return U"BLACK CHESS KING"; + case U'\u265B': + return U"BLACK CHESS QUEEN"; + case U'\u265C': + return U"BLACK CHESS ROOK"; + case U'\u265D': + return U"BLACK CHESS BISHOP"; + case U'\u265E': + return U"BLACK CHESS KNIGHT"; + case U'\u265F': + return U"BLACK CHESS PAWN"; + case U'\u2660': + return U"BLACK SPADE SUIT"; + case U'\u2661': + return U"WHITE HEART SUIT"; + case U'\u2662': + return U"WHITE DIAMOND SUIT"; + case U'\u2663': + return U"BLACK CLUB SUIT"; + case U'\u2664': + return U"WHITE SPADE SUIT"; + case U'\u2665': + return U"BLACK HEART SUIT"; + case U'\u2666': + return U"BLACK DIAMOND SUIT"; + case U'\u2667': + return U"WHITE CLUB SUIT"; + case U'\u2668': + return U"HOT SPRINGS"; + case U'\u2669': + return U"QUARTER NOTE"; + case U'\u266A': + 
return U"EIGHT NOTE"; + case U'\u266B': + return U"BEAMED EIGTH NOTES"; + case U'\u266C': + return U"BEAMED SIXTEENTH NOTES"; + case U'\u266D': + return U"MUSIC FLAT SIGN"; + case U'\u266E': + return U"MUSIC NEUTRAL SIGN"; + case U'\u266F': + return U"MUSIC SHARP SIGN"; + case U'\u2670': + return U"WEST SYRIAC CROSS"; + case U'\u2671': + return U"EAST SYRIAC CROSS"; + case U'\u2672': + return U"UNIVERSAL RECYCLING SYMBOL"; + case U'\u2673': + return U"RECYCLING SYMBOL FOR TYPE-1 PLASTICS"; + case U'\u2674': + return U"RECYCLING SYMBOL FOR TYPE-2 PLASTICS"; + case U'\u2675': + return U"RECYCLING SYMBOL FOR TYPE-3 PLASTICS"; + case U'\u2676': + return U"RECYCLING SYMBOL FOR TYPE-4 PLASTICS"; + case U'\u2677': + return U"RECYCLING SYMBOL FOR TYPE-5 PLASTICS"; + case U'\u2678': + return U"RECYCLING SYMBOL FOR TYPE-6 PLASTICS"; + case U'\u2679': + return U"RECYCLING SYMBOL FOR TYPE-7 PLASTICS"; + case U'\u267A': + return U"RECYCLING SYMBOL FOR GENERIC MATERIALS"; + case U'\u267B': + return U"BLACK UNIVERSAL RECYCLING SYMBOL"; + case U'\u267C': + return U"RECYCLED PAPER SYMBOL"; + case U'\u267D': + return U"PARTIALLY-RECYCLED PAPER SYMBOL"; + case U'\u267E': + return U"PERMANENT PAPER SIGN"; + case U'\u267F': + return U"WHEELCHAIR SYMBOL"; + case U'\u26B9': + return U"SEXTILE"; + /* DINGBATS: */ + case U'\u271D': + return U"LATIN CROSS"; + case U'\u2721': + return U"STAR OF DAVID"; + /* SUPPLEMENTAL PUNCTUATION: */ + case U'\u2E3B': + return U"THREE-EM DASH"; + /* ARABIC PRESENTATION FORMS-A: */ + case U'\uFDFD': + return U"ARABIC LIGATURE BISMILLAH AL-RAHMAN AR-RAHEEM"; + /* ANCIENT SYMBOLS: */ + case U'\U00010190': + return U"ROMAN SEXTANS SIGN"; + case U'\U00010191': + return U"ROMAN UNCIA SIGN"; + case U'\U00010192': + return U"ROMAN SEMUNCIA SIGN"; + case U'\U00010193': + return U"ROMAN SEXTULA SIGN"; + case U'\U00010194': + return U"ROMAN DIMIDIA SEXTULA SIGN"; + case U'\U00010195': + return U"ROMAN SILIQUA SIGN"; + case U'\U00010196': + return U"ROMAN DENARIUS 
SIGN"; + case U'\U00010197': + return U"ROMAN QUINARIUS SIGN"; + case U'\U00010198': + return U"ROMAN SESTERTIUS SIGN"; + case U'\U00010199': + return U"ROMAN DUPONDIUS SIGN"; + case U'\U0001019A': + return U"ROMAN AS SIGN"; + case U'\U0001019B': + return U"ROMAN CENTURIAL SIGN"; + case U'\U0001019C': + return U"ASCIA SIGN"; + /* BRAHMI: */ + case U'\U00011066': + return U"BRAHMI DIGIT ZERO"; + case U'\U00011067': + return U"BRAHMI DIGIT ONE"; + case U'\U00011068': + return U"BRAHMI DIGIT TWO"; + case U'\U00011069': + return U"BRAHMI DIGIT THREE"; + case U'\U0001106A': + return U"BRAHMI DIGIT FOUR"; + case U'\U0001106B': + return U"BRAHMI DIGIT FIVE"; + case U'\U0001106C': + return U"BRAHMI DIGIT SIX"; + case U'\U0001106D': + return U"BRAHMI DIGIT SEVEN"; + case U'\U0001106E': + return U"BRAHMI DIGIT EIGHT"; + case U'\U0001106F': + return U"BRAHMI DIGIT NINE"; + /* CUNEIFORM: */ + case U'\U00012031': + return U"CUNEIFORM SIGN AN PLUS NAGA SQUARED"; + /* CUNEIFORM NUMBERS AND PUNCTUATION: */ + case U'\U0001242B': + return U"CUNEIFORM NUMERIC SIGN NINE SHAR2"; + /* EGYPTIAN HIEROGLYPHS: */ + case U'\U000130B8': + return U"EGYPTIAN HIEROGLYPH D052"; + /* COUNTING ROD NUMERALS: */ + case U'\U0001D372': + return U"IDEOGRAPHIC TALLY MARK ONE"; + case U'\U0001D373': + return U"IDEOGRAPHIC TALLY MARK TWO"; + case U'\U0001D374': + return U"IDEOGRAPHIC TALLY MARK THREE"; + case U'\U0001D375': + return U"IDEOGRAPHIC TALLY MARK FOUR"; + case U'\U0001D376': + return U"IDEOGRAPHIC TALLY MARK FIVE"; + case U'\U0001D377': + return U"TALLY MARK ONE"; + case U'\U0001D378': + return U"TALLY MARK FIVE"; + /* ENCLOSED ALPHANUMERIC SUPPLEMENT: */ + case U'\U0001F10D': + return U"CIRCLED ZERO WITH SLASH"; + case U'\U0001F10E': + return U"CIRCLED ANTICKLOCKWISE ARROW"; + case U'\U0001F10F': + return U"CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH"; + case U'\U0001F12F': + return U"COPYLEFT SYMBOL"; + case U'\U0001F16D': + return U"CIRCLED CC"; + case U'\U0001F16E': + return U"CIRCLED C WITH 
OVERLAID BACKSLASH"; + case U'\U0001F16F': + return U"CIRCLED HUMAN FIGURE"; + /* EMOTICONS: */ + case U'\U0001F600': + return U"GRINNING FACE"; + case U'\U0001F601': + return U"GRINNING FACE WITH SMIRKING EYES"; + case U'\U0001F602': + return U"FACE WITH TEARS OF JOY"; + case U'\U0001F603': + return U"SMILING FACE WITH OPEN MOUTH"; + case U'\U0001F604': + return U"SMILING FACE WITH OPEN MOUTH AND SMILING EYES"; + case U'\U0001F605': + return U"SMILING FACE WITH OPEN MOUTH AND COULD SWEAT"; + case U'\U0001F606': + return U"SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES"; + case U'\U0001F607': + return U"SMILING FACE WITH HALO"; + case U'\U0001F608': + return U"SMILING FACE WITH HORNS"; + case U'\U0001F609': + return U"WINKING FACE"; + case U'\U0001F60A': + return U"SMILING FACE WITH SMILING EYES"; + case U'\U0001F60B': + return U"FACE SAVOURING DELICIOUS FOOD"; + case U'\U0001F60C': + return U"RELIEVED FACE"; + case U'\U0001F60D': + return U"SMILLING FACE HEART-SHAPED EYES"; + case U'\U0001F60E': + return U"SMILLING FACE WITH SUNGLASSES"; + case U'\U0001F60F': + return U"SMIRKING FACE"; + case U'\U0001F610': + return U"NEUTRAL FACE"; + case U'\U0001F611': + return U"EXPRESSIONLESS FACE"; + case U'\U0001F612': + return U"UNAMUSED FACE"; + case U'\U0001F613': + return U"FACE WITH COLD SWEAT"; + case U'\U0001F614': + return U"PENSIVE FACE"; + case U'\U0001F615': + return U"CONFUSED FACE"; + case U'\U0001F616': + return U"CONFOUNDED FACE"; + case U'\U0001F617': + return U"KISSING FACE"; + case U'\U0001F618': + return U"FACE THROWING A KISS"; + case U'\U0001F619': + return U"KISSING FACE WITH SMILLING EYES"; + case U'\U0001F61A': + return U"KISSING FACE WITH CLOSED EYES"; + case U'\U0001F61B': + return U"FACE WITH STUCK-OUT TONGUE"; + case U'\U0001F61C': + return U"FACE WITH STUCK-OUT TONGUE AND WINKING EYE"; + case U'\U0001F61D': + return U"FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES"; + case U'\U0001F61E': + return U"DISSAPOINTED FACE"; + case 
U'\U0001F61F': + return U"WORRIED FACE"; + case U'\U0001F620': + return U"ANGRY FACE"; + case U'\U0001F621': + return U"POUTING FACE"; + case U'\U0001F622': + return U"CRYING FACE"; + case U'\U0001F623': + return U"PERSEVERING FACE"; + case U'\U0001F624': + return U"FACE WITH LOOK OF TRIUMPH"; + case U'\U0001F625': + return U"DISSAPOINTED BUT RELIEVED FACE"; + case U'\U0001F626': + return U"FROWNING FACE WITH OPEN MOUTH"; + case U'\U0001F627': + return U"ANGUISHED FACE"; + case U'\U0001F628': + return U"FEARFUL FACE"; + case U'\U0001F629': + return U"WEARY FACE"; + case U'\U0001F62A': + return U"SLEEPY FACE"; + case U'\U0001F62B': + return U"TIRED FACE"; + case U'\U0001F62C': + return U"GRIMACING FACE"; + case U'\U0001F62D': + return U"LOUDLY CRYING FACE"; + case U'\U0001F62E': + return U"FACE WITH OPEN MOUTH"; + case U'\U0001F62F': + return U"HUSHED FACE"; + case U'\U0001F630': + return U"FACE WITH OPEN MOUTH AND COLD SWEAT"; + case U'\U0001F631': + return U"FACE SCREAMING IN FEAR"; + case U'\U0001F632': + return U"ASTONISHED FACE"; + case U'\U0001F633': + return U"FLUSHED FACE"; + case U'\U0001F634': + return U"SLEEPING FACE"; + case U'\U0001F635': + return U"DIZZY FACE"; + case U'\U0001F636': + return U"FACE WITHOUT MOUTH"; + case U'\U0001F637': + return U"FACE WITH MEDICAL MASK"; + case U'\U0001F641': + return U"SLIGHTLY FROWNING FACE"; + case U'\U0001F642': + return U"SLIGHTLY SMILING FACE"; + case U'\U0001F643': + return U"UPSIDE-DOWN FACE"; + case U'\U0001F644': + return U"FACE WITH ROLLING EYES"; + /* ORNAMENTAL DINGBATS: */ + case U'\U0001F670': + return U"SCRIPT LIGATURE ET ORNAMENT"; + case U'\U0001F671': + return U"HEAVY SCRIPT LIGATURE ET ORNAMENT"; + case U'\U0001F672': + return U"LIGATURE OPEN ET ORNAMENT"; + case U'\U0001F673': + return U"HEAVY LIGATURE OPEN ET ORNAMENT"; + case U'\U0001F674': + return U"HEAVY AMPERSAND ORNAMENT"; + case U'\U0001F675': + return U"SWASH AMPERSAND ORNAMENT"; + case U'\U0001F676': + return U"SANS-SERIF HEAVY DOUBLE 
TURNED COMMA QUOTATION MARK ORNAMENT"; + case U'\U0001F677': + return U"SANS-SERIF HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT"; + case U'\U0001F678': + return U"SANS-SERIF HEAVY LOW DOUBLE QUOTATION MARK ORNAMENT"; + case U'\U0001F679': + return U"HEAVY INTERROBANG ORNAMENT"; + case U'\U0001F67A': + return U"SANS-SERIF INTERROBANG ORNAMENT"; + case U'\U0001F67B': + return U"HEAVY SANS-SERIF INTERROBANG ORNAMENT"; + case U'\U0001F67C': + return U"VERY HEAVY SOLIDUS"; + case U'\U0001F67D': + return U"VERY HEAVY REVERSE SOLIDUS"; + case U'\U0001F67E': + return U"CHECKER BOARD"; + case U'\U0001F67F': + return U"REVERSE CHECKER BOARD"; + /* CJK UNIFIED IDEOGRAPHS EXTENSION G: */ + case U'\U0003106C': + return U"CJK UNIFIED IDEOGRAPH-3106C"; + /* TAGS: */ + case U'\U000E0001': + return U"LANGUAGE TAG"; + case U'\U000E0020': + return U"TAG SPACE"; + case U'\U000E0021': + return U"TAG EXCLAMATION MARK"; + case U'\U000E0022': + return U"TAG QUOTATION MARK"; + case U'\U000E0023': + return U"TAG NUMBER SIGN"; + case U'\U000E0024': + return U"TAG DOLLAR SIGN"; + case U'\U000E0025': + return U"TAG PERCENT SIGN"; + case U'\U000E0026': + return U"TAG AMPERSAND"; + case U'\U000E0027': + return U"TAG APOSTROPHE"; + case U'\U000E0028': + return U"TAG LEFT PARANTHESIS"; + case U'\U000E0029': + return U"TAG RIGHT PARANTHESIS"; + case U'\U000E002A': + return U"TAG ASTERISK"; + case U'\U000E002B': + return U"TAG PLUS SIGN"; + case U'\U000E002C': + return U"TAG COMMA"; + case U'\U000E002D': + return U"TAG HYPHEN-MINUS"; + case U'\U000E002E': + return U"TAG FULL STOP"; + case U'\U000E002F': + return U"TAG SOLIDUS"; + case U'\U000E0030': + return U"TAG DIGIT ZERO"; + case U'\U000E0031': + return U"TAG DIGIT ONE"; + case U'\U000E0032': + return U"TAG DIGIT TWO"; + case U'\U000E0033': + return U"TAG DIGIT THREE"; + case U'\U000E0034': + return U"TAG DIGIT FOUR"; + case U'\U000E0035': + return U"TAG DIGIT FIVE"; + case U'\U000E0036': + return U"TAG DIGIT SIX"; + case U'\U000E0037': + return 
U"TAG DIGIT SEVEN"; + case U'\U000E0038': + return U"TAG DIGIT EIGHT"; + case U'\U000E0039': + return U"TAG DIGIT NINE"; + case U'\U000E003A': + return U"TAG COLON"; + case U'\U000E003B': + return U"TAG SEMICOLON"; + case U'\U000E003C': + return U"TAG LESS-THAN SIGN"; + case U'\U000E003D': + return U"TAG EQUALS SIGN"; + case U'\U000E003E': + return U"TAG GREATER-THAN SIGN"; + case U'\U000E003F': + return U"TAG QUESTION MARK"; + case U'\U000E0040': + return U"TAG COMMERCIAL AT"; + case U'\U000E0041': + return U"TAG LATIN CAPITAL LETTER A"; + case U'\U000E0042': + return U"TAG LATIN CAPITAL LETTER B"; + case U'\U000E0043': + return U"TAG LATIN CAPITAL LETTER C"; + case U'\U000E0044': + return U"TAG LATIN CAPITAL LETTER D"; + case U'\U000E0045': + return U"TAG LATIN CAPITAL LETTER E"; + case U'\U000E0046': + return U"TAG LATIN CAPITAL LETTER F"; + case U'\U000E0047': + return U"TAG LATIN CAPITAL LETTER G"; + case U'\U000E0048': + return U"TAG LATIN CAPITAL LETTER H"; + case U'\U000E0049': + return U"TAG LATIN CAPITAL LETTER I"; + case U'\U000E004A': + return U"TAG LATIN CAPITAL LETTER J"; + case U'\U000E004B': + return U"TAG LATIN CAPITAL LETTER K"; + case U'\U000E004C': + return U"TAG LATIN CAPITAL LETTER L"; + case U'\U000E004D': + return U"TAG LATIN CAPITAL LETTER M"; + case U'\U000E004E': + return U"TAG LATIN CAPITAL LETTER N"; + case U'\U000E004F': + return U"TAG LATIN CAPITAL LETTER O"; + case U'\U000E0050': + return U"TAG LATIN CAPITAL LETTER P"; + case U'\U000E0051': + return U"TAG LATIN CAPITAL LETTER Q"; + case U'\U000E0052': + return U"TAG LATIN CAPITAL LETTER R"; + case U'\U000E0053': + return U"TAG LATIN CAPITAL LETTER S"; + case U'\U000E0054': + return U"TAG LATIN CAPITAL LETTER T"; + case U'\U000E0055': + return U"TAG LATIN CAPITAL LETTER U"; + case U'\U000E0056': + return U"TAG LATIN CAPITAL LETTER V"; + case U'\U000E0057': + return U"TAG LATIN CAPITAL LETTER W"; + case U'\U000E0058': + return U"TAG LATIN CAPITAL LETTER X"; + case U'\U000E0059': + 
return U"TAG LATIN CAPITAL LETTER Y"; + case U'\U000E005A': + return U"TAG LATIN CAPITAL LETTER Z"; + case U'\U000E005B': + return U"TAG LEFT SQUARE BRACKET"; + case U'\U000E005C': + return U"TAG REVERSE SOLIDUS"; + case U'\U000E005D': + return U"TAG RIGHT SQUARE BRACKET"; + case U'\U000E005E': + return U"TAG CIRCUMFLEX ACCENT"; + case U'\U000E005F': + return U"TAG LOW LINE"; + case U'\U000E0060': + return U"TAG GRAVE ACCENT"; + case U'\U000E0061': + return U"TAG LATIN SMALL LETTER A"; + case U'\U000E0062': + return U"TAG LATIN SMALL LETTER B"; + case U'\U000E0063': + return U"TAG LATIN SMALL LETTER C"; + case U'\U000E0064': + return U"TAG LATIN SMALL LETTER D"; + case U'\U000E0065': + return U"TAG LATIN SMALL LETTER E"; + case U'\U000E0066': + return U"TAG LATIN SMALL LETTER F"; + case U'\U000E0067': + return U"TAG LATIN SMALL LETTER G"; + case U'\U000E0068': + return U"TAG LATIN SMALL LETTER H"; + case U'\U000E0069': + return U"TAG LATIN SMALL LETTER I"; + case U'\U000E006A': + return U"TAG LATIN SMALL LETTER J"; + case U'\U000E006B': + return U"TAG LATIN SMALL LETTER K"; + case U'\U000E006C': + return U"TAG LATIN SMALL LETTER L"; + case U'\U000E006D': + return U"TAG LATIN SMALL LETTER M"; + case U'\U000E006E': + return U"TAG LATIN SMALL LETTER N"; + case U'\U000E006F': + return U"TAG LATIN SMALL LETTER O"; + case U'\U000E0070': + return U"TAG LATIN SMALL LETTER P"; + case U'\U000E0071': + return U"TAG LATIN SMALL LETTER Q"; + case U'\U000E0072': + return U"TAG LATIN SMALL LETTER R"; + case U'\U000E0073': + return U"TAG LATIN SMALL LETTER S"; + case U'\U000E0074': + return U"TAG LATIN SMALL LETTER T"; + case U'\U000E0075': + return U"TAG LATIN SMALL LETTER U"; + case U'\U000E0076': + return U"TAG LATIN SMALL LETTER V"; + case U'\U000E0077': + return U"TAG LATIN SMALL LETTER W"; + case U'\U000E0078': + return U"TAG LATIN SMALL LETTER X"; + case U'\U000E0079': + return U"TAG LATIN SMALL LETTER Y"; + case U'\U000E007A': + return U"TAG LATIN SMALL LETTER Z"; + case 
U'\U000E007B': + return U"TAG LEFT CURLY BRACKET"; + case U'\U000E007C': + return U"TAG VERTICAL LINE"; + case U'\U000E007D': + return U"TAG RIGHT CURLY BRACKET"; + case U'\U000E007E': + return U"TAG TILDE"; + case U'\U000E007F': + return U"CANCEL TAG"; + } +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf b/u8c/include/u8c/utf new file mode 100644 index 0000000..15bdc44 --- /dev/null +++ b/u8c/include/u8c/utf @@ -0,0 +1,51 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_xtmbVPu5vGoJz4tw) +#define u8c_key_xtmbVPu5vGoJz4tw + +#include <u8c/arr> + +namespace u8c { + template<u8c::utf T,u8c::utf T0> [[nodiscard]] constexpr auto cnv( T0 const * begin,T0 const * end) -> u8c::arr<T>; + [[nodiscard,u8c_attr_const]] constexpr auto isalnum( char32_t chr) -> bool; + [[nodiscard,u8c_attr_const]] constexpr auto isalpha( char32_t chr) -> bool; + [[nodiscard,u8c_attr_const]] constexpr auto iscntrl( char32_t chr) -> bool; + [[nodiscard,u8c_attr_const]] constexpr auto isdigit( char32_t chr) -> bool; + [[nodiscard,u8c_attr_const]] constexpr auto islower( char32_t chr) -> bool; + [[nodiscard,u8c_attr_const]] constexpr auto ispunct( char32_t chr) -> bool; + [[nodiscard,u8c_attr_const]] constexpr auto isspace( char32_t chr) -> bool; + [[nodiscard,u8c_attr_const]] constexpr auto issurro( char32_t chr) -> bool; + [[nodiscard,u8c_attr_const]] constexpr auto isupper( char32_t chr) -> bool; + [[nodiscard,u8c_attr_const]] constexpr auto isxdigit(char32_t chr) -> bool; +} + +#include <u8c/utf.d/cnv> +#include <u8c/utf.d/isalnum> +#include <u8c/utf.d/isalpha> +#include 
<u8c/utf.d/iscntrl> +#include <u8c/utf.d/isdigit> +#include <u8c/utf.d/islower> +#include <u8c/utf.d/ispunct> +#include <u8c/utf.d/isspace> +#include <u8c/utf.d/issurro> +#include <u8c/utf.d/isupper> +#include <u8c/utf.d/isxdigit> + +#endif diff --git a/u8c/include/u8c/utf.d/cnv b/u8c/include/u8c/utf.d/cnv new file mode 100644 index 0000000..95b66e9 --- /dev/null +++ b/u8c/include/u8c/utf.d/cnv @@ -0,0 +1,116 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_nVkgRbXZfcq3BG8J) +#define u8c_key_nVkgRbXZfcq3BG8J + +#include <algorithm> /* std::copy */ +#include <cstddef> /* u8c::size */ +#include <stdexcept> /* std::invalid_argument, std::out_of_range */ +#include <type_traits> /* std::is_same_v */ +#include <vector> /* std::vector */ + +template<u8c::utf T,u8c::utf T0> constexpr auto u8c::cnv(T0 const * const u8c_restr _begin,T0 const * const u8c_restr _end) -> u8c::arr<T> { + if (_begin == nullptr || _end == nullptr) [[unlikely]] { + throw std::invalid_argument("Null pointer provided as parameter."); + } + u8c::arr<T0> in(_begin,_end); + u8c::arr<T> out; + if constexpr (std::is_same_v<T0,T>) { + out.alloc(static_cast<u8c::size>(_end - _begin)); + std::copy(_begin,_end,out.begin()); + return out; + } + else { + if constexpr (std::is_same_v<T0,char16_t>) { + } + else if constexpr (std::is_same_v<T0,char32_t>) { + for (auto const tmp : in) { + if constexpr (std::is_same_v<T,char16_t>) { + if (tmp >= u8c_uint32c(0x10000)) { /* Two hextets. */ + char16_t const tmp0 = tmp - u8c_uint16c(0x10000); + out.app((tmp0 / u8c_uint16c(0x400) + u8c_uint16c(0xD800))); + out.app((tmp0 % u8c_uint16c(0x400) + u8c_uint16c(0xDC00))); + } + else { + /* One hextet. */ + out.app((static_cast<char16_t>(tmp))); + } + } + else { + if (tmp >= u8c_uint32c(0x10000)) { /* Four octets. */ + out.app((u8c_ubytec(0b11110000) + static_cast<char8_t>(tmp >> u8c_uint32c(0x12) & u8c_uint32c(0b00000111)))); + out.app((u8c_ubytec(0b10000000) + static_cast<char8_t>(tmp >> u8c_uint32c(0xC) & u8c_uint32c(0b00111111)))); + out.app((u8c_ubytec(0b10000000) + static_cast<char8_t>(tmp >> u8c_uint32c(0x6) & u8c_uint32c(0b00111111)))); + out.app((u8c_ubytec(0b10000000) + static_cast<char8_t>(tmp & u8c_uint32c(0b00111111)))); + } + else if (tmp >= U'\u0800') { /* Three octets. 
*/ + out.app((u8c_ubytec(0xE0) + static_cast<char8_t>(tmp >> u8c_uint32c(0xC) & u8c_uint32c(0b00001111)))); + out.app((u8c_ubytec(0x80) + static_cast<char8_t>(tmp >> u8c_uint32c(0x6) & u8c_uint32c(0b00111111)))); + out.app((u8c_ubytec(0x80) + static_cast<char8_t>(tmp & u8c_uint32c(0b00111111)))); + } + else if (tmp >= U'\u0080') { /* Two octets. */ + out.app((u8c_ubytec(0xC0) + static_cast<char8_t>(tmp >> u8c_uint32c(0x6) & u8c_uint32c(0b00111111)))); + out.app((u8c_ubytec(0x80) + static_cast<char8_t>(tmp & u8c_uint32c(0b00111111)))); + } + else { + /* One octet. */ + out.app(static_cast<char8_t>(tmp)); + } + } + } + return out; + } + else { + if constexpr (std::is_same_v<T,char16_t>) { + } + else { + for (u8c::size n = 0x0uz;n < in.sz();n += 0x1uz) { + auto const tmp = in[n]; + auto chr = U'\u0000'; + if (tmp >= u8c_ubytec(0b11110000)) { /* Four octets. */ + chr = (tmp ^ u8c_uint32c(0b11110000)) << u8c_uint32c(0x12); + chr += (in[n + 0x1uz] ^ u8c_uint32c(0b10000000)) << u8c_uint32c(0xC); + chr += (in[n + 0x2uz] ^ u8c_uint32c(0b10000000)) << u8c_uint32c(0x6); + chr += in[n + 0x3uz] ^ u8c_uint32c(0b10000000); + n += 0x3uz; + } + else if (tmp >= u8c_ubytec(0b11100000)) { /* Three octets. */ + chr = (tmp ^ u8c_uint32c(0b11100000)) << u8c_uint32c(0xC); + chr += (in[n + 0x1uz] ^ u8c_uint32c(0b10000000)) << u8c_uint32c(0x6); + chr += in[n + 0x2uz] ^ u8c_uint32c(0b10000000); + n += 0x2uz; + } + else if (tmp >= u8c_ubytec(0b11000000)) { /* Two octets. */ + chr = (tmp ^ u8c_uint32c(0b11000000)) << u8c_uint32c(0x6); + chr += in[n + 0x1uz] ^ u8c_uint32c(0b10000000); + n += 0x1uz; + } + else { + /* One octet. */ + chr = tmp; + } + out.app(chr); + } + } + } + } +} + +#endif diff --git a/src/u8c/chk.h.d/isalpha.c b/u8c/include/u8c/utf.d/isalnum index 2631095..0960e57 100644 --- a/src/u8c/chk.h.d/isalpha.c +++ b/u8c/include/u8c/utf.d/isalnum @@ -3,107 +3,188 @@ This file is part of u8c. 
- u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. - See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ - You should have received a copy of the GNU Affero General Public License along with u8c. +#if !defined(u8c_key_C8fUI0HFQi6fZDUx) +#define u8c_key_C8fUI0HFQi6fZDUx - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/chk.h> -struct u8c_isalpha_tuple u8c_isalpha(char32_t const _chr) { - struct u8c_isalpha_tuple ret = { - .stat = false, - }; - ret.res = u8c_islower(_chr).res || u8c_isupper(_chr).res; - if(ret.res) { - return ret; +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::isalpha(char32_t const _chr) -> bool { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + if(u8c::islower(_chr) || u8c::isupper(_chr)) [[unlikely]] { + return true; } switch(_chr) { + [[likely]] default: + return false; case U'\u0297': /* LATIN LETTER GLOTTAL STOP */ + [[fallthrough]]; case U'\u16A0': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16A1': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16A2': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16A3': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16A4': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16A5': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16A6': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16A7': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16A8': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16A9': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16AA': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16AB': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16AC': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16AD': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16AE': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16AF': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16B0': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16B1': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16B2': /* RUNIC LETTER FEHU FEOH FE F 
*/ + [[fallthrough]]; case U'\u16B3': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16B4': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16B5': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16B6': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16B7': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16B8': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16B9': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16BA': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16BB': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16BC': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16BD': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16BE': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16BF': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16C0': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16C1': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16C2': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16C3': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16C4': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16C5': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16C6': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16C7': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16C8': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16C9': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16CA': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16CB': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16CC': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16CD': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16CE': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16CF': /* RUNIC LETTER FEHU FEOH FE 
F */ + [[fallthrough]]; case U'\u16D0': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16D1': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16D2': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16D3': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16D4': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16D5': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16D6': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16D7': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16D8': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16D9': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16DA': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16DB': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16DC': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16DD': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16DE': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16DF': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16E0': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16E1': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16E2': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16E3': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16E4': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16E5': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16E6': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16E7': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16E8': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16E9': /* RUNIC LETTER FEHU FEOH FE F */ + [[fallthrough]]; case U'\u16EA': /* RUNIC LETTER FEHU FEOH FE F */ - ret.res = true; - break; + return true; } - return ret; } + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isalpha b/u8c/include/u8c/utf.d/isalpha new file mode 100644 index 0000000..3a0bb9d --- /dev/null +++ b/u8c/include/u8c/utf.d/isalpha @@ -0,0 +1,29 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_YnTiCcefC1wLH21w) +#define u8c_key_YnTiCcefC1wLH21w + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::isalnum(char32_t const _chr) -> bool { + return u8c::isalpha(_chr) || u8c::isdigit(_chr); +} + +#endif
\ No newline at end of file diff --git a/src/u8c/chk.h.d/iscntrl.c b/u8c/include/u8c/utf.d/iscntrl index c8532fc..083bf52 100644 --- a/src/u8c/chk.h.d/iscntrl.c +++ b/u8c/include/u8c/utf.d/iscntrl @@ -1,104 +1,180 @@ +// --C++-- /* Copyright 2021 Gabriel Jensen This file is part of u8c. - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. - See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ - You should have received a copy of the GNU Affero General Public License along with u8c. +#if !defined(u8c_key_KkxufRi4dPQDAbxV) +#define u8c_key_KkxufRi4dPQDAbxV - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/chk.h> -struct u8c_iscntrl_tuple u8c_iscntrl(char32_t const _chr) { - struct u8c_iscntrl_tuple ret = { - .stat = false, - }; +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::iscntrl(char32_t const _chr) -> bool { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } switch(_chr) { - default: - ret.res = false; - break; - case U'\x0': /* NULL */ - case U'\x1': /* START OF HEADING */ - case U'\x2': /* START OF TEXT */ - case U'\x3': /* END OF TEXT */ - case U'\x4': /* END OF TRANSMISSION */ - case U'\x5': /* ENQUIRY */ - case U'\x6': /* ACKNOWLEDGE */ + [[likely]] default: + return false; + case U'\u0000': /* NULL */ + [[fallthrough]]; + case U'\u0001': /* START OF HEADING */ + [[fallthrough]]; + case U'\u0002': /* START OF TEXT */ + [[fallthrough]]; + case U'\u0003': /* END OF TEXT */ + [[fallthrough]]; + case U'\u0004': /* END OF TRANSMISSION */ + [[fallthrough]]; + case U'\u0005': /* ENQUIRY */ + [[fallthrough]]; + case U'\u0006': /* ACKNOWLEDGE */ + [[fallthrough]]; case U'\a': /* BELL */ + [[fallthrough]]; case U'\b': /* BACKSPACE */ + [[fallthrough]]; case U'\t': /* HORIZONTAL TABULATION */ + [[fallthrough]]; case U'\n': /* NEW LINE */ + [[fallthrough]]; case U'\v': /* VERTICAL TABULATION */ + [[fallthrough]]; case U'\f': /* FORM FEED */ + [[fallthrough]]; case U'\r': /* CARRIAGE RETURN */ - case U'\xE': /* SHIFT OUT */ - case U'\xF': /* SHIFT IN */ + [[fallthrough]]; + case U'\u000E': /* SHIFT OUT */ + [[fallthrough]]; + case U'\u000F': /* SHIFT IN */ + [[fallthrough]]; case U'\x10': /* DATA LINK ESCAPE */ + [[fallthrough]]; case U'\x11': /* DEVICE CONTROL ONE */ + [[fallthrough]]; case U'\x12': /* DEVICE CONTROL TWO */ + [[fallthrough]]; case U'\x13': /* DEVICE CONTROL THREE */ + [[fallthrough]]; case U'\x14': /* DEVICE CONTROL FOUR */ + [[fallthrough]]; case U'\x15': /* NEGATIVE ACKNOWLEDGE */ + 
[[fallthrough]]; case U'\x16': /* SYNCHRONOUS IDLE */ + [[fallthrough]]; case U'\x17': /* END OF TRANSMISSION BLOCK */ + [[fallthrough]]; case U'\x18': /* CANCEL */ + [[fallthrough]]; case U'\x19': /* END OF MEDIUM */ + [[fallthrough]]; case U'\x1A': /* SUBSTITUTE */ - case U'\x1B': /* ESCAPE */ + [[fallthrough]]; + case U'\u001B': /* ESCAPE */ + [[fallthrough]]; case U'\x1C': /* FILE SEPERATOR */ + [[fallthrough]]; case U'\x1D': /* GROUP SEPERATOR */ + [[fallthrough]]; case U'\x1E': /* RECORD SEPERATOR */ + [[fallthrough]]; case U'\x1F': /* UNIT SEPERATOR */ + [[fallthrough]]; case U'\x7F': /* DELETE */ + [[fallthrough]]; case U'\x80': /* <CONTROL> */ + [[fallthrough]]; case U'\x81': /* <CONTROL */ + [[fallthrough]]; case U'\x82': /* BREAK PERMITTED HERE */ + [[fallthrough]]; case U'\x83': /* NO BREAK HERE */ - case U'\x84': /* <CONTROL> */ + [[fallthrough]]; + case U'\x84': /* INDEX */ + [[fallthrough]]; case U'\x85': /* NEXT LINE */ + [[fallthrough]]; case U'\x86': /* START OF SELECTED AREA */ + [[fallthrough]]; case U'\x87': /* END OF SELECTED AREA */ + [[fallthrough]]; case U'\x88': /* CHARACTER TABULATION SET */ + [[fallthrough]]; case U'\x89': /* CHARACTER TABULATION SET WITH JUSTIFICATION */ + [[fallthrough]]; case U'\x8A': /* LINE TABULATION SET */ + [[fallthrough]]; case U'\x8B': /* PARTIAL LINE FORWARD */ + [[fallthrough]]; case U'\x8C': /* PARTIAL LINE BACKWARD */ + [[fallthrough]]; case U'\x8D': /* REVERSE LINE FEED */ + [[fallthrough]]; case U'\x8E': /* SINGLE SHIFT TWO */ + [[fallthrough]]; case U'\x8F': /* SINGLE SHIFT THREE */ + [[fallthrough]]; case U'\x90': /* DEVICE CONTROL STRING */ + [[fallthrough]]; case U'\x91': /* PRIVATE USE ONE */ + [[fallthrough]]; case U'\x92': /* PRIVATE USE TWO */ + [[fallthrough]]; case U'\x93': /* SET TRANSMIT STATE */ + [[fallthrough]]; case U'\x94': /* CANCEL CHARACTER */ + [[fallthrough]]; case U'\x95': /* MESSAGE WAITING */ + [[fallthrough]]; case U'\x96': /* START OF GUARDED AREA */ + [[fallthrough]]; case 
U'\x97': /* END OF GUARDED AREA */ + [[fallthrough]]; case U'\x98': /* START OF STRING */ + [[fallthrough]]; case U'\x99': /* <CONTROL> */ + [[fallthrough]]; case U'\x9A': /* SINGLE CHARACTER INTRODUCER */ + [[fallthrough]]; case U'\x9B': /* CONTROL SEQUENCE INTRODUCER */ + [[fallthrough]]; case U'\x9C': /* STRING TERMINATOR */ + [[fallthrough]]; case U'\x9D': /* OPERATING SYSTEM COMMAND */ + [[fallthrough]]; case U'\x9E': /* PRIVACY MESSAGE */ + [[fallthrough]]; case U'\x9F': /* APPLICATION PROGRAM COMMAND */ + [[fallthrough]]; case U'\xA0': /* NO-BREAK SPACE */ + [[fallthrough]]; case U'\u2028': /* LINE SEPERATOR */ + [[fallthrough]]; case U'\u2029': /* PARAGRAPH SEPERATOR */ + [[fallthrough]]; case U'\u202D': /* LEFT-TO-RIGHT OVERRIDE */ + [[fallthrough]]; case U'\u202E': /* RIGHT-TO-LEFT OVERRIDE */ + [[fallthrough]]; case U'\u2068': /* FIRST STRONG ISOLATE */ + [[fallthrough]]; case U'\u2069': /* POP DIRECTIONAL ISOLATE */ - ret.res = true; - break; + return true; } - return ret; } + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isdigit b/u8c/include/u8c/utf.d/isdigit new file mode 100644 index 0000000..84179e6 --- /dev/null +++ b/u8c/include/u8c/utf.d/isdigit @@ -0,0 +1,59 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_8r8RPCDLujofbg3k) +#define u8c_key_8r8RPCDLujofbg3k + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::isdigit(char32_t const _chr) -> bool { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + switch(_chr) { + [[likely]] default: + return false; + case U'\u0030': /* DIGIT ZERO */ + [[fallthrough]]; + case U'\u0031': /* DIGIT ONE */ + [[fallthrough]]; + case U'\u0032': /* DIGIT TWO */ + [[fallthrough]]; + case U'\u0033': /* DIGIT THREE */ + [[fallthrough]]; + case U'\u0034': /* DIGIT FOUR */ + [[fallthrough]]; + case U'\u0035': /* DIGIT FIVE */ + [[fallthrough]]; + case U'\u0036': /* DIGIT SIX */ + [[fallthrough]]; + case U'\u0037': /* DIGIT SEVEN */ + [[fallthrough]]; + case U'\u0038': /* DIGIT EIGHT */ + [[fallthrough]]; + case U'\u0039': /* DIGIT NINE */ + [[fallthrough]]; + case U'\u218A': /* TURNED DIGIT TWO */ + [[fallthrough]]; + case U'\u218B': /* TURNED DIGIT THREE */ + return true; + } +} + +#endif
\ No newline at end of file diff --git a/src/u8c/chk.h.d/islower.c b/u8c/include/u8c/utf.d/islower index 0cb3ea1..9fd40e6 100644 --- a/src/u8c/chk.h.d/islower.c +++ b/u8c/include/u8c/utf.d/islower @@ -3,185 +3,343 @@ This file is part of u8c. - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. - See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ - You should have received a copy of the GNU Affero General Public License along with u8c. +#if !defined(u8c_key_AtxlGqMDj7uXYyKb) +#define u8c_key_AtxlGqMDj7uXYyKb - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/chk.h> -struct u8c_islower_tuple u8c_islower(char32_t const _chr) { - struct u8c_islower_tuple ret = { - .stat = false, - }; +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::islower(char32_t const _chr) -> bool { /* Reports whether _chr is one of the lowercase letters enumerated in the switch below; every other value yields false. Throws std::domain_error when _chr is greater than u8c::unimax. */ + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } switch(_chr) { - default: - ret.res = false; - break; - case U'a': /* LATIN SMALL LETTER A */ - case U'b': /* LATIN SMALL LETTER B */ - case U'c': /* LATIN SMALL LETTER C */ - case U'd': /* LATIN SMALL LETTER D */ - case U'e': /* LATIN SMALL LETTER E */ - case U'f': /* LATIN SMALL LETTER F */ - case U'g': /* LATIN SMALL LETTER G */ - case U'h': /* LATIN SMALL LETTER H */ - case U'i': /* LATIN SMALL LETTER I */ - case U'j': /* LATIN SMALL LETTER J */ - case U'k': /* LATIN SMALL LETTER K */ - case U'l': /* LATIN SMALL LETTER L */ - case U'm': /* LATIN SMALL LETTER M */ - case U'n': /* LATIN SMALL LETTER N */ - case U'o': /* LATIN SMALL LETTER O */ - case U'p': /* LATIN SMALL LETTER P */ - case U'q': /* LATIN SMALL LETTER Q */ - case U'r': /* LATIN SMALL LETTER R */ - case U's': /* LATIN SMALL LETTER S */ - case U't': /* LATIN SMALL LETTER T */ - case U'u': /* LATIN SMALL LETTER U */ - case U'v': /* LATIN SMALL LETTER V */ - case U'w': /* LATIN SMALL LETTER W */ - case U'x': /* LATIN SMALL LETTER X */ - case U'y': /* LATIN SMALL LETTER Y */ - case U'z': /* LATIN SMALL LETTER Z */ + [[likely]] default: + return false; + case U'\u0061': /* LATIN SMALL LETTER A */ + [[fallthrough]]; + case U'\u0062': /* LATIN SMALL LETTER B */ + [[fallthrough]]; + case U'\u0063': /* LATIN SMALL LETTER C */ + [[fallthrough]]; + case U'\u0064': /* LATIN SMALL LETTER D */ + [[fallthrough]]; + case U'\u0065': /* LATIN SMALL LETTER E */ + [[fallthrough]]; + case U'\u0066': /* LATIN SMALL LETTER F */ + [[fallthrough]]; + case U'\u0067': /* LATIN SMALL LETTER G */ + 
[[fallthrough]]; + case U'\u0068': /* LATIN SMALL LETTER H */ + [[fallthrough]]; + case U'\u0069': /* LATIN SMALL LETTER I */ + [[fallthrough]]; + case U'\u006A': /* LATIN SMALL LETTER J */ + [[fallthrough]]; + case U'\u006B': /* LATIN SMALL LETTER K */ + [[fallthrough]]; + case U'\u006C': /* LATIN SMALL LETTER L */ + [[fallthrough]]; + case U'\u006D': /* LATIN SMALL LETTER M */ + [[fallthrough]]; + case U'\u006E': /* LATIN SMALL LETTER N */ + [[fallthrough]]; + case U'\u006F': /* LATIN SMALL LETTER O */ + [[fallthrough]]; + case U'\u0070': /* LATIN SMALL LETTER P */ + [[fallthrough]]; + case U'\u0071': /* LATIN SMALL LETTER Q */ + [[fallthrough]]; + case U'\u0072': /* LATIN SMALL LETTER R */ + [[fallthrough]]; + case U'\u0073': /* LATIN SMALL LETTER S */ + [[fallthrough]]; + case U'\u0074': /* LATIN SMALL LETTER T */ + [[fallthrough]]; + case U'\u0075': /* LATIN SMALL LETTER U */ + [[fallthrough]]; + case U'\u0076': /* LATIN SMALL LETTER V */ + [[fallthrough]]; + case U'\u0077': /* LATIN SMALL LETTER W */ + [[fallthrough]]; + case U'\u0078': /* LATIN SMALL LETTER X */ + [[fallthrough]]; + case U'\u0079': /* LATIN SMALL LETTER Y */ + [[fallthrough]]; + case U'\u007A': /* LATIN SMALL LETTER Z */ + [[fallthrough]]; case U'\u00DF': /* LATIN SMALL LETTER SHARP S */ + [[fallthrough]]; case U'\u00E0': /* LATIN SMALL LETTER A WITH GRAVE */ + [[fallthrough]]; case U'\u00E1': /* LATIN SMALL LETTER A WITH ACUTE */ + [[fallthrough]]; case U'\u00E2': /* LATIN SMALL LETTER A WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u00E3': /* LATIN SMALL LETTER A WITH TILDE */ + [[fallthrough]]; case U'\u00E4': /* LATIN SMALL LETTER A WITH DIAERESIS */ + [[fallthrough]]; case U'\u00E5': /* LATIN SMALL LETTER A WITH RING ABOVE */ + [[fallthrough]]; case U'\u00E6': /* LATIN SMALL LETTER AE */ + [[fallthrough]]; case U'\u00E7': /* LATIN SMALL LETTER C WITH CEDILLA */ + [[fallthrough]]; case U'\u00E8': /* LATIN SMALL LETTER E WITH GRAVE */ + [[fallthrough]]; case U'\u00E9': /* LATIN SMALL 
LETTER E WITH ACUTE */ + [[fallthrough]]; case U'\u00EA': /* LATIN SMALL LETTER E WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u00EB': /* LATIN SMALL LETTER E WITH DIAERESIS */ + [[fallthrough]]; case U'\u00EC': /* LATIN SMALL LETTER I WITH GRAVE */ + [[fallthrough]]; case U'\u00ED': /* LATIN SMALL LETTER I WITH ACUTE */ + [[fallthrough]]; case U'\u00EE': /* LATIN SMALL LETTER I WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u00EF': /* LATIN SMALL LETTER I WITH DIAERESIS */ + [[fallthrough]]; case U'\u00F0': /* LATIN SMALL LETTER ETH */ + [[fallthrough]]; case U'\u00F1': /* LATIN SMALL LETTER N WITH TILDE */ + [[fallthrough]]; case U'\u00F2': /* LATIN SMALL LETTER O WITH GRAVE */ + [[fallthrough]]; case U'\u00F3': /* LATIN SMALL LETTER O WITH ACUTE */ + [[fallthrough]]; case U'\u00F4': /* LATIN SMALL LETTER O WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u00F5': /* LATIN SMALL LETTER O WITH TILDE */ + [[fallthrough]]; case U'\u00F6': /* LATIN SMALL LETTER O WITH DIAERESIS */ + [[fallthrough]]; case U'\u00F8': /* LATIN SMALL LETTER O WITH STROKE */ + [[fallthrough]]; case U'\u00F9': /* LATIN SMALL LETTER U WITH GRAVE */ + [[fallthrough]]; case U'\u00FA': /* LATIN SMALL LETTER U WITH ACUTE */ + [[fallthrough]]; case U'\u00FB': /* LATIN SMALL LETTER U WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u00FC': /* LATIN SMALL LETTER U WITH DIAERESIS */ + [[fallthrough]]; case U'\u00FD': /* LATIN SMALL LETTER Y WITH ACUTE */ + [[fallthrough]]; case U'\u00FE': /* LATIN SMALL LETTER THORN */ + [[fallthrough]]; case U'\u00FF': /* LATIN SMALL LETTER Y WITH DIAERESIS */ + [[fallthrough]]; case U'\u0105': /* LATIN SMALL LETTER A WITH OGONEK */ + [[fallthrough]]; case U'\u0107': /* LATIN SMALL LETTER C WITH ACUTE */ + [[fallthrough]]; case U'\u010D': /* LATIN SMALL LETTER C WITH CARON */ + [[fallthrough]]; case U'\u010F': /* LATIN SMALL LETTER D WITH CARON */ + [[fallthrough]]; case U'\u0119': /* LATIN SMALL LETTER E WITH OGONEK */ + [[fallthrough]]; case U'\u011B': /* LATIN SMALL LETTER E WITH CARON 
*/ + [[fallthrough]]; case U'\u011F': /* LATIN SMALL LETTER G WITH BREVE */ + [[fallthrough]]; case U'\u0131': /* LATIN SMALL LETTER DOTLESS I */ - case U'\u0133': /* LATIN SMALL LIGATURE LJ */ + [[fallthrough]]; + case U'\u0133': /* LATIN SMALL LIGATURE IJ */ + [[fallthrough]]; case U'\u0138': /* LATIN SMALL LETTER KRA */ + [[fallthrough]]; case U'\u0142': /* LATIN SMALL LETTER L WITH STROKE */ + [[fallthrough]]; case U'\u0144': /* LATIN SMALL LETTER N WITH ACUTE */ + [[fallthrough]]; case U'\u0148': /* LATIN SMALL LETTER N WITH CARON */ + [[fallthrough]]; case U'\u014B': /* LATIN SMALL LETTER ENG */ + [[fallthrough]]; case U'\u0153': /* LATIN SMALL LIGATURE OE */ + [[fallthrough]]; case U'\u0159': /* LATIN SMALL LETTER R WITH CARON */ + [[fallthrough]]; case U'\u015B': /* LATIN SMALL LETTER S WITH ACUTE */ + [[fallthrough]]; case U'\u015F': /* LATIN SMALL LETTER S WITH CEDILLA */ + [[fallthrough]]; case U'\u0161': /* LATIN SMALL LETTER S WITH CARON */ + [[fallthrough]]; case U'\u0165': /* LATIN SMALL LETTER T WITH CARON */ + [[fallthrough]]; case U'\u016F': /* LATIN SMALL LETTER U WITH RING ABOVE */ + [[fallthrough]]; case U'\u017A': /* LATIN SMALL LETTER Z WITH ACUTE */ + [[fallthrough]]; case U'\u017C': /* LATIN SMALL LETTER Z WITH DOT ABOVE */ + [[fallthrough]]; case U'\u017E': /* LATIN SMALL LETTER Z WITH CARON */ + [[fallthrough]]; case U'\u01BF': /* LATIN LETTER WYNN */ + [[fallthrough]]; case U'\u01DD': /* LATIN SMALL LETTER TURNED E */ + [[fallthrough]]; case U'\u021D': /* LATIN SMALL LETTER YOGH */ + [[fallthrough]]; case U'\u0242': /* LATIN SMALL LETTER GLOTTAL STOP */ + [[fallthrough]]; case U'\u0250': /* LATIN SMALL LETTER TURNED A */ + [[fallthrough]]; case U'\u0251': /* LATIN SMALL LETTER ALPHA */ + [[fallthrough]]; case U'\u0252': /* LATIN SMALL LETTER TURNED ALPHA */ + [[fallthrough]]; case U'\u0253': /* LATIN SMALL LETTER B WITH HOOK */ + [[fallthrough]]; case U'\u0254': /* LATIN SMALL LETTER OPEN O */ + [[fallthrough]]; case U'\u0255': /* LATIN 
SMALL LETTER C WITH CURL */ + [[fallthrough]]; case U'\u0256': /* LATIN SMALL LETTER D WITH TAIL */ + [[fallthrough]]; case U'\u0257': /* LATIN SMALL LETTER D WITH HOOK */ + [[fallthrough]]; case U'\u0258': /* LATIN SMALL LETTER REVERSED E */ + [[fallthrough]]; case U'\u0259': /* LATIN SMALL LETTER SCHWA */ + [[fallthrough]]; case U'\u025A': /* LATIN SMALL LETTER SCHWA WITH HOOK */ + [[fallthrough]]; case U'\u025B': /* LATIN SMALL LETTER OPEN E */ + [[fallthrough]]; case U'\u025C': /* LATIN SMALL LETTER REVERSED OPEN E */ + [[fallthrough]]; case U'\u025D': /* LATIN SMALL LETTER REVERSED OPEN E WITH HOOK */ + [[fallthrough]]; case U'\u025E': /* LATIN SMALL LETTER CLOSED REVERSED OPEN E */ + [[fallthrough]]; case U'\u025F': /* LATIN SMALL LETTER DOTLESS J WITH STROKE */ + [[fallthrough]]; case U'\u0260': /* LATIN SMALL LETTER G WITH HOOK */ + [[fallthrough]]; case U'\u0261': /* LATIN SMALL LETTER SCRIPT G */ + [[fallthrough]]; case U'\u0262': /* LATIN LETTER SMALL CAPITAL G */ + [[fallthrough]]; case U'\u0263': /* LATIN SMALL LETTER GAMMA */ + [[fallthrough]]; case U'\u0264': /* LATIN SMALL LETTER RAMS HORN */ + [[fallthrough]]; case U'\u0265': /* LATIN SMALL LETTER TURNED H */ + [[fallthrough]]; case U'\u0266': /* LATIN SMALL LETTER H WITH HOOK */ + [[fallthrough]]; case U'\u0267': /* LATIN SMALL LETTER HENG WITH HOOK */ + [[fallthrough]]; case U'\u0268': /* LATIN SMALL LETTER I WITH STROKE */ + [[fallthrough]]; case U'\u0269': /* LATIN SMALL LETTER IOTA */ + [[fallthrough]]; case U'\u026A': /* LATIN LETTER SMALL CAPITAL I */ + [[fallthrough]]; case U'\u026B': /* LATIN SMALL LETTER L WITH MIDDLE TILDE */ + [[fallthrough]]; case U'\u026C': /* LATIN SMALL LETTER L WITH BELT */ + [[fallthrough]]; case U'\u026D': /* LATIN SMALL LETTER L WITH RETROFLEX HOOK */ + [[fallthrough]]; case U'\u026E': /* LATIN SMALL LETTER LEZH */ + [[fallthrough]]; case U'\u026F': /* LATIN SMALL LETTER TURNED M */ + [[fallthrough]]; case U'\u0270': /* LATIN SMALL LETTER TURNED M WITH LONG LEG 
*/ + [[fallthrough]]; case U'\u0271': /* LATIN SMALL LETTER M WITH HOOK */ + [[fallthrough]]; case U'\u0272': /* LATIN SMALL LETTER N WITH LEFT HOOK */ + [[fallthrough]]; case U'\u0273': /* LATIN SMALL LETTER N WITH RETROFLEX HOOK */ + [[fallthrough]]; case U'\u0283': /* LATIN SMALL LETTER ESH */ + [[fallthrough]]; case U'\u028A': /* LATIN SMALL LETTER UPSILON */ + [[fallthrough]]; case U'\u028B': /* LATIN SMALL LETTER V WITH HOOK */ + [[fallthrough]]; case U'\u0292': /* LATIN SMALL LETTER EZH */ + [[fallthrough]]; case U'\u0294': /* LATIN LETTER GLOTTAL STOP */ + [[fallthrough]]; case U'\u03B1': /* GREEK SMALL LETTER ALPHA */ + [[fallthrough]]; case U'\u03B2': /* GREEK SMALL LETTER BETA */ + [[fallthrough]]; case U'\u03B3': /* GREEK SMALL LETTER GAMMA */ + [[fallthrough]]; case U'\u03B4': /* GREEK SMALL LETTER DELTA */ + [[fallthrough]]; case U'\u03B5': /* GREEK SMALL LETTER EPSILON */ + [[fallthrough]]; case U'\u03B6': /* GREEK SMALL LETTER ZETA */ + [[fallthrough]]; case U'\u03B7': /* GREEK SMALL LETTER ETA */ + [[fallthrough]]; case U'\u03B8': /* GREEK SMALL LETTER THETA */ + [[fallthrough]]; case U'\u03B9': /* GREEK SMALL LETTER IOTA */ + [[fallthrough]]; case U'\u03BA': /* GREEK SMALL LETTER KAPPA */ + [[fallthrough]]; case U'\u03BB': /* GREEK SMALL LETTER LAMBDA */ + [[fallthrough]]; case U'\u03BC': /* GREEK SMALL LETTER MU */ + [[fallthrough]]; case U'\u03BD': /* GREEK SMALL LETTER NU */ + [[fallthrough]]; case U'\u03BE': /* GREEK SMALL LETTER XI */ + [[fallthrough]]; case U'\u03BF': /* GREEK SMALL LETTER OMICRON */ + [[fallthrough]]; case U'\u03C0': /* GREEK SMALL LETTER PI */ + [[fallthrough]]; case U'\u03C1': /* GREEK SMALL LETTER RHO */ + [[fallthrough]]; case U'\u03C2': /* GREEK SMALL LETTER FINAL SIGMA */ + [[fallthrough]]; case U'\u03C3': /* GREEK SMALL LETTER SIGMA */ + [[fallthrough]]; case U'\u03C4': /* GREEK SMALL LETTER TAU */ + [[fallthrough]]; case U'\u03C5': /* GREEK SMALL LETTER UPSILON */ + [[fallthrough]]; case U'\u03C6': /* GREEK 
SMALL LETTER PHI */ + [[fallthrough]]; case U'\u03C7': /* GREEK SMALL LETTER CHI */ + [[fallthrough]]; case U'\u03C8': /* GREEK SMALL LETTER PSI */ + [[fallthrough]]; case U'\u03C9': /* GREEK SMALL LETTER OMEGA */ + [[fallthrough]]; case U'\u1D79': /* LATIN SMALL LETTER INSULAR G */ + [[fallthrough]]; case U'\uA7B7': /* LATIN SMALL LETTER OMEGA */ + [[fallthrough]]; case U'\uFB00': /* LATIN SMALL LIGATURE FF */ - ret.res = true; - break; + return true; } - return ret; } + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/ispunct b/u8c/include/u8c/utf.d/ispunct new file mode 100644 index 0000000..f82f11e --- /dev/null +++ b/u8c/include/u8c/utf.d/ispunct @@ -0,0 +1,329 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_tmcwLOAAPKkIUthE) +#define u8c_key_tmcwLOAAPKkIUthE + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::ispunct(char32_t const _chr) -> bool { /* Reports whether _chr is one of the punctuation marks or symbols enumerated in the switch below; every other value yields false. Throws std::domain_error when _chr is greater than u8c::unimax. */ + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + switch(_chr) { + [[likely]] default: + return false; + case U'\u0021': /* EXCLAMATION MARK */ + [[fallthrough]]; + case U'\u0022': /* QUOTATION MARK */ + [[fallthrough]]; + case U'\u0023': /* NUMBER SIGN */ + [[fallthrough]]; + case U'\u0024': /* DOLLAR SIGN */ + [[fallthrough]]; + case U'\u0025': /* PERCENT SIGN */ + [[fallthrough]]; + case U'\u0026': /* AMPERSAND */ + [[fallthrough]]; + case U'\u0027': /* APOSTROPHE */ + [[fallthrough]]; + case U'\u0028': /* LEFT PARENTHESIS */ + [[fallthrough]]; + case U'\u0029': /* RIGHT PARENTHESIS */ + [[fallthrough]]; + case U'\u002A': /* ASTERISK */ + [[fallthrough]]; + case U'\u002B': /* PLUS SIGN */ + [[fallthrough]]; + case U'\u002C': /* COMMA */ + [[fallthrough]]; + case U'\u002D': /* HYPHEN-MINUS */ + [[fallthrough]]; + case U'\u002E': /* 
FULL STOP */ + [[fallthrough]]; + case U'\u002F': /* SOLIDUS */ + [[fallthrough]]; + case U'\u003A': /* COLON */ + [[fallthrough]]; + case U'\u003B': /* SEMICOLON */ + [[fallthrough]]; + case U'\u003C': /* LESS-THAN SIGN */ + [[fallthrough]]; + case U'\u003D': /* EQUALS SIGN */ + [[fallthrough]]; + case U'\u003E': /* GREATER-THAN SIGN */ + [[fallthrough]]; + case U'\u003F': /* QUESTION MARK */ + [[fallthrough]]; + case U'\u0040': /* COMMERCIAL AT */ + [[fallthrough]]; + case U'\u005B': /* LEFT SQUARE BRACKET */ + [[fallthrough]]; + case U'\u005C': /* REVERSE SOLIDUS */ + [[fallthrough]]; + case U'\u005D': /* RIGHT SQUARE BRACKET */ + [[fallthrough]]; + case U'\u005E': /* CIRCUMFLEX ACCENT */ + [[fallthrough]]; + case U'\u005F': /* LOW LINE */ + [[fallthrough]]; + case U'\u0060': /* GRAVE ACCENT */ + [[fallthrough]]; + case U'\u007B': /* LEFT CURLY BRACKET */ + [[fallthrough]]; + case U'\u007C': /* VERTICAL LINE */ + [[fallthrough]]; + case U'\u007D': /* RIGHT CURLY BRACKET */ + [[fallthrough]]; + case U'\u007E': /* TILDE */ + [[fallthrough]]; + case U'\u00A1': /* INVERTED EXCLAMATION MARK */ + [[fallthrough]]; + case U'\u00A2': /* CENT SIGN */ + [[fallthrough]]; + case U'\u00A3': /* POUND SIGN */ + [[fallthrough]]; + case U'\u00A4': /* CURRENCY SIGN */ + [[fallthrough]]; + case U'\u00A5': /* YEN SIGN */ + [[fallthrough]]; + case U'\u00A6': /* BROKEN BAR */ + [[fallthrough]]; + case U'\u00A7': /* SECTION SIGN */ + [[fallthrough]]; + case U'\u00A8': /* DIAERESIS */ + [[fallthrough]]; + case U'\u00A9': /* COPYRIGHT SIGN */ + [[fallthrough]]; + case U'\u00AA': /* FEMININE ORDINAL INDICATOR */ + [[fallthrough]]; + case U'\u00AB': /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u00AC': /* NOT SIGN */ + [[fallthrough]]; + case U'\u00AE': /* REGISTERED SIGN */ + [[fallthrough]]; + case U'\u00AF': /* MACRON */ + [[fallthrough]]; + case U'\u00B0': /* DEGREE SIGN */ + [[fallthrough]]; + case U'\u00B1': /* PLUS-MINUS SIGN */ + [[fallthrough]]; + 
case U'\u00B2': /* SUPERSCRIPT TWO */ + [[fallthrough]]; + case U'\u00B3': /* SUPERSCRIPT THREE */ + [[fallthrough]]; + case U'\u00B4': /* ACUTE ACCENT */ + [[fallthrough]]; + case U'\u00B5': /* MICRO SIGN */ + [[fallthrough]]; + case U'\u00B6': /* PILCROW SIGN */ + [[fallthrough]]; + case U'\u00B7': /* MIDDLE DOT */ + [[fallthrough]]; + case U'\u00B8': /* CEDILLA */ + [[fallthrough]]; + case U'\u00B9': /* SUPERSCRIPT ONE */ + [[fallthrough]]; + case U'\u00BA': /* MASCULINE ORDINAL INDICATOR */ + [[fallthrough]]; + case U'\u00BB': /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u00BC': /* VULGAR FRACTION ONE QUARTER */ + [[fallthrough]]; + case U'\u00BD': /* VULGAR FRACTION ONE HALF */ + [[fallthrough]]; + case U'\u00BE': /* VULGAR FRACTION THREE QUARTERS */ + [[fallthrough]]; + case U'\u00BF': /* INVERTED QUESTION MARK */ + [[fallthrough]]; + case U'\u00D7': /* MULTIPLICATION SIGN */ + [[fallthrough]]; + case U'\u00F7': /* DIVISION SIGN */ + [[fallthrough]]; + case U'\u2010': /* HYPHEN */ + [[fallthrough]]; + case U'\u2013': /* EN DASH */ + [[fallthrough]]; + case U'\u2014': /* EM DASH */ + [[fallthrough]]; + case U'\u2018': /* LEFT SINGLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u2019': /* RIGHT SINGLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u201C': /* LEFT DOUBLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u201D': /* RIGHT DOUBLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u2026': /* HORIZONTAL ELLIPSIS */ + [[fallthrough]]; + case U'\u2030': /* PER MILLE SIGN */ + [[fallthrough]]; + case U'\u2031': /* PER TEN THOUSAND SIGN */ + [[fallthrough]]; + case U'\u2032': /* PRIME */ + [[fallthrough]]; + case U'\u2033': /* DOUBLE PRIME */ + [[fallthrough]]; + case U'\u2034': /* TRIPLE PRIME */ + [[fallthrough]]; + case U'\u2035': /* REVERSED PRIME */ + [[fallthrough]]; + case U'\u2036': /* REVERSED DOUBLE PRIME */ + [[fallthrough]]; + case U'\u2037': /* REVERSED TRIPLE PRIME */ + [[fallthrough]]; + case U'\u203C': /* DOUBLE 
EXCLAMATION MARK */ + [[fallthrough]]; + case U'\u203D': /* INTERROBANG */ + [[fallthrough]]; + case U'\u2047': /* DOUBLE QUESTION MARK */ + [[fallthrough]]; + case U'\u2048': /* QUESTION EXCLAMATION MARK */ + [[fallthrough]]; + case U'\u2049': /* EXCLAMATION QUESTION MARK */ + [[fallthrough]]; + case U'\u20A3': /* FRENCH FRANC SIGN */ + [[fallthrough]]; + case U'\u20A4': /* LIRA SIGN */ + [[fallthrough]]; + case U'\u20A8': /* RUPEE SIGN */ + [[fallthrough]]; + case U'\u20A9': /* WON SIGN */ + [[fallthrough]]; + case U'\u20AC': /* EURO SIGN */ + [[fallthrough]]; + case U'\u20B9': /* INDIAN RUPEE SIGN */ + [[fallthrough]]; + case U'\u20BF': /* BITCOIN SIGN */ + [[fallthrough]]; + case U'\u2103': /* DEGREE CELSIUS */ + [[fallthrough]]; + case U'\u2107': /* EULER CONSTANT */ + [[fallthrough]]; + case U'\u2109': /* DEGREE FAHRENHEIT */ + [[fallthrough]]; + case U'\u210E': /* PLANCK CONSTANT */ + [[fallthrough]]; + case U'\u2117': /* SOUND RECORDING COPYRIGHT */ + [[fallthrough]]; + case U'\u2122': /* TRADE MARK SIGN */ + [[fallthrough]]; + case U'\u2125': /* OUNCE SIGN */ + [[fallthrough]]; + case U'\u2126': /* OHM SIGN */ + [[fallthrough]]; + case U'\u212A': /* KELVIN SIGN */ + [[fallthrough]]; + case U'\u214D': /* AKTIESELSKAB */ + [[fallthrough]]; + case U'\u2205': /* EMPTY SET */ + [[fallthrough]]; + case U'\u2212': /* MINUS SIGN */ + [[fallthrough]]; + case U'\u221A': /* SQUARE ROOT */ + [[fallthrough]]; + case U'\u221B': /* CUBE ROOT */ + [[fallthrough]]; + case U'\u221C': /* FOURTH ROOT */ + [[fallthrough]]; + case U'\u221E': /* INFINITY */ + [[fallthrough]]; + case U'\u2228': /* LOGICAL OR */ + [[fallthrough]]; + case U'\u2248': /* ALMOST EQUAL TO */ + [[fallthrough]]; + case U'\u2260': /* NOT EQUAL TO */ + [[fallthrough]]; + case U'\u2264': /* LESS-THAN OR EQUAL TO */ + [[fallthrough]]; + case U'\u2265': /* GREATER-THAN OR EQUAL TO */ + [[fallthrough]]; + case U'\u2609': /* SUN */ + [[fallthrough]]; + case U'\u263F': /* MERCURY */ + [[fallthrough]]; + case 
U'\u2640': /* FEMALE SIGN */ + [[fallthrough]]; + case U'\u2641': /* EARTH */ + [[fallthrough]]; + case U'\u2642': /* MALE SIGN */ + [[fallthrough]]; + case U'\u2643': /* JUPITER */ + [[fallthrough]]; + case U'\u2644': /* SATURN */ + [[fallthrough]]; + case U'\u2645': /* URANUS */ + [[fallthrough]]; + case U'\u2646': /* NEPTUNE */ + [[fallthrough]]; + case U'\u2647': /* PLUTO */ + [[fallthrough]]; + case U'\u26A2': /* DOUBLED FEMALE SIGN */ + [[fallthrough]]; + case U'\u26A3': /* DOUBLED MALE SIGN */ + [[fallthrough]]; + case U'\u26A4': /* INTERLOCKED FEMALE AND MALE SIGN */ + [[fallthrough]]; + case U'\u26A5': /* MALE AND FEMALE SIGN */ + [[fallthrough]]; + case U'\u26B3': /* CERES */ + [[fallthrough]]; + case U'\u26B4': /* PALLAS */ + [[fallthrough]]; + case U'\u26B5': /* JUNO */ + [[fallthrough]]; + case U'\u26B6': /* VESTA */ + [[fallthrough]]; + case U'\u26B7': /* CHIRON */ + [[fallthrough]]; + case U'\u2BD8': /* PROSERPINA */ + [[fallthrough]]; + case U'\u2BD9': /* ASTRAEA */ + [[fallthrough]]; + case U'\u2BDA': /* HYGIEA */ + [[fallthrough]]; + case U'\u2BDB': /* PHOLOS */ + [[fallthrough]]; + case U'\u2BDC': /* NESSUS */ + [[fallthrough]]; + case U'\u2E2E': /* REVERSED QUESTION MARK */ + [[fallthrough]]; + case U'\u33D7': /* SQUARE PH */ + [[fallthrough]]; + case U'\uFDFC': /* RIAL SIGN */ + [[fallthrough]]; + case U'\U0001F10D': /* CIRCLED ZERO WITH SLASH */ + [[fallthrough]]; + case U'\U0001F10E': /* CIRCLED ANTICLOCKWISE ARROW */ + [[fallthrough]]; + case U'\U0001F10F': /* CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH */ + [[fallthrough]]; + case U'\U0001F12F': /* COPYLEFT SYMBOL */ + [[fallthrough]]; + case U'\U0001F16D': /* CIRCLED CC */ + [[fallthrough]]; + case U'\U0001F16E': /* CIRCLED C WITH OVERLAID BACKSLASH */ + [[fallthrough]]; + case U'\U0001F16F': /* CIRCLED HUMAN FIGURE */ + return true; + } +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isspace b/u8c/include/u8c/utf.d/isspace new file mode 100644 index 0000000..ccf191a --- /dev/null +++ b/u8c/include/u8c/utf.d/isspace @@ -0,0 +1,47 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_fRbwVyyBwfrm0Slq) +#define u8c_key_fRbwVyyBwfrm0Slq + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::isspace(char32_t const _chr) -> bool { /* Reports whether _chr is one of the six whitespace characters listed below (U+0009 through U+000D, and U+0020); every other value yields false. Throws std::domain_error when _chr is greater than u8c::unimax. */ + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + switch(_chr) { + [[likely]] default: + return false; + case U'\u0009': /* HORIZONTAL TABULATION */ + [[fallthrough]]; + case U'\u000A': /* NEW LINE */ + [[fallthrough]]; + case U'\u000B': /* VERTICAL TABULATION */ + [[fallthrough]]; + case U'\u000C': /* FORM FEED */ + [[fallthrough]]; + case U'\u000D': /* CARRIAGE RETURN */ + [[fallthrough]]; + case U'\u0020': /* SPACE */ + return true; + } +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/issurro b/u8c/include/u8c/utf.d/issurro new file mode 100644 index 0000000..119c2e5 --- /dev/null +++ b/u8c/include/u8c/utf.d/issurro @@ -0,0 +1,35 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_f4d3TezxF0FFmbn4) +#define u8c_key_f4d3TezxF0FFmbn4 + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::issurro(char32_t const _chr) -> bool { /* Reports whether _chr lies in the surrogate range 0xD800 to 0xDFFF, both ends inclusive. Throws std::domain_error when _chr is greater than u8c::unimax. */ + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + if(_chr >= U'\xD800' && _chr <= U'\xDFFF') [[unlikely]] { /* hex escapes are used for the two bounds rather than \u escapes */ + return true; + } + return false; +} + +#endif
\ No newline at end of file diff --git a/src/u8c/chk.h.d/isupper.c b/u8c/include/u8c/utf.d/isupper index 8c5e2ab..4a18fd5 100644 --- a/src/u8c/chk.h.d/isupper.c +++ b/u8c/include/u8c/utf.d/isupper @@ -3,145 +3,263 @@ This file is part of u8c. - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. - See the GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ - You should have received a copy of the GNU Affero General Public License along with u8c. +#if !defined(u8c_key_CNx6iimb2pI6RXGS) +#define u8c_key_CNx6iimb2pI6RXGS - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/chk.h> -struct u8c_isupper_tuple u8c_isupper(char32_t const _chr) { - struct u8c_isupper_tuple ret = { - .stat = false, - }; +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::isupper(char32_t const _chr) -> bool { /* Reports whether _chr is one of the uppercase letters enumerated in the switch below; every other value yields false. Throws std::domain_error when _chr is greater than u8c::unimax. */ + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } switch(_chr) { - default: - ret.res = false; - break; - case U'A': /* LATIN CAPITAL LETTER A */ - case U'B': /* LATIN CAPITAL LETTER B */ - case U'C': /* LATIN CAPITAL LETTER C */ - case U'D': /* LATIN CAPITAL LETTER D */ + [[likely]] default: + return false; + case U'\u0041': /* LATIN CAPITAL LETTER A */ + [[fallthrough]]; + case U'\u0042': /* LATIN CAPITAL LETTER B */ + [[fallthrough]]; + case U'\u0043': /* LATIN CAPITAL LETTER C */ + [[fallthrough]]; + case U'\u0044': /* LATIN CAPITAL LETTER D */ + [[fallthrough]]; case U'E': /* LATIN CAPITAL LETTER E */ + [[fallthrough]]; case U'F': /* LATIN CAPITAL LETTER F */ + [[fallthrough]]; case U'G': /* LATIN CAPITAL LETTER G */ + [[fallthrough]]; case U'H': /* LATIN CAPITAL LETTER H */ + [[fallthrough]]; case U'I': /* LATIN CAPITAL LETTER I */ + [[fallthrough]]; case U'J': /* LATIN CAPITAL LETTER J */ + [[fallthrough]]; case U'K': /* LATIN CAPITAL LETTER K */ + [[fallthrough]]; case U'L': /* LATIN CAPITAL LETTER L */ + [[fallthrough]]; case U'M': /* LATIN CAPITAL LETTER M */ + [[fallthrough]]; case U'N': /* LATIN CAPITAL LETTER N */ + [[fallthrough]]; case U'O': /* LATIN CAPITAL LETTER O */ + [[fallthrough]]; case U'P': /* LATIN CAPITAL LETTER P */ + [[fallthrough]]; case U'Q': /* LATIN CAPITAL LETTER Q */ + [[fallthrough]]; case U'R': /* LATIN CAPITAL LETTER R */ + [[fallthrough]]; case U'S': /* LATIN CAPITAL LETTER S */ + [[fallthrough]]; case U'T': /* LATIN CAPITAL LETTER T */ + [[fallthrough]]; case U'U': /* LATIN CAPITAL LETTER U */ + [[fallthrough]]; case U'V': /* LATIN CAPITAL LETTER V */ + 
[[fallthrough]]; case U'X': /* LATIN CAPITAL LETTER X */ + [[fallthrough]]; case U'W': /* LATIN CAPITAL LETTER W */ + [[fallthrough]]; case U'Y': /* LATIN CAPITAL LETTER Y */ + [[fallthrough]]; case U'Z': /* LATIN CAPITAL LETTER Z */ + [[fallthrough]]; case U'\u00C0': /* LATIN CAPITAL LETTER A WITH GRAVE */ + [[fallthrough]]; case U'\u00C1': /* LATIN CAPITAL LETTER A WITH ACUTE */ + [[fallthrough]]; case U'\u00C2': /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u00C3': /* LATIN CAPITAL LETTER A WITH TILDE */ + [[fallthrough]]; case U'\u00C4': /* LATIN CAPITAL LETTER A WITH DIAERESIS */ + [[fallthrough]]; case U'\u00C5': /* LATIN CAPITAL LETTER A WITH RING ABOVE */ + [[fallthrough]]; case U'\u00C6': /* LATIN CAPITAL LETTER AE */ + [[fallthrough]]; case U'\u00C7': /* LATIN CAPITAL LETTER C WITH CEDILLA */ + [[fallthrough]]; case U'\u00C8': /* LATIN CAPITAL LETTER E WITH GRAVE */ + [[fallthrough]]; case U'\u00C9': /* LATIN CAPITAL LETTER E WITH ACUTE */ + [[fallthrough]]; case U'\u00CA': /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u00CB': /* LATIN CAPITAL LETTER E WITH DIAERESIS */ + [[fallthrough]]; case U'\u00CC': /* LATIN CAPITAL LETTER I WITH GRAVE */ + [[fallthrough]]; case U'\u00CD': /* LATIN CAPITAL LETTER I WITH ACUTE */ + [[fallthrough]]; case U'\u00CE': /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u00CF': /* LATIN CAPITAL LETTER I WITH DIAERESIS */ + [[fallthrough]]; case U'\u00D0': /* LATIN CAPITAL LETTER ETH */ + [[fallthrough]]; case U'\u00D1': /* LATIN CAPITAL LETTER N WITH TILDE */ + [[fallthrough]]; case U'\u00D2': /* LATIN CAPITAL LETTER O WITH GRAVE */ + [[fallthrough]]; case U'\u00D3': /* LATIN CAPITAL LETTER O WITH ACUTE */ + [[fallthrough]]; case U'\u00D4': /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u00D5': /* LATIN CAPITAL LETTER O WITH TILDE */ + [[fallthrough]]; case U'\u00D6': /* LATIN CAPITAL LETTER O WITH DIAERESIS */ + 
[[fallthrough]]; case U'\u00D8': /* LATIN CAPITAL LETTER O WITH STROKE */ + [[fallthrough]]; case U'\u00D9': /* LATIN CAPITAL LETTER U WITH GRAVE */ + [[fallthrough]]; case U'\u00DA': /* LATIN CAPITAL LETTER U WITH ACUTE */ + [[fallthrough]]; case U'\u00DB': /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u00DC': /* LATIN CAPITAL LETTER U WITH DIAERESIS */ + [[fallthrough]]; case U'\u00DD': /* LATIN CAPITAL LETTER Y WITH ACUTE */ + [[fallthrough]]; case U'\u00DE': /* LATIN CAPITAL LETTER THORN */ + [[fallthrough]]; case U'\u0100': /* LATIN CAPITAL LETTER A WITH MACRON */ + [[fallthrough]]; case U'\u0102': /* LATIN CAPITAL LETTER A WITH BREVE */ + [[fallthrough]]; case U'\u0104': /* LATIN CAPITAL LETTER A WITH OGONEK */ + [[fallthrough]]; case U'\u0106': /* LATIN CAPITAL LETTER C WITH ACUTE */ + [[fallthrough]]; case U'\u0108': /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u010A': /* LATIN CAPITAL LETTER C WITH DOT ABOVE */ + [[fallthrough]]; case U'\u010C': /* LATIN CAPITAL LETTER C WITH CARON */ + [[fallthrough]]; case U'\u010E': /* LATIN CAPITAL LETTER D WITH CARON */ + [[fallthrough]]; case U'\u0110': /* LATIN CAPITAL LETTER D WITH STROKE */ + [[fallthrough]]; case U'\u0112': /* LATIN CAPITAL LETTER E WITH MACRON */ + [[fallthrough]]; case U'\u0114': /* LATIN CAPITAL LETTER E WITH BREVE */ + [[fallthrough]]; case U'\u0116': /* LATIN CAPITAL LETTER E WITH DOT ABOVE */ + [[fallthrough]]; case U'\u0118': /* LATIN CAPITAL LETTER E WITH OGONEK */ + [[fallthrough]]; case U'\u011A': /* LATIN CAPITAL LETTER E WITH CARON */ + [[fallthrough]]; case U'\u011C': /* LATIN CAPITAL LETTER G WITH CIRCUMFLEX */ + [[fallthrough]]; case U'\u014A': /* LATIN CAPITAL LETTER ENG */ + [[fallthrough]]; case U'\u0152': /* LATIN CAPITAL LIGATURE OE */ + [[fallthrough]]; case U'\u0186': /* LATIN CAPITAL LETTER OPEN O */ + [[fallthrough]]; case U'\u018E': /* LATIN CAPITAL LETTER REVERSED E */ + [[fallthrough]]; case U'\u018F': /* LATIN 
CAPITAL LETTER SCHWA */ + [[fallthrough]]; case U'\u0190': /* LATIN CAPITAL LETTER OPEN E */ + [[fallthrough]]; case U'\u0194': /* LATIN CAPITAL LETTER GAMMA */ + [[fallthrough]]; case U'\u0196': /* LATIN CAPITAL LETTER IOTA */ + [[fallthrough]]; case U'\u01A9': /* LATIN CAPITAL LETTER ESH */ + [[fallthrough]]; case U'\u01B1': /* LATIN CAPITAL LETTER UPSILON */ + [[fallthrough]]; case U'\u01B2': /* LATIN CAPITAL LETTER V WITH HOOK */ + [[fallthrough]]; case U'\u01B7': /* LATIN CAPITAL LETTER EZH */ + [[fallthrough]]; case U'\u01F7': /* LATIN CAPITAL LETTER WYNN */ + [[fallthrough]]; case U'\u021C': /* LATIN CAPITAL LETTER YOGH */ + [[fallthrough]]; case U'\u0241': /* LATIN CAPITAL LETTER GLOTTAL STOP */ + [[fallthrough]]; case U'\u0391': /* GREEK CAPITAL LETTER ALPHA */ + [[fallthrough]]; case U'\u0392': /* GREEK CAPITAL LETTER BETA */ + [[fallthrough]]; case U'\u0393': /* GREEK CAPITAL LETTER GAMMA */ + [[fallthrough]]; case U'\u0394': /* GREEK CAPITAL LETTER DELTA */ + [[fallthrough]]; case U'\u0395': /* GREEK CAPITAL LETTER EPSILON */ + [[fallthrough]]; case U'\u0396': /* GREEK CAPITAL LETTER ZETA */ + [[fallthrough]]; case U'\u0397': /* GREEK CAPITAL LETTER ETA */ + [[fallthrough]]; case U'\u0398': /* GREEK CAPITAL LETTER THETA */ + [[fallthrough]]; case U'\u0399': /* GREEK CAPITAL LETTER IOTA */ + [[fallthrough]]; case U'\u039A': /* GREEK CAPITAL LETTER KAPPA */ + [[fallthrough]]; case U'\u039B': /* GREEK CAPITAL LETTER LAMBDA */ + [[fallthrough]]; case U'\u039C': /* GREEK CAPITAL LETTER MU */ + [[fallthrough]]; case U'\u039D': /* GREEK CAPITAL LETTER NU */ + [[fallthrough]]; case U'\u039E': /* GREEK CAPITAL LETTER XI */ + [[fallthrough]]; case U'\u039F': /* GREEK CAPITAL LETTER OMICRON */ + [[fallthrough]]; case U'\u03A0': /* GREEK CAPITAL LETTER PI */ + [[fallthrough]]; case U'\u03A1': /* GREEK CAPITAL LETTER RHO */ + [[fallthrough]]; case U'\u03A3': /* GREEK CAPITAL LETTER SIGMA */ + [[fallthrough]]; case U'\u03A4': /* GREEK CAPITAL LETTER TAU */ + 
[[fallthrough]]; case U'\u03A5': /* GREEK CAPITAL LETTER UPSILON */ + [[fallthrough]]; case U'\u03A6': /* GREEK CAPITAL LETTER PHI */ + [[fallthrough]]; case U'\u03A7': /* GREEK CAPITAL LETTER CHI */ + [[fallthrough]]; case U'\u03A8': /* GREEK CAPITAL LETTER PSI */ + [[fallthrough]]; case U'\u03A9': /* GREEK CAPITAL LETTER OMEGA */ + [[fallthrough]]; case U'\u1E9E': /* LATIN CAPITAL LETTER SHARP S */ + [[fallthrough]]; case U'\u2C6D': /* LATIN CAPITAL LETTER ALPHA */ + [[fallthrough]]; case U'\uA77D': /* LATIN CAPITAL LETTER INSULAR G */ + [[fallthrough]]; case U'\uA7B4': /* LATIN CAPITAL LETTER BETA */ + [[fallthrough]]; case U'\uA7B6': /* LATIN CAPITAL LETTER OMEGA */ - ret.res = true; - break; + return true; } - return ret; } + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isxdigit b/u8c/include/u8c/utf.d/isxdigit new file mode 100644 index 0000000..cc73526 --- /dev/null +++ b/u8c/include/u8c/utf.d/isxdigit @@ -0,0 +1,68 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_NdERYC9ToUZX0vHE) +#define u8c_key_NdERYC9ToUZX0vHE + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::isxdigit(char32_t const _chr) -> bool { /* Reports whether _chr is one of the hexadecimal digits listed below: U+0030 to U+0039 (0-9) and U+0041 to U+0046 (A-F). Throws std::domain_error when _chr is greater than u8c::unimax. NOTE(review): the lowercase digits a-f (U+0061 to U+0066) are not listed, so they yield false; confirm whether that is intended. */ + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + switch(_chr) { + [[likely]] default: + return false; + break; /* NOTE(review): unreachable, it follows the return on the previous line */ + case U'\u0030': /* DIGIT ZERO */ + [[fallthrough]]; + case U'\u0031': /* DIGIT ONE */ + [[fallthrough]]; + case U'\u0032': /* DIGIT TWO */ + [[fallthrough]]; + case U'\u0033': /* DIGIT THREE */ + [[fallthrough]]; + case U'\u0034': /* DIGIT FOUR */ + [[fallthrough]]; + case U'\u0035': /* DIGIT FIVE */ + [[fallthrough]]; + case U'\u0036': /* DIGIT SIX */ + [[fallthrough]]; + case U'\u0037': /* DIGIT SEVEN */ + [[fallthrough]]; + case U'\u0038': /* DIGIT EIGHT */ + [[fallthrough]]; + case U'\u0039': /* DIGIT NINE */ + [[fallthrough]]; + case U'\u0041': /* LATIN CAPITAL LETTER A */ + [[fallthrough]]; + case U'\u0042': /* LATIN CAPITAL LETTER B */ + [[fallthrough]]; + case U'\u0043': /* LATIN CAPITAL LETTER C */ + 
[[fallthrough]]; + case U'\u0044': /* LATIN CAPITAL LETTER D */ + [[fallthrough]]; + case U'\u0045': /* LATIN CAPITAL LETTER E */ + [[fallthrough]]; + case U'\u0046': /* LATIN CAPITAL LETTER F */ + return true; + } +} + +#endif
\ No newline at end of file diff --git a/u8c/src/operator.cc b/u8c/src/operator.cc new file mode 100644 index 0000000..3ea8eae --- /dev/null +++ b/u8c/src/operator.cc @@ -0,0 +1,28 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#include <algorithm> /* std::copy */ +#include <cstdint> /* u8c_ubytec */ +#include <ostream> /* std::ostream */ +#include <u8c/str> + +auto u8c::operator << (std::ostream & _strm,u8c::str const & _str) -> std::ostream & { + auto const u8 = _str.u8().app(u8'\u0000'); + return _strm << reinterpret_cast<unsigned char *>(u8.begin()); +} diff --git a/u8c/src/u8c/fmt.cc b/u8c/src/u8c/fmt.cc new file mode 100644 index 0000000..654fb98 --- /dev/null +++ b/u8c/src/u8c/fmt.cc @@ -0,0 +1,39 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#include <cstddef> /* std::nullptr_t */ +#include <string> /* std::u32string, std::u8string */ +#include <u8c/u8c> +#include <u8c/str> + +template<> auto u8c::fmt(char32_t const _chr) -> u8c::str { + return _chr; +} +template<> auto u8c::fmt(std::nullptr_t) -> u8c::str { + return U"nullptr"; +} +template<> auto u8c::fmt(u8c::str const _str) -> u8c::str { + return _str; +} +template<> auto u8c::fmt(void * _ptr) -> u8c::str { + if(_ptr == nullptr) [[unlikely]] { + return U"nullptr"; + } + return U"PTR"; +} diff --git a/u8c/src/u8c/print.cc b/u8c/src/u8c/print.cc new file mode 100644 index 0000000..bb08cb4 --- /dev/null +++ b/u8c/src/u8c/print.cc @@ -0,0 +1,25 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#include <ostream> /* std::ostream */ + +#if 0x0 +auto u8c::print([[maybe_unused]] std::ostream & _strm,[[maybe_unused]] u8c::str _msg) -> void { +} +#endif diff --git a/u8c/src/u8c/println.cc b/u8c/src/u8c/println.cc new file mode 100644 index 0000000..f607ad7 --- /dev/null +++ b/u8c/src/u8c/println.cc @@ -0,0 +1,25 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. 
+ + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#include <ostream> /* std::ostream */ + +#if 0x0 +auto u8c::println([[maybe_unused]] std::ostream & _strm,[[maybe_unused]] u8c::str _msg) -> void { +} +#endif
\ No newline at end of file |