diff options
78 files changed, 6408 insertions, 7139 deletions
@@ -1,2 +1 @@ -/.vscode /build
\ No newline at end of file diff --git a/changelog.md b/CHANGELOG.txt index 71f7f64..6ede68a 100644 --- a/changelog.md +++ b/CHANGELOG.txt @@ -1,3 +1,93 @@ +# 25 + +* Rename source directory: src => source +* Make changelog plain-text +* Update CMake style +* Rewrite in C99 +* Use separate CMake lists +* Update copyright and license notices +* Use header identifiers instead of keys for include guards +* Use ifdef/ifndef +* Remove top 'u8c' header (keep uninm) +* License under the LGPL +* Bump required CMake version +* Rename misc header to u8c +* Remove assert +* Remove attributes: + - u8c_attr_abitag + - u8c_attr_allocsz + - u8c_attr_artif + - u8c_attr_cold + - u8c_attr_fmt + - u8c_attr_malloc + - u8c_attr_hot + - u8c_attr_pure + - u8c_attr_retnonnull + - u8c_attr_sect + - u8c_attr_used + - u8c_attr_noderef + - u8c_attr_noesc + - u8c_attr_dup +* Remove type constant macros +* Remove our types +* Remove endian-related facilities +* Remove memory functions +* Remove bytesz and dbg +* Fix version number +* Remove fmt header +* Add new header 'format' +* Add new header 'character' +* Replace utf header with new 'format' and 'character' headers +* Remove math header +* Remove impl header +* Remove cstr header +* Remove arr header +* Make functions non-constexpr +* Update naming convention +* Implement UTF-16 conversions +* Split cnv into multiple functions: + - encode_utf8 (UTF-32 to UTF-8) + - decode_utf8 (UTF-8 to UTF-32) + - encode_utf16 (UTF-32 to UTF-16) + - decode_utf16 (UTF-16 to UTF-32) +* Use caller-provided buffer in conversion functions +* Rename u8c::isupper to u8c_is_majuscule +* Rename u8c::islower to u8c_is_minuscule +* Update code style +* Change type of version constant (now uint_least32_t) +* Use Git tagging for versioning +* Don't throw exceptions +* Update warning flags +* Update optimisation flags +* Rename u8c::unimax to u8c_MAX_CODE_POINT +* Remove u8c::uniblk +* Clean up code +* Don't define functions in headers +* Rename u8c::isspace to 
u8c_is_whitespace +* Add more characters to u8c_is_whitespace +* Rename u8c::ispunct to u8c_is_punctuation +* Add more characters to u8c_is_punctuation +* Remove u8c::isalnum +* Rename u8c::uninm to u8c_unicode_name +* Use caller-provided buffer in u8c_unicode_name +* Add constant for the maximum length of a Unicode identifier: u8c_MAXIMUM_NAME_LENGTH +* Add functions for determining the length of encodings and decodings: + - u8c_decode_utf8_length + - u8c_encode_utf8_length + - u8c_encode_utf16_length + - u8c_decode_utf16_length +* Rename u8c_attr_const to u8c_UNSEQUENCED +* Rename u8c_attr_inline to u8c_ALWAYS_INLINE +* Add new attribute u8c_NO_DISCARD +* Validate encodings +* Rework readme +* Rename u8c::isdigit and u8c::isxdigit to is_numeric and is_hexadecimal_numeric +* Rename u8c::isalpha to u8c_is_alphabetic +* Rename u8c::iscntrl to u8c_is_control +* Rename u8c::issurro to u8c_is_surrogate +* Optimise code +* Update gitignore + # 24 * Remove constructor taking a single value for `u8c::arr`. diff --git a/CMakeLists.txt b/CMakeLists.txt index d9d99ba..26030fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,138 +1,35 @@ -cmake_minimum_required( - VERSION - 3.20 -) -project( - u8c - VERSION - 27 - DESCRIPTION - "Unicode manipulation library." - HOMEPAGE_URL - "https://mandelbrot.dk/delta/u8c" - LANGUAGES - CXX -) -set( - CMAKE_CXX_STANDARD - 23 -) -set( - CMAKE_CXX_EXTENSIONS - OFF -) +# +# Copyright 2021, 2023 Gabriel Bjørnager Jensen. +# +# This file is part of u8c. +# +# u8c is free software: you can redistribute it +# and/or modify it under the terms of the GNU +# Lesser General Public License as published by +# the Free Software Foundation, either version 3 of +# the License, or (at your option) any later +# version. +# +# u8c is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even +# the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU +# Lesser General Public License along with u8c. If +# not, see <https://www.gnu.org/licenses/>. +# -# Options: -option( - U8C_TEST - "Build test program." - OFF -) +cmake_minimum_required(VERSION 3.21) -# Disable in-souce builds: -if( - "${PROJECT_BINARY_DIR}" - STREQUAL - "${PROJECT_SOURCE_DIR}" -) - message( - FATAL_ERROR - "In-source building is not allowed." - ) -endif() - -# Compiler Settings: -message( - STATUS - "Enabling colour output for Clang or GCC..." -) -if( - "${CMAKE_CXX_COMPILER_ID}" - STREQUAL - "Clang" -) - add_compile_options( - "-fcolor-diagnostics" - ) -elseif( - "${CMAKE_CXX_COMPILER_ID}" - STREQUAL - "GNU" -) - add_compile_options ( - "-fdiagnostics-color=always" - ) -endif() -message( - STATUS - "Enabling compile warnings..." -) -if( - MSVC -) - add_compile_options( - "/W4" - "/WX" - ) -else() - add_compile_options( - "-Wfatal-errors" - "-Wall" - "-Werror" - "-Wextra" - "-Wno-attributes" - "-pedantic-errors" - ) -endif() -if( - CMAKE_BUILD_TYPE - MATCHES - Release -) - message( - STATUS - "Setting optimisation level..." - ) - if( - MSVC - ) - add_compile_options( - "/Os" - ) - else() - add_compile_options( - "-Os" - ) - endif() -endif() -include_directories( - "${PROJECT_SOURCE_DIR}/u8c/include" -) - -# u8c settings: -add_library( +project( u8c - SHARED - "u8c/src/operator.cc" - "u8c/src/u8c/fmt.cc" - "u8c/src/u8c/print.cc" - "u8c/src/u8c/println.cc" + VERSION 29 + DESCRIPTION "Unicode for C." 
+ HOMEPAGE_URL "https://mandelbrot.dk/u8c" + LANGUAGES C CXX ) -# Test settings: -if( - U8C_TEST -) - add_executable( - test - "u8c-check/src/test.cc" - ) - add_dependencies( - test - u8c - ) - target_link_libraries( - test - u8c - ) -endif() +add_subdirectory(u8c) +add_subdirectory(u8c-check) @@ -1,5 +1,5 @@ - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> Everyone is permitted to copy and distribute verbatim copies @@ -7,15 +7,17 @@ Preamble - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. + The GNU General Public License is a free, copyleft license for +software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to +the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free -software for all its users. +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you @@ -24,34 +26,44 @@ them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. 
- Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. 
You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. @@ -60,7 +72,7 @@ modification follow. 0. Definitions. - "This License" refers to version 3 of the GNU Affero General Public License. 
+ "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. @@ -537,45 +549,35 @@ to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. - 13. Remote Network Interaction; Use with the GNU General Public License. - - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. + 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single +under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. 14. Revised Versions of this License. 
The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General +Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published +GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's +versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. @@ -633,29 +635,40 @@ the "copyright" line and a pointer to where the full notice is found. Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by + it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. + GNU General Public License for more details. - You should have received a copy of the GNU Affero General Public License + You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see +For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>. 
+ + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>. diff --git a/COPYING.LESSER b/COPYING.LESSER new file mode 100644 index 0000000..0a04128 --- /dev/null +++ b/COPYING.LESSER @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". 
+ + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. 
+ + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/README.md b/README.md deleted file mode 100644 index 8569ae4..0000000 --- a/README.md +++ /dev/null @@ -1,61 +0,0 @@ -# u8c - -[*u8c*](https://mandelbrot.dk/delta/u8c) is a free, open-source and portable general-purpose library. It is written in the C++ *(C++2b)* programming language. A wrapper for C may arrive in the future. - -## Features - -u8c has the following features: - -* Array container with support for compile-time, dynamic, and static arrays. -* Compile-time-compatible alternatives to the C maths library *(incomplete)*. -* Compile-time-compatible alternatives to the C string manipulation facilities *(incomplete)*. -* Facilities for determining the target platform using immediate functions *(Supports major platforms, including AIX, FreeBSD, Linux, macOS, OpenBSD, Windows NT)*. -* Optional platform-specific behaviour, including vendor-specific attributes, pointer-restriction. -* Quota type for fractional mathematics *(incomplete)*. 
-* String container with built-in UTF-conversions -* Unicode *(UTF-8, UTF-16 and UTF-32)* conversions and manipulations facilities *(UTF-16 lacking full support)*. - -With more to come in the future. - -## Installing - -TBA - -## Compiling - -u8c has been tested to work with Clang 14. - -1. Checkout u8c using `git`: - - * Clone the repository: `git clone https://mandelbrot.dk/delta/u8c.git - * Or do a shallow cone: `git clone --depth 1 https://mandelbrot.dk/delta/u8c.git` - -2. Configure the compilation of u8c: - - * `cd u8c` - * `cmake -B build` - - You can append the following options to the command: - - * `-DCMAKE_BUILD_TYPE` — Set the build type. Can be either `Debug`or `Release`. - * `-DU8C_TEST` — Enables compilation of the test program if set to `ON`. - -3. Build u8c build - - * `cmake --build build` - -## Contributing - -u8c does currently not accept **any** merge requests. - -## Copyright & License - -Copyright 2021 Gabriel Jensen - -This file is part of u8c. - -u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - -u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License along with u8c. If not, see <https://www.gnu.org/licenses/>.
\ No newline at end of file diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..0e2e875 --- /dev/null +++ b/README.txt @@ -0,0 +1,45 @@ +U8C + +Unicode for C. + +- ABOUT + +u8c is a C library for various Unicode-related functions. It is written in the +1999 edition of C - C99 - with support for C++11. + +- FEATURES + +u8c supports conversions to and from the UTF-8 and UTF-16 formats. UTF-32 is +used as the intermediate format. + +The encoding and decoding functions automatically replace invalid code +sequences with the replacement character: U+FFFD REPLACEMENT CHARACTER. + +Additionally, character trait functions can help determine the type of code +point: Alphabetic, control, numeric, punctuation, etc. + +- INSTALLATION + +A PKGBUILD is hosted on mandelbrot.dk at: +<https://mandelbrot.dk/pkgbuild_u8c> + +- COMPILATION + +u8c uses CMake as its build system. The flag 'U8C_CHECK' may be set to ON to +enable building of the check program. + +- COPYRIGHT & LICENSE + +Copyright 2021, 2023 Gabriel Bjørnager Jensen. + +This program is free software: you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License along +with this program. If not, see <https://www.gnu.org/licenses/>. diff --git a/u8c-check/CMakeLists.txt b/u8c-check/CMakeLists.txt new file mode 100644 index 0000000..8803021 --- /dev/null +++ b/u8c-check/CMakeLists.txt @@ -0,0 +1,63 @@ +# +# Copyright 2021, 2023 Gabriel Bjørnager Jensen. +# +# This file is part of u8c. 
+# +# u8c is free software: you can redistribute it +# and/or modify it under the terms of the GNU +# Lesser General Public License as published by +# the Free Software Foundation, either version 3 of +# the License, or (at your option) any later +# version. +# +# u8c is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even +# the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU +# Lesser General Public License along with u8c. If +# not, see <https://www.gnu.org/licenses/>. +# + +cmake_minimum_required(VERSION 3.21) + +option(U8C_CHECK "build the test program" OFF) + +if(U8C_CHECK) + set(CMAKE_CXX_STANDARD 17) + + set(CMAKE_CXX_EXTENSIONS OFF) + + add_executable( + check + + "source/check.cc" + ) + + add_dependencies( + check + + u8c + ) + + target_link_libraries( + check + + u8c + ) + + if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang|GNU") + target_compile_options( + check PRIVATE + + -Og + -Wall + -Wextra + -Wpedantic + -fdiagnostics-color=always + -g + ) + endif() +endif() diff --git a/u8c-check/source/check.cc b/u8c-check/source/check.cc new file mode 100644 index 0000000..52adcad --- /dev/null +++ b/u8c-check/source/check.cc @@ -0,0 +1,169 @@ +#include <climits> +#include <cstdint> +#include <cstdlib> +#include <iomanip> +#include <iostream> +#include <string> +#include <type_traits> +#include <u8c/character.h> +#include <u8c/format.h> + +using namespace ::std::literals::string_literals; + +int main() { + int unsigned error_count = 0x0u; + + auto const log_unit = [](::std::string const& identifier) noexcept -> void { + ::std::cerr <<"\n\x1B[38;5;75mtesting\x1B[0m " << identifier <<"\n\n"; + }; + + auto const check = [&error_count](long const line, auto const& left_value, auto const& right_value) { + ::std::cerr << " " << ::std::setbase(0xA) << line << ". 
"; + + auto const okay = [](auto const& left_reference, auto const& right_reference) -> bool { + auto const get_value = [](auto const& reference) -> auto { + using T = ::std::remove_cv_t<::std::remove_reference_t<decltype (left_value)>>; + + if constexpr (::std::is_same_v<T, char>) { + return static_cast<::std::uintmax_t>(static_cast<char unsigned>(reference)); + } else if constexpr (::std::is_signed_v<T>) { + return static_cast<::std::intmax_t>(reference); + } else if constexpr (::std::is_unsigned_v<T>) { + return static_cast<::std::uintmax_t>(reference); + } else { + return static_cast<T>(reference); + } + }; + + auto const left_value = get_value(left_reference); + auto const right_value = get_value(right_reference); + + ::std::cerr << "" << ::std::setbase(0x10) << left_value << " equals " << right_value << "... "; + + return left_value == right_value; + }(left_value, right_value); + + if (!okay) { + ::std::cerr << "\x1B[38;5;161mfalse\x1B[0m\n"; + ++error_count; + } else { + ::std::cerr <<"\x1B[38;5;77mtrue\x1B[0m\n"; + } + }; + #define check(left_value, right_value) (check(__LINE__, (left_value), (right_value))) + + ::std::cerr << "u8c-check " << u8c_VERSION << "\n"; + + [&] { + log_unit("UTF-8"); + + ::std::uint_least32_t const source[] { + UINT32_C(0x0026), + UINT32_C(0x00F0), + UINT32_C(0x218A), + UINT32_C(0x0001F480), + UINT32_C(0xD800), + UINT32_C(0x0010FFFF), + UINT32_C(0x00110000), + }; + auto const source_length = sizeof (source) / sizeof (source[0x0]); + + size_t const utf8_buffer_length = ::u8c_encode_utf8_length(source, source_length); + check(utf8_buffer_length, 0x14u); + + auto const utf8_buffer = new char[utf8_buffer_length]; + + ::u8c_encode_utf8(utf8_buffer, source, source_length); + + check(utf8_buffer[0x00], '\x26'); + check(utf8_buffer[0x01], '\xC3'); + check(utf8_buffer[0x02], '\xB0'); + check(utf8_buffer[0x03], '\xE2'); + check(utf8_buffer[0x04], '\x86'); + check(utf8_buffer[0x05], '\x8A'); + check(utf8_buffer[0x06], '\xF0'); + 
check(utf8_buffer[0x07], '\x9F'); + check(utf8_buffer[0x08], '\x92'); + check(utf8_buffer[0x09], '\x80'); + check(utf8_buffer[0x0A], '\xEF'); + check(utf8_buffer[0x0B], '\xBF'); + check(utf8_buffer[0x0C], '\xBD'); + check(utf8_buffer[0x0D], '\xF4'); + check(utf8_buffer[0x0E], '\x8F'); + check(utf8_buffer[0x0F], '\xBF'); + check(utf8_buffer[0x10], '\xBF'); + check(utf8_buffer[0x11], '\xEF'); + check(utf8_buffer[0x12], '\xBF'); + check(utf8_buffer[0x13], '\xBD'); + + auto const utf32_buffer_length = ::u8c_decode_utf8_length(utf8_buffer, utf8_buffer_length); + check(utf32_buffer_length, 0x7u); + + auto const utf32_buffer = new ::std::uint_least32_t[utf32_buffer_length]; + + ::u8c_decode_utf8(utf32_buffer, utf8_buffer, utf8_buffer_length); + + check(utf32_buffer[0x0], UINT32_C(0x0026)); + check(utf32_buffer[0x1], UINT32_C(0x00F0)); + check(utf32_buffer[0x2], UINT32_C(0x218A)); + check(utf32_buffer[0x3], UINT32_C(0x0001F480)); + check(utf32_buffer[0x4], UINT32_C(0xFFFD)); + + delete[] utf8_buffer; + delete[] utf32_buffer; + }(); + + [&] { + log_unit("UTF-16"); + + ::std::uint_least32_t const source[] { + UINT32_C(0x0026), + UINT32_C(0x00F0), + UINT32_C(0x218A), + UINT32_C(0x0001F480), + UINT32_C(0xD800), + UINT32_C(0x0010FFFF), + UINT32_C(0x00110000), + }; + auto const source_length = sizeof (source) / sizeof (source[0x0]); + + auto const utf16_buffer_length = ::u8c_encode_utf16_length(source, source_length); + check(utf16_buffer_length, 0x9u); + + auto const utf16_buffer = new ::std::uint_least16_t[utf16_buffer_length]; + + ::u8c_encode_utf16(utf16_buffer, source, source_length); + + check(utf16_buffer[0x0], UINT16_C(0x0026)); + check(utf16_buffer[0x1], UINT16_C(0x00F0)); + check(utf16_buffer[0x2], UINT16_C(0x218A)); + check(utf16_buffer[0x3], UINT16_C(0xD83D)); + check(utf16_buffer[0x4], UINT16_C(0xDC80)); + check(utf16_buffer[0x5], UINT16_C(0xFFFD)); + check(utf16_buffer[0x6], UINT16_C(0xDBFF)); + check(utf16_buffer[0x7], UINT16_C(0xDFFF)); + check(utf16_buffer[0x8], 
UINT16_C(0xFFFD)); + + auto const utf32_buffer_length = ::u8c_decode_utf16_length(utf16_buffer, utf16_buffer_length); + check(utf32_buffer_length, 0x7u); + + auto const utf32_buffer = new ::std::uint_least32_t[utf32_buffer_length]; + + ::u8c_decode_utf16(utf32_buffer, utf16_buffer, utf16_buffer_length); + + check(utf32_buffer[0x0], UINT32_C(0x0026)); + check(utf32_buffer[0x1], UINT32_C(0x00F0)); + check(utf32_buffer[0x2], UINT32_C(0x218A)); + check(utf32_buffer[0x3], UINT32_C(0x0001F480)); + check(utf32_buffer[0x4], UINT32_C(0xFFFD)); + check(utf32_buffer[0x5], UINT32_C(0x0010FFFF)); + check(utf32_buffer[0x6], UINT32_C(0xFFFD)); + + delete[] utf16_buffer; + delete[] utf32_buffer; + }(); + + ::std::cerr << "\ndone - " << error_count << " error(s)\n"; + + return error_count != 0x0u ? EXIT_FAILURE : EXIT_SUCCESS; +}
\ No newline at end of file diff --git a/u8c-check/src/test.cc b/u8c-check/src/test.cc deleted file mode 100644 index 1b600aa..0000000 --- a/u8c-check/src/test.cc +++ /dev/null @@ -1,116 +0,0 @@ -#if defined(NDEBUG) -#undef NDEBUG -#endif - -#include <chrono> /* std::chrono::duration, std::chrono::high_resolution_clock */ -#include <cstdlib> /* EXIT_FAILURE, EXIT_SUCCESS, std::exit */ -#include <cstring> /* std::strcmp */ -#include <iostream> /* std::cerr, std::cout, std::endl */ -#include <limits> /* std::numeric_limits */ -#include <u8c/u8c> - -# include "test0.inl" -# include "test1.inl" - -static_assert(u8c::abs(-0x1) == 0x1); -static_assert(u8c::abs(-0x1p0) == 0x1p0); -static_assert(u8c::abs(-0x100p0) == 0x100p0); - -static_assert(u8c::fma(0x10,0x10,0x100) == 0x200); - -static_assert(!u8c::isinf(0x0)); -static_assert(u8c::isinf(std::numeric_limits<float>::infinity())); - -static_assert(!u8c::isnan(0x0)); -static_assert(u8c::isnan(std::numeric_limits<float>::quiet_NaN())); - -static_assert(u8c::isprime(0x2u)); -static_assert(u8c::isprime(0x3u)); -static_assert(!u8c::isprime(0x4u)); -static_assert(u8c::isprime(0x5u)); -static_assert(!u8c::isprime(0x6u)); -static_assert(u8c::isprime(0x7u)); -static_assert(!u8c::isprime(0x8u)); -static_assert(!u8c::isprime(0x9u)); -static_assert(!u8c::isprime(0xAu)); -static_assert(u8c::isprime(0xBu)); -static_assert(!u8c::isprime(0xCu)); -static_assert(u8c::isprime(0xDu)); -static_assert(!u8c::isprime(0xEu)); -static_assert(!u8c::isprime(0xFu)); - -static_assert(u8c::pow(0x1,0x10000) == 0x1); -static_assert(u8c::pow(0x2,0x2) == 0x4); -static_assert(u8c::pow(0x2,0x4) == 0x10); -static_assert(u8c::pow(0x2,0x10) == 0x10000); -static_assert(u8c::pow(0x3,0x3) == 0x1B); - -static_assert(u8c::quota(0x1,0x3) < u8c::quota<>::inf()); -static_assert(u8c::quota(0x1,0x3) == u8c::quota(0x2,0x6)); -static_assert(u8c::quota<>::inf() == u8c::quota<>::inf()); -static_assert(u8c::quota<>::nan() != u8c::quota<>::nan()); - 
-static_assert(u8c::cstrlen("This is a string!") == 0x11uz); -static_assert(u8c::cstrlen("Das war ein Befehl!") == 0x13uz); - -static_assert(u8c::cstrcmp("Clang","Clang") == u8c_bytec(0x0)); -static_assert(u8c::cstrcmp("Clang","GCC") == u8c_bytec(0x1)); -static_assert(u8c::cstrcmp("GCC","Clang") == u8c_bytec(-0x1)); -static_assert(u8c::cstrcmp("GCC","GCC") == u8c_bytec(0x0)); - -auto main(int const argc,char const * const * const argv) -> int { - int constexpr maxtestn = 0x1; - auto gettestnm = [](int const _n) { - switch (_n) { - [[unlikely]] default: - return "N/A"; - case 0x0: - return "Array Stress-testing"; - case 0x1: - return "Strings"; - } - }; - auto helpscrn = [&](char const * const _prognm) { - std::cout << "u8c-test: Test u8c" << std::endl; - std::cout << "Usage: " << _prognm << " [test number]" << std::endl; - std::cout << std::endl; - std::cout << "Test numbers:" << std::endl; - for (int n = 0x0;n <= maxtestn;n += 0x1) { - std::cout << "\t " << n << " - \"" << gettestnm(n) << "\"" << std::endl; - } - std::cout << std::endl; - std::cout << "u8c version: " << u8c::ver << std::endl; - }; - auto test = [&](int const _n) { - auto const testnm = gettestnm(_n); - std::cout << ":: \u001B[95mTesting\u001B[0m test #" << _n << " \u001B[3m\"" << testnm << "\"\u001B[0m..." << std::endl << std::endl; - auto begin = std::chrono::high_resolution_clock::now(); - switch (_n) { - [[unlikely]] default: - std::exit(EXIT_FAILURE); - case 0x0: - ::test0(); - break; - case 0x1: - ::test1(); - break; - } - auto const end = std::chrono::high_resolution_clock::now(); - std::chrono::duration<double> const tmdiff = end - begin; - std::cout << std::endl << ":: \u001B[96mDone\u001B[0m testing test #" << _n << " \u001B[3m\"" << testnm << "\"\u001B[0m (took " << tmdiff.count() << " seconds)." 
<< std::endl; - }; - if (argc > 0x1) { - if (!std::strcmp(argv[0x1uz],"--help")) { - helpscrn(argv[0x0uz]); - std::exit(EXIT_SUCCESS); - } - else { - std::cerr << "Invalid argument \"\u001B[3m" << argv[0x1uz] << "\"\u001B[0m." << std::endl; - std::exit(EXIT_FAILURE); - } - } - for (int n = 0x0;n <= maxtestn;n += 0x1) { - test(n); - } - std::exit(EXIT_SUCCESS); -} diff --git a/u8c-check/src/test0.inl b/u8c-check/src/test0.inl deleted file mode 100644 index 5b4db6c..0000000 --- a/u8c-check/src/test0.inl +++ /dev/null @@ -1,49 +0,0 @@ -#include <iostream> /* std::cerr, std::endl */ -#include <limits> /* std::numeric_limits */ -#include <random> /* std::random_device, std::uniform_int_distribution */ -#include <u8c/arr> - -auto test0() -> void { - std::cerr << "Constructing array of 256 elements, each with a value of 15..."; - u8c::arr<int> arr(0x100uz,0xF); - u8c_assert(arr.sz() == 0x100uz); - u8c_assert(static_cast<u8c::size>(arr.end() - arr.begin()) == arr.sz()); - for (auto const elm : arr) { - u8c_assert(elm == 0xF); - } - std::cerr << " okay." << std::endl; - std::random_device rd; - { - std::uniform_int_distribution<int> distr(0x0,std::numeric_limits<int>::max()); - for (u8c::byte n = u8c_bytec(0x0);n <= u8c_bytec(0x10);n += u8c_ubytec(0x1)) { - auto const val = distr(rd); - std::cerr << "Filling array with the value of " << val << "..."; - arr.fill(val); - for (auto const elm : arr) { - u8c_assert(elm == val); - } - std::cerr << " okay." << std::endl; - } - } - { - std::uniform_int_distribution<u8c::size> distr(0x1,0xFFF); - for (u8c::byte n = u8c_bytec(0x0);n <= u8c_bytec(0x4);n += u8c_ubytec(0x1)) { - auto const sz = distr(rd); - std::cerr << "Allocating the array to the size of " << sz << "..."; - arr.alloc(sz); - u8c_assert(arr.sz() == sz); - std::cerr << " okay." 
<< std::endl; - } - } - std::cerr << "Doing some additionel tests..."; - arr.alloc(0x2uz); - u8c_assert(arr.sz() == 0x2uz); - arr.fill(0xF); - u8c_assert(arr[0x0uz] == 0xF); - u8c_assert(arr[0x1uz] == 0xF); - arr.realloc(0x4uz); - u8c_assert(arr.sz() == 0x4uz); - u8c_assert(arr[0x0uz] == 0xF); - u8c_assert(arr[0x1uz] == 0xF); - std::cerr << " okay." << std::endl; -} diff --git a/u8c-check/src/test1.inl b/u8c-check/src/test1.inl deleted file mode 100644 index 4e2a18e..0000000 --- a/u8c-check/src/test1.inl +++ /dev/null @@ -1,24 +0,0 @@ -#include <cstdint> /* u8c_uint32c, u8c_ubytec, std::int_fast8_t, u8c::uint32 */ -#include <iomanip> /* std::hex */ -#include <iostream> /* std::cout, std::endl */ -#include <random> /* std::random_device */ - -auto test1() -> void { - std::random_device rd; - std::uniform_int_distribution<char32_t> distr(u8c_uint32c(0x0),u8c_uint32c(0x100)); - for(std::int_fast8_t n = u8c_bytec(0x0);n <= u8c_bytec(0x4);n += u8c_ubytec(0x1)) { - auto const chr = distr(rd); - std::cout << "U+" << std::hex << static_cast<u8c::uint32>(chr) << " (\"" << u8c::uninm(chr) << "\" @ \"" << u8c::uniblk(chr) << "\")" << std::endl; - std::cout << "Is alphanumeric: " << u8c::isalnum(chr) << std::endl; - std::cout << "Is alphabetic: " << u8c::isalpha(chr) << std::endl; - std::cout << "Is control character: " << u8c::iscntrl(chr) << std::endl; - std::cout << "Is digit: " << u8c::isdigit(chr) << std::endl; - std::cout << "Is lowercase: " << u8c::islower(chr) << std::endl; - std::cout << "Is punctuation mark: " << u8c::ispunct(chr) << std::endl; - std::cout << "Is whitespace: " << u8c::isspace(chr) << std::endl; - std::cout << "Is surrogate point: " << u8c::issurro(chr) << std::endl; - std::cout << "Is uppercase: " << u8c::isupper(chr) << std::endl; - std::cout << "Is hexadecimal digit: " << u8c::isxdigit(chr) << std::endl; - std::cout << std::endl; - } -} diff --git a/u8c/CMakeLists.txt b/u8c/CMakeLists.txt new file mode 100644 index 0000000..25253d8 --- /dev/null 
+++ b/u8c/CMakeLists.txt @@ -0,0 +1,86 @@ +# +# Copyright 2021, 2023 Gabriel Bjørnager Jensen. +# +# This file is part of u8c. +# +# u8c is free software: you can redistribute it +# and/or modify it under the terms of the GNU +# Lesser General Public License as published by +# the Free Software Foundation, either version 3 of +# the License, or (at your option) any later +# version. +# +# u8c is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even +# the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU +# Lesser General Public License along with u8c. If +# not, see <https://www.gnu.org/licenses/>. +# + +cmake_minimum_required(VERSION 3.21) + +set(CMAKE_C_STANDARD 99) + +set(CMAKE_C_EXTENSIONS OFF) + +add_library( + u8c SHARED + + "source/character/is_alphabetic.c" + "source/character/is_control.c" + "source/character/is_hexadecimal_numeric.c" + "source/character/is_majuscule.c" + "source/character/is_minuscule.c" + "source/character/is_numeric.c" + "source/character/is_punctuation.c" + "source/character/is_surrogate.c" + "source/character/is_whitespace.c" + "source/character/unicode_name.c" + + "source/format/decode_utf16_length.c" + "source/format/decode_utf16.c" + "source/format/decode_utf8_length.c" + "source/format/decode_utf8.c" + "source/format/encode_utf16_length.c" + "source/format/encode_utf16.c" + "source/format/encode_utf8_length.c" + "source/format/encode_utf8.c" +) + +target_include_directories( + u8c PUBLIC + + "include" +) + +if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang|GNU") + target_compile_options( + u8c PRIVATE + + $<IF:$<STREQUAL:"${CMAKE_BUILD_TYPE}","Debug">,-Og,-Ofast> + -Wall + -Wextra + -Winvalid-utf8 + -Wmissing-declarations + -Wmissing-include-dirs + -Wnull-dereference + -Wpedantic + -Wpointer-arith + -Wstrict-overflow=5 + -fdiagnostics-color=always + 
-fno-strict-overflow + $<IF:$<STREQUAL:"${CMAKE_BUILD_TYPE}","Debug">,-g,> + ) +elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + target_compile_options( + u8c PRIVATE + + /O2 + /W4 + $<IF:$<STREQUAL:"${CMAKE_BUILD_TYPE}","Debug">,/Zo,> + ) +endif() diff --git a/u8c/include/u8c/arr b/u8c/include/u8c/arr deleted file mode 100644 index 5d8fc4d..0000000 --- a/u8c/include/u8c/arr +++ /dev/null @@ -1,65 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_RMLtbYyYWBICBIbq) -#define u8c_key_RMLtbYyYWBICBIbq - -#include <u8c/misc> - -namespace u8c { - template<typename T> class arr { - public: - constexpr auto alloc( u8c::size num) -> void; - template<std::convertible_to<T> T0> constexpr auto alloc( u8c::size num, T0 val) -> void; - constexpr auto app( u8c::arr<T> const & oth) -> u8c::arr<T> const &; - template<std::convertible_to<T> T0> constexpr auto app( T0 val) -> u8c::arr<T> const &; - [[nodiscard]] constexpr arr() noexcept = default; - [[nodiscard]] constexpr arr( T const * begin, T const * end); - [[nodiscard]] constexpr arr( u8c::size num); - [[nodiscard]] constexpr arr( u8c::arr<T> const & oth); - template<u8c::size N> [[nodiscard]] constexpr arr( T const (& arr)[N]) noexcept; - template<std::convertible_to<T> T0> [[nodiscard]] constexpr arr( u8c::size num, T0 val); - [[nodiscard]] constexpr auto begin() const noexcept -> T *; - [[nodiscard]] constexpr auto end() const noexcept -> T *; - template<std::convertible_to<T> T0> constexpr auto fill( T0 val) -> void; - template<std::convertible_to<T> T0> constexpr auto fill( T * begin, T * end,T0 val) -> void; - [[nodiscard]] constexpr auto isstatic() const noexcept -> bool; - constexpr auto log( bool val) noexcept -> void; - constexpr auto operator = ( u8c::arr<T> const & oth) -> u8c::arr<T> const &; - [[nodiscard]] constexpr auto operator [] (u8c::size pos) const noexcept -> T &; - constexpr auto realloc( u8c::size num) -> void; - constexpr auto set( T const * begin, T const * end) -> void; - constexpr auto set( u8c::arr<T> const & oth) -> void; - template<u8c::size N> constexpr auto set( T const (& arr)[N]) noexcept -> void; - template<std::convertible_to<T> T0> constexpr auto set( T0 val) -> void; - [[nodiscard]] constexpr auto sub( T const * begin, T const * end) const -> u8c::arr<T>; - [[nodiscard]] constexpr auto sz() const noexcept -> u8c::size; - constexpr ~arr() noexcept; - constexpr static auto npos = -0x1uz; - private: - bool 
_isstatic = false; - bool mutable _log = false; - T * _ptr = nullptr; - u8c::size _sz = 0x0uz; - }; -} - -#include <u8c/arr.d/arr> - -#endif diff --git a/u8c/include/u8c/arr.d/arr b/u8c/include/u8c/arr.d/arr deleted file mode 100644 index 63f35ad..0000000 --- a/u8c/include/u8c/arr.d/arr +++ /dev/null @@ -1,183 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_zQ92KNf0pxkz48g2) -#define u8c_key_zQ92KNf0pxkz48g2 - -#include <algorithm> /* std::copy, std::fill */ -#include <cstdlib> /* std::abort */ -#include <iostream> /* std::cerr, std::endl */ -#include <stdexcept> /* std::invalid_argument, std::out_of_range */ -#include <type_traits> /* std::is_constant_evaluated */ - -template<typename T> constexpr auto u8c::arr<T>::alloc(u8c::size const _num) -> void { - if (this->isstatic()) [[unlikely]] { - this->_isstatic = false; - } - else { - ::delete[] this->_ptr; - } - this->_ptr = ::new T[_num](); - this->_sz = _num; -} -template<typename T> template<std::convertible_to<T> T0> constexpr auto u8c::arr<T>::alloc(u8c::size const _num,T0 const _val) -> void { - this->alloc(_num); - this->fill(this->begin(),this->end(),_val); -} -template<typename T> constexpr auto u8c::arr<T>::app(u8c::arr<T> const & _oth) -> u8c::arr<T> const & { - this->realloc(this->sz() + _oth.sz()); - std::copy(_oth.begin(),_oth.end(),this->begin() + this->sz() - _oth.sz()); - return *this; -} -template<typename T> template<std::convertible_to<T> T0> constexpr auto u8c::arr<T>::app(T0 const _val) -> u8c::arr<T> const & { - this->realloc(this->sz() + 0x1uz); - (*this)[this->sz() - 0x1uz] = static_cast<T>(_val); - return *this; -} -template<typename T> constexpr u8c::arr<T>::arr(T const * const _begin,T const * const _end) { - this->set(_begin,_end); -} -template<typename T> constexpr u8c::arr<T>::arr(u8c::size const _num) { - this->alloc(_num); -} -template<typename T> constexpr u8c::arr<T>::arr(u8c::arr<T> const & _oth) { - this->set(_oth); -} -template<typename T> template<u8c::size N> constexpr u8c::arr<T>::arr(T const (&_arr)[N]) noexcept { - this->set(_arr); -} -template<typename T> template<std::convertible_to<T> T0> constexpr u8c::arr<T>::arr(u8c::size const _num,T0 const _val) { - this->alloc(_num,_val); -} -template<typename T> constexpr auto u8c::arr<T>::begin() const noexcept -> T * { - return this->_ptr; -} 
-template<typename T> constexpr auto u8c::arr<T>::end() const noexcept -> T * { - return this->begin() + this->_sz; -} -template<typename T> template<std::convertible_to<T> T0> constexpr auto u8c::arr<T>::fill(T0 const _val) -> void { - this->fill(this->begin(),this->end(),_val); -} -template<typename T> template<std::convertible_to<T> T0> constexpr auto u8c::arr<T>::fill(T * const u8c_restr _begin,T * const u8c_restr _end,T0 const _val) -> void { - if (this->sz() == 0x0uz) [[unlikely]] { - return; /* slime incident */ - } - if constexpr (u8c::dbg) { - if (_begin < this->begin() || _end > this->end()) [[unlikely]] { - throw std::out_of_range("Beginning or end are out of this array's range."); - } - } - if (this->isstatic()) [[unlikely]] { - this->alloc(static_cast<u8c::size>(_end - _begin)); - } - std::fill(this->begin(),this->end(),static_cast<T>(_val)); -} -template<typename T> constexpr auto u8c::arr<T>::isstatic() const noexcept -> bool { - return this->_isstatic; -} -template<typename T> constexpr auto u8c::arr<T>::log(bool const _val) noexcept -> void { - this->_log = _val; -} -template<typename T> constexpr auto u8c::arr<T>::operator = (u8c::arr<T> const & _oth) -> u8c::arr<T> const & { - this->set(_oth); - return *this; -} -template<typename T> constexpr auto u8c::arr<T>::operator [] (u8c::size const _pos) const noexcept -> T & { - if constexpr (u8c::dbg) { - if (_pos >= this->sz()) [[unlikely]] { - std::cerr << "u8c :: " << __func__ << " :: Input parameter is out of range." 
<< std::endl; - std::abort(); - } - } - return this->begin()[_pos]; -} -template<typename T> constexpr auto u8c::arr<T>::realloc(u8c::size const _num) -> void { - if (this->sz() == 0x0uz) [[unlikely]] { - return this->alloc(_num); - } - if (this->isstatic()) [[unlikely]] { - this->_isstatic = false; - } - this->_ptr = u8c::renew(this->begin(),this->sz(),_num); - this->_sz = _num; -} -template<typename T> constexpr auto u8c::arr<T>::set(T const * const u8c_restr _begin,T const * const u8c_restr _end) -> void { - if constexpr (u8c::dbg) { - if (_begin == nullptr || _end == nullptr) [[unlikely]] { - throw std::invalid_argument("Provided parameter has value of nullptr."); - } - } - this->alloc(static_cast<u8c::size>(_end - _begin + 0x1uz)); - std::copy(_begin,_end,this->begin()); -} -template<typename T> constexpr auto u8c::arr<T>::set(u8c::arr<T> const & _oth) -> void { - this->set(_oth.begin(),_oth.end()); -} -template<typename T> template<u8c::size N> constexpr auto u8c::arr<T>::set(T const (&_arr)[N]) noexcept -> void { - this->~arr(); - this->_isstatic = true; - this->_ptr = _arr; - this->_sz = N; -} -template<typename T> template<std::convertible_to<T> T0> constexpr auto u8c::arr<T>::set(T0 const _val) -> void { - this->alloc(0x1uz); - *this->begin() = _val; -} -template<typename T> constexpr auto u8c::arr<T>::sub(T const * const u8c_restr _begin,T const * const u8c_restr _end) const -> u8c::arr<T> { - if (this->_log) [[unlikely]] { - std::cerr << "u8c :: Generating subarray" << std::endl; - } - if constexpr (u8c::dbg) { - if (_begin < this->begin() || _end > this->end()) [[unlikely]] { - if (this->_log) [[unlikely]] { - std::cerr << "u8c :: Input out of range" << std::endl; - } - throw std::out_of_range("Beginning or end are out of this array's range."); - } - } - u8c::size const sz = static_cast<u8c::size>(_end - _begin) + 0x1uz; - u8c::arr<T> arr; - if (this->isstatic()) [[unlikely]] { - arr._sz = sz; - arr._ptr = _begin; - } - else { - arr.alloc(sz); - 
std::copy(_begin,_end,arr.begin()); - } - return arr; -} -template<typename T> constexpr auto u8c::arr<T>::sz() const noexcept -> u8c::size { - return this->_sz; -} -template<typename T> constexpr u8c::arr<T>::~arr<T>() noexcept { - if (this->isstatic()) { - if (this->_log) [[unlikely]] { - std::cerr << "u8c :: Destroying static array" << std::endl; - } - return; - } - if (this->_log) [[unlikely]] { - std::cerr << "u8c :: Destroying dynamic array" << std::endl; - std::cerr << "u8c :: Deallocating" << std::endl; - } - ::delete[] this->_ptr; -} - -#endif diff --git a/u8c/include/u8c/character.h b/u8c/include/u8c/character.h new file mode 100644 index 0000000..678ea58 --- /dev/null +++ b/u8c/include/u8c/character.h @@ -0,0 +1,60 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. + + u8c is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU + Lesser General Public License along with u8c. If + not, see <https://www.gnu.org/licenses/>. 
+*/ + +#ifndef u8c_HEADER_CHARACTER +#define u8c_HEADER_CHARACTER + +#include <u8c/u8c.h> + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +// U+0001F676 SANS-SERIF HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT +#define u8c_MAXIMUM_NAME_LENGTH ((size_t)+0x3Cu) + +u8c_NO_DISCARD u8c_NO_THROW u8c_UNSEQUENCED bool u8c_is_surrogate(uint_least32_t _code_point); + +u8c_NO_DISCARD u8c_NO_THROW u8c_UNSEQUENCED bool u8c_is_control(uint_least32_t _code_point); + +u8c_NO_DISCARD u8c_NO_THROW u8c_UNSEQUENCED bool u8c_is_punctuation(uint_least32_t _code_point); +u8c_NO_DISCARD u8c_NO_THROW u8c_UNSEQUENCED bool u8c_is_whitespace( uint_least32_t _code_point); + +u8c_NO_DISCARD u8c_NO_THROW u8c_UNSEQUENCED bool u8c_is_numeric( uint_least32_t _code_point); +u8c_NO_DISCARD u8c_NO_THROW u8c_UNSEQUENCED bool u8c_is_hexadecimal_numeric(uint_least32_t _code_point); + +u8c_NO_DISCARD u8c_NO_THROW u8c_UNSEQUENCED bool u8c_is_alphabetic(uint_least32_t _code_point); +u8c_NO_DISCARD u8c_NO_THROW u8c_UNSEQUENCED bool u8c_is_majuscule( uint_least32_t _code_point); +u8c_NO_DISCARD u8c_NO_THROW u8c_UNSEQUENCED bool u8c_is_minuscule( uint_least32_t _code_point); + +u8c_NO_THROW size_t u8c_unicode_name(char* u8c_RESTRICT _buffer, uint_least32_t _code_point); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/u8c/include/u8c/cstr b/u8c/include/u8c/cstr deleted file mode 100644 index f8f5184..0000000 --- a/u8c/include/u8c/cstr +++ /dev/null @@ -1,37 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. 
- - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_gM1GPEGwZN8BgcwU) -#define u8c_key_gM1GPEGwZN8BgcwU - -#include <u8c/misc> - -namespace u8c { - constexpr auto cstrcmp(char const * lstr,char const * rstr) noexcept -> u8c::byte; - constexpr auto cstrcpy(char * dest,char const * src) noexcept -> char *; - constexpr auto cstrdup(char const * str) -> char *; - constexpr auto cstrlen(char const * str) noexcept -> u8c::size; -} - -#include <u8c/cstr.d/cstrcmp> -#include <u8c/cstr.d/cstrcpy> -#include <u8c/cstr.d/cstrdup> -#include <u8c/cstr.d/cstrlen> - -#endif diff --git a/u8c/include/u8c/cstr.d/cstrcmp b/u8c/include/u8c/cstr.d/cstrcmp deleted file mode 100644 index 9615ad2..0000000 --- a/u8c/include/u8c/cstr.d/cstrcmp +++ /dev/null @@ -1,43 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_hQ3g8CRKOedpjvM7) -#define u8c_key_hQ3g8CRKOedpjvM7 - - -#include <algorithm> /* std::min */ - -constexpr auto u8c::cstrcmp(char const * const u8c_restr _lstr,char const * const u8c_restr _rstr) noexcept -> u8c::byte { - auto const maxn = std::min(u8c::cstrlen(_lstr),u8c::cstrlen(_rstr)); - for (auto n = 0x0uz;n <= maxn;n += 0x1uz) { - auto const lchr = _lstr[n]; - auto const rchr = _rstr[n]; - if (lchr != rchr) [[unlikely]] { - if (lchr > rchr) { - return u8c_bytec(-0x1); - } - if (lchr < rchr) { - return u8c_bytec(0x1); - } - } - } - return u8c_bytec(0x0); -} - -#endif diff --git a/u8c/include/u8c/cstr.d/cstrcpy b/u8c/include/u8c/cstr.d/cstrcpy deleted file mode 100644 index 37cad96..0000000 --- a/u8c/include/u8c/cstr.d/cstrcpy +++ /dev/null @@ -1,34 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_MvAfxuZelp52mHk5) -#define u8c_key_MvAfxuZelp52mHk5 - -#include <algorithm> /* std::copy */ - -constexpr auto u8c::cstrcpy(char * const u8c_restr _dest,char const * const u8c_restr _src) noexcept -> char * { - auto const sz = u8c::cstrlen(_src); - if (sz == 0x0uz) [[unlikely]] { - return _dest; - } - std::copy(_src,_src + sz - 0x1uz,_dest); - return _dest; -} - -#endif diff --git a/u8c/include/u8c/cstr.d/cstrdup b/u8c/include/u8c/cstr.d/cstrdup deleted file mode 100644 index d37f03b..0000000 --- a/u8c/include/u8c/cstr.d/cstrdup +++ /dev/null @@ -1,29 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_vf9vDNmIndanmgPg) -#define u8c_key_vf9vDNmIndanmgPg - -constexpr auto u8c::cstrdup(char const * const u8c_restr _str1) -> char * { - auto const sz = u8c::cstrlen(_str1); - auto * const u8c_restr str = ::new char[sz]; - return u8c::cstrcpy(str,_str1);; -} - -#endif diff --git a/u8c/include/u8c/cstr.d/cstrlen b/u8c/include/u8c/cstr.d/cstrlen deleted file mode 100644 index 2f7cb5a..0000000 --- a/u8c/include/u8c/cstr.d/cstrlen +++ /dev/null @@ -1,34 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. 
- - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_2yXSG12yvwzu2XCt) -#define u8c_key_2yXSG12yvwzu2XCt - -constexpr auto u8c::cstrlen(char const * const u8c_restr _str) noexcept -> u8c::size { - auto sz = 0x0uz; - for (u8c::size n = 0x0uz;;n += 0x1uz) { - if (_str[n] == '\u0000') [[unlikely]] { - break; - } - sz += 0x1uz; - } - return sz; -} - -#endif diff --git a/u8c/include/u8c/format.h b/u8c/include/u8c/format.h new file mode 100644 index 0000000..60a85e9 --- /dev/null +++ b/u8c/include/u8c/format.h @@ -0,0 +1,52 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. + + u8c is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU + Lesser General Public License along with u8c. If + not, see <https://www.gnu.org/licenses/>. 
+*/ + +#ifndef u8c_HEADER_FORMAT +#define u8c_HEADER_FORMAT + +#include <u8c/u8c.h> + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +u8c_NO_DISCARD u8c_NO_THROW size_t u8c_encode_utf8_length(uint_least32_t const* u8c_RESTRICT _source, size_t _count); +u8c_NO_DISCARD u8c_NO_THROW size_t u8c_decode_utf8_length(char const* u8c_RESTRICT _source, size_t _count); + +u8c_NO_DISCARD u8c_NO_THROW size_t u8c_encode_utf16_length(uint_least32_t const* u8c_RESTRICT _source, size_t _count); +u8c_NO_DISCARD u8c_NO_THROW size_t u8c_decode_utf16_length(uint_least16_t const* u8c_RESTRICT _source, size_t _count); + +u8c_NO_THROW size_t u8c_encode_utf8(char* u8c_RESTRICT _buffer, uint_least32_t const* u8c_RESTRICT _source, size_t _count); +u8c_NO_THROW size_t u8c_decode_utf8(uint_least32_t* u8c_RESTRICT _buffer, char const* u8c_RESTRICT _source, size_t _count); + +u8c_NO_THROW size_t u8c_encode_utf16(uint_least16_t* u8c_RESTRICT _buffer, uint_least32_t const* u8c_RESTRICT _source, size_t _count); +u8c_NO_THROW size_t u8c_decode_utf16(uint_least32_t* u8c_RESTRICT _buffer, uint_least16_t const* u8c_RESTRICT _source, size_t _count); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/u8c/include/u8c/impl b/u8c/include/u8c/impl deleted file mode 100644 index af6a2c4..0000000 --- a/u8c/include/u8c/impl +++ /dev/null @@ -1,33 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. 
- - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_9y1ZpGLV5Chmuy9U) -#define u8c_key_9y1ZpGLV5Chmuy9U - -#include <u8c/cstr> - -namespace u8c { - [[nodiscard]] consteval auto isarch(char const * arch) noexcept -> bool; - [[nodiscard]] consteval auto isos( char const * os) noexcept -> bool; -} - -#include <u8c/impl.d/isarch> -#include <u8c/impl.d/isos> - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/impl.d/isarch b/u8c/include/u8c/impl.d/isarch deleted file mode 100644 index 33b0faa..0000000 --- a/u8c/include/u8c/impl.d/isarch +++ /dev/null @@ -1,90 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_HeaDXGqHoIcCdWWR) -#define u8c_key_HeaDXGqHoIcCdWWR - -consteval auto u8c::isarch([[maybe_unused]] char const * const u8c_restr _arch) noexcept -> bool { - if (!u8c::cstrcmp(_arch,"alpha")) { -#if defined(__alpha) || defined(__alpha__) || defined(_M_ALPHA) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_arch,"arm")) { -#if defined(__TARGET_ARCH_ARM) || defined(__TARGET_ARCH_THUMB) || defined(__arm) || defined(__arm__) || defined(__thumb__) || defined(_M_ARM) || defined(_M_ARMT) - return true; -#else - return false; - } -#endif - if (!u8c::cstrcmp(_arch,"itanium")) { -#if defined(__IA64__) || defined(__ia64__) || defined(__itanium__) || defined(_IA64) || defined(_M_IA64) - return true; -#else - return false; - } -#endif - if (!u8c::cstrcmp(_arch,"m68k")) { -#if defined(__MC68K__) || defined(__m68k__) || defined(M68000) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_arch,"mips")) { -#if defined(__MIPS__) || defined(__mips) || defined(__mips__) || defined(mips) - return true; -#else - return false; 
-#endif - } - if (!u8c::cstrcmp(_arch,"pa-risc")) { -#if defined(__HPPA__) || defined(__hppa) || defined(__hppa__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_arch,"ppc")) { -#if defined(__POWERPC__) || defined(__ppc) || defined(__PPC__) || defined(__powerpc) || defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(_M_PPC) || defined(_XENON) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_arch,"sparc")) { -#if defined(__sparc) || defined(__sparc__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_arch,"x86")) { -#if defined(__386) || defined(__I86__) || defined(__IA32__) || defined(__INTEL__) || defined(__THW_INTEL__) || defined(__X86__) || defined(__i386) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_I86) || defined(_M_IX86) || defined(_X86_) ||defined(i386) - return true; -#else - return false; -#endif - } - return false; -} - -#endif diff --git a/u8c/include/u8c/impl.d/isos b/u8c/include/u8c/impl.d/isos deleted file mode 100644 index 06a6934..0000000 --- a/u8c/include/u8c/impl.d/isos +++ /dev/null @@ -1,258 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_ACd4FIGZ23h2QNrU) -#define u8c_key_ACd4FIGZ23h2QNrU - -#if __has_include(<sys/param.h>) -#include <sys/param.h> /* BSD */ -#endif -#if __has_include(<unistd.h>) -#include <unistd.h> /* _POSIX_VERSION */ -#endif - -consteval auto u8c::isos(char const * const u8c_restr _os) noexcept -> bool { - if (!u8c::cstrcmp(_os,"aix")) { -#if defined(__TOS_AIX__) || defined(_AIX) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"amigaos")) { -#if defined(__amigaos__) || defined(AMIGA) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"android")) { -#if defined(__ANDROID__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"beos")) { -#if defined(__BEOS__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"bluegene")) { -#if defined(__THW_BLUEGENE__) || defined(__TOS_BGQ__) || defined(__bg__) || defined(__bgq__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"bsd")) { -#if defined(_SYSTYPE_BSD) || defined(BSD) - return true; -#else - return u8c::isos("bsdos") || u8c::isos("dragonflybsd") || u8c::isos("freebsd") || u8c::isos("netbsd") || u8c::isos("openbsd"); -#endif - } - if (!u8c::cstrcmp(_os,"bsdos")) { -#if defined(__bsdi__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"dragonflybsd")) { -#if defined(__DragonFly__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"freebsd")) { -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"hpux")) { -#if defined(__hpux) || defined(_hpux) || defined(hpux) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"hurd")) { -#if defined(__GNU__) || defined(__gnu_hurd__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"ibmi")) { -#if defined(__OS400__) || defined(__OS400_TGTVRM__) - return true; -#else - return false; 
-#endif - } - if (!u8c::cstrcmp(_os,"integrity")) { -#if defined(__INTEGRITY) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"interix")) { -#if defined(__INTERIX) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"irix")) { -#if defined(__sgi) || defined(sgi) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"linux")) { -#if defined(__linux) || defined(__linux__) || defined(linux) || defined(u8c_os_android) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"mac")) { -#if (defined(__APPLE__) && defined(__MACH__)) || defined(Macintosh) || defined(macintosh) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"minix")) { -#if defined(__minix) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"msdos")) { -#if defined(__DOS__) || defined(__MSDOS__) || defined(_MSDOS) || defined(MSDOS) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"netbsd")) { -#if defined(__NetBSD__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"nonstop")) { -#if defined(__TANDEM) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"openbsd")) { -#if defined(__OpenBSD__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"os2")) { -#if defined(__OS2__) || defined(__TOS_OS2__) || defined(_OS2) || defined(OS2) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"palmos")) { -#if defined(__palmos__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"qnx")) { -#if defined(__QNX__) || defined(__QNXNTO__) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"solaris")) { -#if (defined(__SVR4) || defined(__svr4__) || defined(__sysv__) || defined(_SYSTYPE_SVR4)) && (defined(__sun) || defined(sun)) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"sun")) { -#if defined(__sun) || 
defined(sun) - return !u8c::isos("solaris"); -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"unicos")) { -#if defined(_UNICOS) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"unix")) { -#if defined(__unix) || defined(__unix__) || defined(_POSIX_VERSION) || defined(u8c_os_aix) || defined(u8c_os_android)|| defined(u8c_os_bsd) || defined(u8c_os_hpux) || defined(u8c_os_hurd) || defined(u8c_os_linux) || defined(u8c_os_mac) || defined(u8c_os_minix) || defined(u8c_os_solaris) || defined(u8c_os_sun) || defined(u8c_os_unicos) || defined(u8c_os_unixware) || defined(unix) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"unixware")) { -#if defined(sco) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"win")) { -#if defined(__TOS_WIN__) || defined(__WIN32__) || defined(__WINDOWS__) || defined(_WIN16) || defined(_WIN32) || defined(_WIN32_CE) || defined(_WIN64) - return true; -#else - return false; -#endif - } - if (!u8c::cstrcmp(_os,"zos")) { -#if defined(__HOS_MVS__) || defined(__MVS__) || defined(__TOS_MVS__) - return true; -#else - return false; -#endif - } - return false; -} - -#endif diff --git a/u8c/include/u8c/math b/u8c/include/u8c/math deleted file mode 100644 index d02a827..0000000 --- a/u8c/include/u8c/math +++ /dev/null @@ -1,78 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. 
If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_vm8mXaTP2bcUVL49) -#define u8c_key_vm8mXaTP2bcUVL49 - -#include <compare> /* std::partial_ordering */ -#include <concepts> /* std::convertible_to, std::floating_point, std::integral, std::signed_integral */ -#include <type_traits> /* std::is_arithmetic_v, std::is_same_v */ -#include <u8c/impl> - -namespace u8c { - template<std::signed_integral T = int> class quota { - public: - [[nodiscard]] constexpr static auto inf() noexcept -> u8c::quota<T>; - [[nodiscard]] constexpr auto isinf() const noexcept -> bool; - [[nodiscard]] constexpr auto isnan() const noexcept -> bool; - [[nodiscard]] constexpr auto lower() const noexcept -> T; - [[nodiscard]] constexpr static auto nan() noexcept -> u8c::quota<T>; - [[nodiscard]] constexpr auto upper() const noexcept -> T; - template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator + (u8c::quota<T0> const & oth) const noexcept -> u8c::quota<T>; - template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator - (u8c::quota<T0> const & oth) const noexcept -> u8c::quota<T>; - template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator <=> (u8c::quota<T0> const & oth) const noexcept -> std::partial_ordering; - template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator = ( u8c::quota<T0> const & oth) const noexcept -> bool; - template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator == ( u8c::quota<T0> const & oth) const noexcept -> bool; - template<std::integral T0> [[nodiscard]] constexpr operator T0 () const noexcept; - [[nodiscard]] constexpr quota() noexcept = default; - template<std::convertible_to<T> T0> [[nodiscard]] constexpr quota( T0 val) noexcept; - template<std::convertible_to<T> T0> [[nodiscard]] constexpr quota( u8c::quota<T0> const & oth) noexcept; - template<std::signed_integral T0> [[nodiscard]] constexpr quota( T0 upper,T0 lower) noexcept; - constexpr ~quota() noexcept = 
default; - private: - u8c::ubyte _flags = false; - T _lower = T{0x0}; - T _upper = T{0x0}; - - }; - template<typename T,typename T0 = void> concept arith = std::is_arithmetic_v<T> || std::is_same_v<T,u8c::quota<T0>>; - template<typename T> [[u8c_attr_const]] constexpr auto abs( u8c::quota<T> val) noexcept -> u8c::quota<T>; - template<u8c::arith T> [[u8c_attr_const]] constexpr auto abs( T val) noexcept -> T; - template<typename T> [[u8c_attr_const]] constexpr auto fma( u8c::quota<T> x, u8c::quota<T> y, u8c::quota<T> z) noexcept -> u8c::quota<T>; - template<u8c::arith T> [[u8c_attr_const]] constexpr auto fma( T x, T y, T z) noexcept -> T; - template<u8c::arith T> [[u8c_attr_const]] constexpr auto isinf( T val) noexcept -> bool; - template<u8c::arith T> [[u8c_attr_const]] constexpr auto isnan( T val) noexcept -> bool; - template<typename T> [[u8c_attr_const]] constexpr auto isprime(u8c::quota<T> val) noexcept -> bool; - template<u8c::arith T> [[u8c_attr_const]] constexpr auto isprime(T val) noexcept -> bool; - template<typename T> [[u8c_attr_const]] constexpr auto pow( u8c::quota<T> base,u8c::quota<T> exp) noexcept -> u8c::quota<T>; - template<u8c::arith T> [[u8c_attr_const]] constexpr auto pow( T base,T exp) noexcept -> T; - template<typename T> [[u8c_attr_const]] constexpr auto sqrt( u8c::quota<T> val) noexcept -> u8c::quota<T>; - template<u8c::arith T> [[u8c_attr_const]] constexpr auto sqrt( T val) noexcept -> T; -} - -#include <u8c/math.d/abs> -#include <u8c/math.d/fma> -#include <u8c/math.d/isinf> -#include <u8c/math.d/isnan> -#include <u8c/math.d/isprime> -#include <u8c/math.d/pow> -#include <u8c/math.d/quota> -#include <u8c/math.d/sqrt> - -#endif diff --git a/u8c/include/u8c/math.d/abs b/u8c/include/u8c/math.d/abs deleted file mode 100644 index 53eaba3..0000000 --- a/u8c/include/u8c/math.d/abs +++ /dev/null @@ -1,37 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. 
- - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_DHlzI0Min44ZJ3qF) -#define u8c_key_DHlzI0Min44ZJ3qF - -#include <type_traits> /* std::is_unsigned_v */ - -template<u8c::arith T> constexpr auto u8c::abs(T const _val) noexcept -> T { - if constexpr (std::is_unsigned_v<T>) { - return _val; - } - else { - if (_val < T{0x0}) { - return -_val; - } - return _val; - } -} - -#endif diff --git a/u8c/include/u8c/math.d/fma b/u8c/include/u8c/math.d/fma deleted file mode 100644 index e2b756d..0000000 --- a/u8c/include/u8c/math.d/fma +++ /dev/null @@ -1,27 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_3Xt4uPu34bdh53dB) -#define u8c_key_3Xt4uPu34bdh53dB - -template<u8c::arith T> constexpr auto u8c::fma(T const _x,T const _y,T const _z) noexcept -> T { - return _x * _y + _z; -} - -#endif diff --git a/u8c/include/u8c/math.d/isinf b/u8c/include/u8c/math.d/isinf deleted file mode 100644 index e77793a..0000000 --- a/u8c/include/u8c/math.d/isinf +++ /dev/null @@ -1,32 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_aSzgnLkMAeJF6xFF) -#define u8c_key_aSzgnLkMAeJF6xFF - -#include <limits> /* std::numeric_limits */ - -template<u8c::arith T> constexpr auto u8c::isinf(T const _val) noexcept -> bool { - if (std::numeric_limits<T>::has_infinity) { - return _val == std::numeric_limits<T>::infinity(); - } - return false; -} - -#endif diff --git a/u8c/include/u8c/math.d/isnan b/u8c/include/u8c/math.d/isnan deleted file mode 100644 index 9e90d12..0000000 --- a/u8c/include/u8c/math.d/isnan +++ /dev/null @@ -1,62 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. 
- - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_0RXxS4xdwMkbkEi6) -#define u8c_key_0RXxS4xdwMkbkEi6 - -#include <limits> /* std::numeric_limits */ - -template<u8c::arith T> constexpr auto u8c::isnan(T const _val) noexcept -> bool { - if constexpr (std::numeric_limits<T>::has_quiet_NaN) { - if constexpr (std::numeric_limits<T>::is_iec559) { - return _val != _val; - } - else { -#if defined(__cpp_if_consteval) - if consteval { - return _val != _val; - } - else { - u8c::ubyte * const u8c_restr nanval = nullptr; - u8c::ubyte * const u8c_restr valval = nullptr; - { - auto const tmp = std::numeric_limits<T>::quiet_NaN(); - nanval = reinterpret_cast<unsigned char *>(&tmp); - } - { - auto const tmp = _val; - valval = reinterpret_cast<unsigned char *>(&tmp); - } - for (std::size_t n = 0x0uz;n < sizeof(T);n += 0x1uz) { - if (valval[n] != nanval[n]) { - return false; - } - } - } -#else - return _val != _val; -#endif - } - } - else { - return false; - } -} - -#endif diff --git a/u8c/include/u8c/math.d/isprime b/u8c/include/u8c/math.d/isprime deleted file mode 100644 index 5074d4b..0000000 --- a/u8c/include/u8c/math.d/isprime +++ /dev/null @@ -1,35 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. 
- - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_rrAoPS9LdRWHCbsB) -#define u8c_key_rrAoPS9LdRWHCbsB - -template<u8c::arith T> constexpr auto u8c::isprime(T const _val) noexcept -> bool { - if (_val <= T{0x1}) [[unlikely]] { - return false; - } - for (T iter = T{0x2};iter < _val / T{0x2} + T{0x1};iter += T{0x1}) { - if (_val % iter == T{0x0}) [[unlikely]] { - return false; - } - } - return true; -} - -#endif diff --git a/u8c/include/u8c/math.d/pow b/u8c/include/u8c/math.d/pow deleted file mode 100644 index 53410b8..0000000 --- a/u8c/include/u8c/math.d/pow +++ /dev/null @@ -1,40 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_JD8l4B23bwAqQ2HP) -#define u8c_key_JD8l4B23bwAqQ2HP - -#include <type_traits> /* std::is_floating_point_v */ - -template<u8c::arith T> constexpr auto u8c::pow(T const _base,T const _exp) noexcept -> T { - //if constexpr (std::is_floating_point_v<T>) { - //} - //else { - if (u8c::abs(_base) <= T{0x1}) [[unlikely]] { - return _base; - } - T res = _base; - for (T iter = T{0x1};iter < _exp;iter += T{0x1}) { - res *= _base; - } - return res; - //} -} - -#endif diff --git a/u8c/include/u8c/math.d/quota b/u8c/include/u8c/math.d/quota deleted file mode 100644 index 4e3ecf8..0000000 --- a/u8c/include/u8c/math.d/quota +++ /dev/null @@ -1,94 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_k92caE4RpzrErnKD) -#define u8c_key_k92caE4RpzrErnKD - -#include <compare> /* std::partial_ordering */ -#include <concepts> /* std:convertible_to, std::integral, std::signed_integral */ - -template<std::signed_integral T> constexpr auto u8c::quota<T>::inf() noexcept -> u8c::quota<T> { - auto tmp = u8c::quota<T>(); - tmp._flags = u8c_ubytec(0b10); - return tmp; -} -template<std::signed_integral T> constexpr auto u8c::quota<T>::isinf() const noexcept -> bool { - return (this->_flags & u8c_ubytec(0b10)) == u8c_ubytec(0b10); -} -template<std::signed_integral T> constexpr auto u8c::quota<T>::isnan() const noexcept -> bool { - return (this->_flags & u8c_ubytec(0b1)) == u8c_ubytec(0b1); -} -template<std::signed_integral T> constexpr auto u8c::quota<T>::lower() const noexcept -> T { - return this->_lower; -} -template<std::signed_integral T> constexpr auto u8c::quota<T>::nan() noexcept -> u8c::quota<T> { - auto tmp = u8c::quota<T>(); - tmp._flags = u8c_ubytec(0b1); - return tmp; -} -template<std::signed_integral T> constexpr auto u8c::quota<T>::upper() const noexcept -> T { - return this->_upper; -} -template<std::signed_integral T> template<std::convertible_to<T> T0> constexpr auto u8c::quota<T>::operator <=> (u8c::quota<T0> const & _oth) const noexcept -> std::partial_ordering { - if (this->isinf()) { - return std::partial_ordering::greater; - } - if (_oth.isinf()) { - return std::partial_ordering::less; - } - if (this->isnan() || _oth.isnan()) [[unlikely]] { - return std::partial_ordering::unordered; - } - auto const tmp0 = this->upper() * _oth.lower(); - auto const tmp1 = _oth.upper() * this->lower(); - return tmp0 <=> tmp1; -} -template<std::signed_integral T> template<std::convertible_to<T> T0> constexpr auto u8c::quota<T>::operator == (u8c::quota<T0> const & _oth) const noexcept -> bool { - if (this->isinf() && _oth.isinf()) [[unlikely]] { - return true; - } - if (this->isnan() || _oth.isnan()) [[unlikely]] { - return false; - } - if 
(this->_flags != _oth._flags) [[unlikely]] { - return false; - } - return this->upper() * _oth.lower() == _oth.upper() * this->lower(); -} -template<std::signed_integral T> template<std::integral T0> constexpr u8c::quota<T>::operator T0 () const noexcept { - return static_cast<T0>(this->_upper / this->_lower); -} -template<std::signed_integral T> template<std::convertible_to<T> T0> constexpr u8c::quota<T>::quota(T0 const _val) noexcept { - if (u8c::isnan(_val)) [[unlikely]] { - this->_flags |= u8c_ubytec(0b1); - } - else if (u8c::isinf(_val)) [[unlikely]] { - this->_flags |= u8c_ubytec(0b10); - } - else { - this->_upper = T{_val}; - this->_lower = T{0x1}; - } -} -template<std::signed_integral T> template<std::signed_integral T0> constexpr u8c::quota<T>::quota(T0 const _upper,T0 const _lower) noexcept { - this->_upper = _upper; - this->_lower = _lower; -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/math.d/sqrt b/u8c/include/u8c/math.d/sqrt deleted file mode 100644 index d1eb5e1..0000000 --- a/u8c/include/u8c/math.d/sqrt +++ /dev/null @@ -1,42 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_GPVreQYtljZ3JsPu) -#define u8c_key_GPVreQYtljZ3JsPu - -#include <limits> /* std::numeric_limits */ -#include <type_traits> /* std::is_integral_v */ - -template<u8c::arith T> constexpr auto u8c::sqrt(T const _val) noexcept -> T { - if (_val < T{0x0}) [[unlikely]] { - return std::numeric_limits<T>::quiet_NaN(); - } - if (_val == T{0x0}) [[unlikely]] { - return _val; - } - /*constexpr auto err = []() { - if constexpr(std::is_integral_v<T>) { - return T{0x1}; - } - return std::numeric_limits<T>::epsilon(); - }();*/ - return _val; -} - -#endif diff --git a/u8c/include/u8c/misc b/u8c/include/u8c/misc deleted file mode 100644 index f7deca1..0000000 --- a/u8c/include/u8c/misc +++ /dev/null @@ -1,132 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. 
- - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_4grZQR1RdsRJL14e) -#define u8c_key_4grZQR1RdsRJL14e - -#include <climits> /* CHAR_BIT */ -#include <cstdint> /* std::int_least16_t, std::int_least32_t, std::int_least64_t, std::int_least8_t, std::intmax_t, std::uint_least16_t, std::uint_least32_t, std::uint_least64_t, std::uint_least8_t, std::uintmax_t */ -#include <cstdlib> /* std::abort */ -#include <iostream> /* std::cerr, std::endl */ - -#define u8c_assert(expr) \ - if constexpr (!u8c::dbg) { \ - /*if consteval { \ - static_assert(expr); \ - } \ - else*/ if (!(expr)) [[unlikely]] { \ - std::cerr << "u8c :: \"" << __FILE__ "\" @ " << __LINE__ << ": Assertion failed: expression \"" << #expr << "\" evaluates to false." << std::endl; \ - std::abort(); \ - } \ - } \ - -#if defined(__GNUC__) || defined(__clang__) -#define u8c_attr_abitag(...) gnu::abi_tag(__VA_ARGS__) -#define u8c_attr_allocsz(...) gnu::alloc_size(__VA_ARGS__) -#define u8c_attr_artif gnu::artificial -#define u8c_attr_cold gnu::cold -#define u8c_attr_const gnu::const -#define u8c_attr_fmt gnu::format -#define u8c_attr_malloc gnu::malloc -#define u8c_attr_nonnull(...) gnu::nonnull(__VA_ARGS__) -#define u8c_attr_hot gnu::hot -#define u8c_attr_inline gnu::always_inline -#define u8c_attr_pure gnu::pure -#define u8c_attr_retnonnull gnu::returns_nonnull -#define u8c_attr_sect gnu::section -#define u8c_attr_used gnu::used -#else -#define u8c_attr_abitag(...) -#define u8c_attr_allocsz(...) 
-#define u8c_attr_artif -#define u8c_attr_cold -#define u8c_attr_const -#define u8c_attr_fmt -#define u8c_attr_hot -#define u8c_attr_inline -#define u8c_attr_malloc -#define u8c_attr_nonnull(...) -#define u8c_attr_pure -#define u8c_attr_retnonnull -#define u8c_attr_sect -#define u8c_attr_used -#endif -#if defined(__clang__) -#define u8c_attr_noderef clang::noderef -#define u8c_attr_nodup clang::noduplicate -#define u8c_attr_noesc(...) clang::noescape(__VA_ARGS__) -#else -#define u8c_attr_noderef -#define u8c_attr_nodup -#define u8c_attr_noesc(...) -#endif - -#define u8c_bytec(expr) (static_cast<u8c::byte>(INT8_C(expr))) -#define u8c_int16c(expr) (static_cast<u8c::int16>(INT16_C(expr))) -#define u8c_int32c(expr) (static_cast<u8c::int32>(INT32_C(expr))) -#define u8c_int64c(expr) (static_cast<u8c::int64>(INT64_C(expr))) -#define u8c_intmaxc(expr) (static_cast<u8c::intmax>(INTMAX_C(expr))) -#define u8c_ubytec(expr) (static_cast<u8c::ubyte>(UINT8_C(expr))) -#define u8c_uint16c(expr) (static_cast<u8c::uint16>(UINT16_C(expr))) -#define u8c_uint32c(expr) (static_cast<u8c::uint32>(UINT32_C(expr))) -#define u8c_uint64c(expr) (static_cast<u8c::uint64>(UINT64_C(expr))) -#define u8c_uintmaxc(expr) (static_cast<u8c::uintmax>(UINTMAX_C(expr))) - -#if defined(__GNUC__) || defined(__clang__) -#define u8c_restr __restrict__ -#elif defined(__INTEL_COMPILER) || defined(_MSC_VER) -#define u8c_restr __restrict -#else -#define u8c_restr -#endif - -namespace u8c { - using byte = signed char; - using int16 = std::int_least16_t; - using int32 = std::int_least32_t; - using int64 = std::int_least64_t; - using intmax = std::intmax_t; - using size = decltype(0x0uz); - using ssize = decltype(0x0z); - using ubyte = unsigned char; - using uint16 = std::uint_least16_t; - using uint32 = std::uint_least32_t; - using uint64 = std::uint_least64_t; - using uintmax = std::uintmax_t; - template<typename T> concept utf = std::is_same_v<T,char16_t> || std::is_same_v<T,char32_t> || std::is_same_v<T,char8_t>; 
- enum class endi : bool { - big = true, - little = false, - }; - template<typename T> [[nodiscard,u8c_attr_allocsz(0x3),u8c_attr_malloc,u8c_attr_nonnull(0x1)]] constexpr auto renew(T * ptr,u8c::size sz,u8c::size newsz) -> T *; - template<typename T> constexpr auto renew(std::nullptr_t, u8c::size sz,u8c::size newsz) -> T * = delete; - constexpr auto bytesz = static_cast<u8c::ubyte>(CHAR_BIT); - constexpr auto dbg = -#if defined(NDEBUG) || !defined(_DEBUG) - false; -#else - true; -#endif - constexpr auto unimax = U'\U00010FFF'; - constexpr auto ver = u8c_uint64c(0x1B); -} - -#include <u8c/misc.d/renew> - -#endif diff --git a/u8c/include/u8c/misc.d/renew b/u8c/include/u8c/misc.d/renew deleted file mode 100644 index 8d67b78..0000000 --- a/u8c/include/u8c/misc.d/renew +++ /dev/null @@ -1,32 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_PDOxHgtcAGQDjPjZ) -#define u8c_key_PDOxHgtcAGQDjPjZ - -#include <algorithm> /* std::copy_n, std::min */ - -template<typename T> auto constexpr u8c::renew(T * const u8c_restr _ptr,u8c::size const _sz,u8c::size const _newsz) -> T * { - T * const u8c_restr ptr = ::new T[_newsz]; - std::copy_n(_ptr,std::min(_sz,_newsz),ptr); - ::delete[] _ptr; - return ptr; -} - -#endif diff --git a/u8c/include/u8c/str b/u8c/include/u8c/str deleted file mode 100644 index d8f5753..0000000 --- a/u8c/include/u8c/str +++ /dev/null @@ -1,55 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_ywSpAOVLrorTYrkK) -#define u8c_key_ywSpAOVLrorTYrkK - -#include <u8c/arr> -#include <u8c/utf> - -namespace u8c { - class str { - public: - constexpr auto app( u8c::str const & oth) -> u8c::str const &; - constexpr auto begin() const noexcept -> char32_t *; - constexpr auto end() const noexcept -> char32_t *; - constexpr auto operator = ( u8c::str const & oth) -> u8c::str const &; - constexpr auto operator [] (u8c::str const & oth) const noexcept -> u8c::str const &; - [[nodiscard]] constexpr str() noexcept; - [[nodiscard]] constexpr str( u8c::str const & oth); - template<u8c::utf T> [[nodiscard]] constexpr str( T chr); - template<u8c::utf T,u8c::size N> [[nodiscard]] constexpr str( T const (& strlit)[N]) noexcept; - [[nodiscard]] constexpr auto u8() const -> u8c::arr<char8_t>; - private: - u8c::arr<char32_t> _arr; - }; - template<typename T> [[nodiscard,u8c_attr_hot]] constexpr auto fmt( T fmt) -> u8c::str; - auto operator << (std::ostream & strm, u8c::str const & str) -> std::ostream &; - /*class { - public: - bool ascii = false; - u8c::ubyte base = u8c_ubytec(0xB); - u8c::endi endi = u8c::endi::little; - } inline fmtsets;*/ -} - -#include <u8c/str.d/dbgprint> -#include <u8c/str.d/str> - -#endif diff --git a/u8c/include/u8c/str.d/dbgprint b/u8c/include/u8c/str.d/dbgprint deleted file mode 100644 index 4ecb6b9..0000000 --- a/u8c/include/u8c/str.d/dbgprint +++ /dev/null @@ -1,32 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. 
- - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ -#if !defined(u8c_key_WQGQeKhIxLI16CtT) -#define u8c_key_WQGQeKhIxLI16CtT - -#if 0x0 - -auto u8c::dbgprint(u8c::str const _msg) -> void { - if constexpr (u8c::dbg) { - return u8c::println(stderr,_msg); - } -} - -#endif - -#endif diff --git a/u8c/include/u8c/str.d/fmt b/u8c/include/u8c/str.d/fmt deleted file mode 100644 index 3c602f0..0000000 --- a/u8c/include/u8c/str.d/fmt +++ /dev/null @@ -1,59 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_6cyujV0FoSmPeQWl) -#define u8c_key_6cyujV0FoSmPeQWl - -#if 0x0 -auto u8c::dbgprint(u8c::str const _msg) -> void { -#if defined(NDEBUG) - constexpr auto dbg = true; -#else - constexpr auto dbg = false; -#endif - if constexpr(dbg) { - return u8c::println(stderr,_msg); - } -} -auto u8c::fmt(u8c::str const _str) -> u8c::str { - return u8c::str(_str); -} -template<typename T> auto u8c::fmt(u8c::str const _str,T const _fmt) -> u8c::str { - u8c::str str; - for(auto chr : _str) { - if(chr == U'\uFFFC') [[unlikely]] { - return str + u8c::fmter::fmt(_fmt); - } - str += chr; - } - return str; -} -template<typename T,typename... TArgs> auto u8c::fmt(u8c::str const _str,T const _fmt,TArgs... 
_args) -> u8c::str { - u8c::str str; - for(auto chr : _str) { - if(chr == U'\uFFFC') [[unlikely]] { - return str + u8c::fmt(str,_fmt,_args...); - } - str += chr; - } - return str; -} -#endif - -#endif diff --git a/u8c/include/u8c/str.d/str b/u8c/include/u8c/str.d/str deleted file mode 100644 index f708f8a..0000000 --- a/u8c/include/u8c/str.d/str +++ /dev/null @@ -1,50 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_3zvMYqWFDYnlTEyW) -#define u8c_key_3zvMYqWFDYnlTEyW - -#include <type_traits> /* std::is_same_v */ - -constexpr auto u8c::str::begin() const noexcept -> char32_t * { - return this->_arr.begin(); -} -constexpr auto u8c::str::end() const noexcept -> char32_t * { - return this->_arr.end(); -} -constexpr auto u8c::str::operator = (u8c::str const & _oth) -> u8c::str const & { - this->_arr = _oth._arr; - return *this; -} -constexpr u8c::str::str(u8c::str const & _oth) { - *this = _oth; -} -template<u8c::utf T> constexpr u8c::str::str(T const _chr) { - if constexpr (std::is_same_v<T,char32_t>) { - this->_arr.set(_chr); - } -} -template<u8c::utf T,u8c::size N> constexpr u8c::str::str(T const (& _strlit)[N]) noexcept { - this->_arr.set(_strlit,_strlit + N); -} -constexpr auto u8c::str::u8() const -> u8c::arr<char8_t> { - return u8c::cnv<char8_t>(this->begin(),this->end()); -} - -#endif diff --git a/u8c/include/u8c/u8c b/u8c/include/u8c/u8c deleted file mode 100644 index f9591c5..0000000 --- a/u8c/include/u8c/u8c +++ /dev/null @@ -1,50 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -/* - Greater Header Dependencies: - - misc →┬─────────────────────────→┬→ u8c - ├→ arr ─→┬────────────────→┤ - │ └→ utf ─→┬───────→┤ - │ └→ str ─→┤ - └→ cstr →┬────────────────→┤ - └→ impl →┬───────→┤ - └→ math →┘ -*/ - -#if !defined(u8c_key_piDyeERQmK9By1n3) -#define u8c_key_piDyeERQmK9By1n3 - -#include <concepts> /* std::convertible_to */ -#include <ostream> /* std::ostream */ -#include <type_traits> /* std::is_same_v */ - -#include <u8c/math> -#include <u8c/str> - -namespace u8c { - [[nodiscard]] constexpr auto uniblk(char32_t chr) -> u8c::str; - [[nodiscard]] constexpr auto uninm( char32_t chr) -> u8c::str; -} - -#include <u8c/u8c.d/uniblk> -#include <u8c/u8c.d/uninm> - -#endif diff --git a/u8c/include/u8c/u8c.d/uniblk b/u8c/include/u8c/u8c.d/uniblk deleted file mode 100644 index e216833..0000000 --- a/u8c/include/u8c/u8c.d/uniblk +++ /dev/null @@ -1,497 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_Z0dBX6z5KYfITIHo) -#define u8c_key_Z0dBX6z5KYfITIHo - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::uniblk(char32_t const _chr) -> u8c::str { - if(_chr > u8c::unimax) [[unlikely]] { - throw std::domain_error("Unicode codepoint too big!"); - } - if(_chr <= U'\u007F') { - return U"BASIC LATIN"; - } - if(_chr >= U'\u0080' && _chr <= U'\u00FF') { - return U"LATIN-1 SUPPLEMENT"; - } - if(_chr >= U'\u0100' && _chr <= U'\u017F') { - return U"LATIN EXTENDED-A"; - } - if(_chr >= U'\u0180' && _chr <= U'\u024F') { - return U"LATIN EXTENDED-B"; - } - if(_chr >= U'\u0250' && _chr <= U'\u02AF') { - return U"IPA EXTENSIONS"; - } - if(_chr >= U'\u02B0' && _chr <= U'\u02FF') { - return U"SPACING MODIFIER LETTERS"; - } - if(_chr >= U'\u0300' && _chr <= U'\u036F') { - return U"COMBINING DIRACITICAL MARKS"; - } - if(_chr >= U'\u0370' && _chr <= U'\u03FF') { - return U"GREEK AND COPTIC"; - } - if(_chr >= U'\u0400' && _chr <= U'\u04FF') { - return U"CYRILLIC"; - } - if(_chr >= U'\u0500' && _chr <= U'\u052F') { - return U"CYRILLIC SUPPLEMENT"; - } - if(_chr >= U'\u0530' && _chr <= U'\u058F') { - return U"ARMENIAN"; - } - if(_chr >= U'\u0590' && _chr <= U'\u05FF') { - return U"HEBREW"; - } - if(_chr >= U'\u0600' && _chr <= U'\u06FF') { - return U"ARABIC"; - } - if(_chr >= U'\u0700' && _chr <= U'\u074F') { - return U"SYRIAC"; - } - if(_chr >= U'\u0750' && _chr <= U'\u077F') { - return U"ARABIC SUPPLEMENT"; - } - if(_chr >= U'\u0780' && _chr <= U'\u07BF') { - return U"THAANA"; - } - if(_chr >= U'\u07C0' && _chr <= U'\u07FF') { - return U"NKO"; - } - if(_chr >= U'\u0800' && _chr <= U'\u083F') { - return U"SAMARITAN"; - } - if(_chr >= U'\u0840' && _chr <= U'\u085F') { - return U"MANDAIC"; - } - if(_chr >= U'\u0860' && _chr <= U'\u086F') { - return U"SYRIAC SUPPLEMENT"; - } - if(_chr >= U'\u08A0' && _chr <= U'\u08FF') { - return U"ARABIC EXTENDED-A"; - } - if(_chr >= U'\u0900' && _chr <= U'\u097F') { - return U"DEVANAGARI"; - } - if(_chr >= 
U'\u0980' && _chr <= U'\u09FF') { - return U"BENGALI"; - } - if(_chr >= U'\u0A00' && _chr <= U'\u0A7F') { - return U"GURMUKHI"; - } - if(_chr >= U'\u0A80' && _chr <= U'\u0AFF') { - return U"GUJARATI"; - } - if(_chr >= U'\u0B00' && _chr <= U'\u0B7F') { - return U"ORIYAS"; - } - if(_chr >= U'\u0B80' && _chr <= U'\u0BFF') { - return U"TAMIL"; - } - if(_chr >= U'\u0C00' && _chr <= U'\u0C7F') { - return U"TELUGU"; - } - if(_chr >= U'\u0C80' && _chr <= U'\u0CFF') { - return U"KANNADA"; - } - if(_chr >= U'\u0D00' && _chr <= U'\u0D7F') { - return U"MALAYALAM"; - } - if(_chr >= U'\u0D80' && _chr <= U'\u0DFF') { - return U"SINHALA"; - } - if(_chr >= U'\u0E00' && _chr <= U'\u0E7F') { - return U"THAI"; - } - if(_chr >= U'\u0E80' && _chr <= U'\u0EFF') { - return U"LAO"; - } - if(_chr >= U'\u0F00' && _chr <= U'\u0FFF') { - return U"TIBETAN"; - } - if(_chr >= U'\u1000' && _chr <= U'\u109F') { - return U"MYANMAR"; - } - if(_chr >= U'\u10A0' && _chr <= U'\u10FF') { - return U"GEORGIAN"; - } - if(_chr >= U'\u1100' && _chr <= U'\u11FF') { - return U"HANGUL JAMO"; - } - if(_chr >= U'\u1200' && _chr <= U'\u137F') { - return U"ETHIOPIC"; - } - if(_chr >= U'\u1380' && _chr <= U'\u139F') { - return U"ETHIOPIC SUPPLEMENT"; - } - if(_chr >= U'\u13A0' && _chr <= U'\u13FF') { - return U"CHEROKEE"; - } - if(_chr >= U'\u1400' && _chr <= U'\u167F') { - return U"UNIFIED CANADIAN ABORIGINAL SYLLABICS"; - } - if(_chr >= U'\u1680' && _chr <= U'\u169F') { - return U"OGHAM"; - } - if(_chr >= U'\u16A0' && _chr <= U'\u16FF') { - return U"RUNIC"; - } - if(_chr >= U'\u1700' && _chr <= U'\u171F') { - return U"TAGALOG"; - } - if(_chr >= U'\u1720' && _chr <= U'\u173F') { - return U"HANUNOO"; - } - if(_chr >= U'\u1740' && _chr <= U'\u175F') { - return U"BUHID"; - } - if(_chr >= U'\u1760' && _chr <= U'\u177F') { - return U"TAGBANWA"; - } - if(_chr >= U'\u1700' && _chr <= U'\u17FF') { - return U"TAGALOG"; - } - if(_chr >= U'\u1780' && _chr <= U'\u171F') { - return U"KHMER"; - } - if(_chr >= U'\u1800' && _chr <= 
U'\u18AF') { - return U"MONGOLIAN"; - } - if(_chr >= U'\u18B0' && _chr <= U'\u18FF') { - return U"UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED"; - } - if(_chr >= U'\u1900' && _chr <= U'\u194F') { - return U"LIMBU"; - } - if(_chr >= U'\u1950' && _chr <= U'\u197F') { - return U"TAI LE"; - } - if(_chr >= U'\u1980' && _chr <= U'\u19DF') { - return U"NEW TAI LUE"; - } - if(_chr >= U'\u19E0' && _chr <= U'\u19FF') { - return U"KHMER SYMBOLS"; - } - if(_chr >= U'\u1A00' && _chr <= U'\u1A1F') { - return U"BUGINESE"; - } - if(_chr >= U'\u1A20' && _chr <= U'\u1AAF') { - return U"TAI THAM"; - } - if(_chr >= U'\u1AB0' && _chr <= U'\u1AFF') { - return U"COMBINING DIACRITICAL MARKS EXTENDED"; - } - if(_chr >= U'\u1B00' && _chr <= U'\u1B7F') { - return U"BALINESE"; - } - if(_chr >= U'\u1B80' && _chr <= U'\u1BBF') { - return U"SUNDANESE"; - } - if(_chr >= U'\u1BC0' && _chr <= U'\u1BFF') { - return U"BATAK"; - } - if(_chr >= U'\u1C00' && _chr <= U'\u1C4F') { - return U"LEPCHA"; - } - if(_chr >= U'\u1C50' && _chr <= U'\u1C7F') { - return U"OL CHIKI"; - } - if(_chr >= U'\u1C80' && _chr <= U'\u1C8F') { - return U"CYRILLIC EXTENDED C"; - } - if(_chr >= U'\u1C90' && _chr <= U'\u1CBF') { - return U"GEORGIAN EXTENDED"; - } - if(_chr >= U'\u1CC0' && _chr <= U'\u1CCF') { - return U"SUNDANESE SUPPLEMENT"; - } - if(_chr >= U'\u1CD0' && _chr <= U'\u1CFF') { - return U"VEDIC EXTENSIONS"; - } - if(_chr >= U'\u1D00' && _chr <= U'\u1D7F') { - return U"PHONETIC EXTENSIONS"; - } - if(_chr >= U'\u1D80' && _chr <= U'\u1DBF') { - return U"PHONETIC EXTENSIONS SUPPLEMENT"; - } - if(_chr >= U'\u1DC0' && _chr <= U'\u1DFF') { - return U"COMBINING DIACRITICAL MARKS SUPPLEMENT"; - } - if(_chr >= U'\u1E00' && _chr <= U'\u1EFF') { - return U"LATIN EXTENDED ADDITIONAL"; - } - if(_chr >= U'\u1F00' && _chr <= U'\u1FFF') { - return U"GREEK EXTENDED"; - } - if(_chr >= U'\u2000' && _chr <= U'\u206F') { - return U"GENERAL PUNCTUATION"; - } - if(_chr >= U'\u2070' && _chr <= U'\u209F') { - return U"SUPERSCRIPTS AND 
SUBSCRIPTS"; - } - if(_chr >= U'\u20A0' && _chr <= U'\u20CF') { - return U"CURRENCY SYMBOLS"; - } - if(_chr >= U'\u20D0' && _chr <= U'\u20FF') { - return U"COMBINING DIACRITICAL MARKS FOR SYMBOLS"; - } - if(_chr >= U'\u2100' && _chr <= U'\u214F') { - return U"LETTERLIKE SYMBOLS"; - } - if(_chr >= U'\u2150' && _chr <= U'\u218F') { - return U"NUMBER FORMS"; - } - if(_chr >= U'\u2190' && _chr <= U'\u21FF') { - return U"ARROWS"; - } - if(_chr >= U'\U00011A00' && _chr <= U'\U00011A4F') { - return U"ZANABAZAR SQUARE"; - } - if(_chr >= U'\U00011A50' && _chr <= U'\U00011AAF') { - return U"SOYOMBO"; - } - if(_chr >= U'\U00011AC0' && _chr <= U'\U00011AFF') { - return U"PAU CIN HAU"; - } - if(_chr >= U'\U00011C00' && _chr <= U'\U00011C6F') { - return U"BHAIKSUKI"; - } - if(_chr >= U'\U00011C70' && _chr <= U'\U00011CBF') { - return U"MARCHEN"; - } - if(_chr >= U'\U00011D00' && _chr <= U'\U00011D5F') { - return U"MASARAM GONDI"; - } - if(_chr >= U'\U00011D60' && _chr <= U'\U00011DAF') { - return U"GUNJALA GONDI"; - } - if(_chr >= U'\U00011EE0' && _chr <= U'\U00011EFF') { - return U"MAKASAR"; - } - if(_chr >= U'\U00011FB0' && _chr <= U'\U00011FBF') { - return U"LISU SUPPLEMENT"; - } - if(_chr >= U'\U00011FC0' && _chr <= U'\U00011FFF') { - return U"TAMIL SUPPLEMENT"; - } - if(_chr >= U'\U00012000' && _chr <= U'\U000123FF') { - return U"CUNEIFORM"; - } - if(_chr >= U'\U00012400' && _chr <= U'\U0001247F') { - return U"CUNEIFORM NUMBERS AND PUNCTUATION"; - } - if(_chr >= U'\U00012480' && _chr <= U'\U0001254F') { - return U"EARLY DYNASTIC CUNEIFORM"; - } - if(_chr >= U'\U00013000' && _chr <= U'\U0001342F') { - return U"EGYPTIAN HIEROGLYPHS"; - } - if(_chr >= U'\U00013430' && _chr <= U'\U0001343F') { - return U"EGYPTIAN HIEROGLYPH FORMAT CONTROLS"; - } - if(_chr >= U'\U00014400' && _chr <= U'\U0001467F') { - return U"ANATOLIAN HIEROGLYPHS"; - } - if(_chr >= U'\U00016800' && _chr <= U'\U00016A3F') { - return U"BAMUM SUPPLEMENT"; - } - if(_chr >= U'\U00016A40' && _chr <= U'\U00016A6F') 
{ - return U"MRO"; - } - if(_chr >= U'\U00016AD0' && _chr <= U'\U00016AFF') { - return U"BASSA VAH"; - } - if(_chr >= U'\U00016B00' && _chr <= U'\U00016B8F') { - return U"PAHAWH HMONG"; - } - if(_chr >= U'\U00016E40' && _chr <= U'\U00016E9F') { - return U"MEDEFAIDRIN"; - } - if(_chr >= U'\U00016F00' && _chr <= U'\U00016F9F') { - return U"MIAO"; - } - if(_chr >= U'\U00016FE0' && _chr <= U'\U00016FFF') { - return U"IDEOGRAPHIC SYMBOLS AND PUNCTUATION"; - } - if(_chr >= U'\U00017000' && _chr <= U'\U000187FF') { - return U"TANGUT"; - } - if(_chr >= U'\U00018800' && _chr <= U'\U00018AFF') { - return U"TANGUT COMPONENTS"; - } - if(_chr >= U'\U00018B00' && _chr <= U'\U00018CFF') { - return U"KHITAN SMALL SCRIPT"; - } - if(_chr >= U'\U00018D00' && _chr <= U'\U00018D8F') { - return U"TANGUT SUPPLEMENT"; - } - if(_chr >= U'\U0001B000' && _chr <= U'\U0001B0FF') { - return U"KANA SUPPLEMENT"; - } - if(_chr >= U'\U0001B100' && _chr <= U'\U0001B12F') { - return U"KANA EXTENDED-A"; - } - if(_chr >= U'\U0001B130' && _chr <= U'\U0001B16F') { - return U"SMALL KANA EXTENSION"; - } - if(_chr >= U'\U0001B170' && _chr <= U'\U0001B2FF') { - return U"NUSHU"; - } - if(_chr >= U'\U0001BC00' && _chr <= U'\U0001BC9F') { - return U"DUPLOYAN"; - } - if(_chr >= U'\U0001BCA0' && _chr <= U'\U0001BCAF') { - return U"SHORTHAND FORMAT CONTROLS"; - } - if(_chr >= U'\U0001D000' && _chr <= U'\U0001D0FF') { - return U"BYZANTINE MUSICAL SYMBOLS"; - } - if(_chr >= U'\U0001D100' && _chr <= U'\U0001D1FF') { - return U"MUSICAL SYMBOLS"; - } - if(_chr >= U'\U0001D200' && _chr <= U'\U0001D24F') { - return U"ANCIENT GREEK MUSICAL NOTATION"; - } - if(_chr >= U'\U0001D2E0' && _chr <= U'\U0001D2FF') { - return U"MAYAN NUMERALS"; - } - if(_chr >= U'\U0001D300' && _chr <= U'\U0001D35F') { - return U"TAI XUAN JING SYMBOLS"; - } - if(_chr >= U'\U0001D360' && _chr <= U'\U0001D37F') { - return U"COUNTING ROD NUMERALS"; - } - if(_chr >= U'\U0001D400' && _chr <= U'\U0001D7FF') { - return U"MATHEMATICAL ALPHANUMERIC 
SYMBOLS"; - } - if(_chr >= U'\U0001D800' && _chr <= U'\U0001DAAF') { - return U"SUTTON SIGNWRITING"; - } - if(_chr >= U'\U0001E000' && _chr <= U'\U0001E02F') { - return U"GLAGOLITIC SUPPLEMENT"; - } - if(_chr >= U'\U0001E100' && _chr <= U'\U0001E14F') { - return U"NYIAKENG PUACHUE HMONG"; - } - if(_chr >= U'\U0001E2C0' && _chr <= U'\U0001E2FF') { - return U"WANCHO"; - } - if(_chr >= U'\U0001E800' && _chr <= U'\U0001E8DF') { - return U"MENDE KIKAKUI"; - } - if(_chr >= U'\U0001E900' && _chr <= U'\U0001E95F') { - return U"ADLAM"; - } - if(_chr >= U'\U0001EC70' && _chr <= U'\U0001ECBF') { - return U"INDIC SIYAQ NUMBERS"; - } - if(_chr >= U'\U0001ED00' && _chr <= U'\U0001ED4F') { - return U"OTTOMAN SIYAQ NUMBERS"; - } - if(_chr >= U'\U0001EE00' && _chr <= U'\U0001EEFF') { - return U"ARABIC MATHEMATICAL ALPHABETIC SYMBOLS"; - } - if(_chr >= U'\U0001F000' && _chr <= U'\U0001F02F') { - return U"MAHJONG TILES"; - } - if(_chr >= U'\U0001F030' && _chr <= U'\U0001F09F') { - return U"DOMINO TILES"; - } - if(_chr >= U'\U0001F0A0' && _chr <= U'\U0001F0FF') { - return U"PLAYING CARDS"; - } - if(_chr >= U'\U0001F100' && _chr <= U'\U0001F1FF') { - return U"ENCLOSED ALPHANUMERIC SUPPLEMENT"; - } - if(_chr >= U'\U0001F200' && _chr <= U'\U0001F2FF') { - return U"ENCLOSED IDEOGRAPHIC SUPPLEMENT"; - } - if(_chr >= U'\U0001F300' && _chr <= U'\U0001F5FF') { - return U"MISCELLANEOUS SYMBOLS AND PICTOGRAPHS"; - } - if(_chr >= U'\U0001F600' && _chr <= U'\U0001F64F') { - return U"EMOTICONS"; - } - if(_chr >= U'\U0001F650' && _chr <= U'\U0001F67F') { - return U"ORNAMENTAL DINGBATS"; - } - if(_chr >= U'\U0001F680' && _chr <= U'\U0001F6FF') { - return U"TRANSPORT AND MAP SYMBOLS"; - } - if(_chr >= U'\U0001F700' && _chr <= U'\U0001F77F') { - return U"ALCHEMICAL SYMBOLS"; - } - if(_chr >= U'\U0001F780' && _chr <= U'\U0001F7FF') { - return U"GEOMETRIC SHAPES EXTENDED"; - } - if(_chr >= U'\U0001F800' && _chr <= U'\U0001F8FF') { - return U"SUPPLEMENTAL ARROWS-C"; - } - if(_chr >= U'\U0001F900' && _chr 
<= U'\U0001F9FF') { - return U"SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS"; - } - if(_chr >= U'\U0001FA00' && _chr <= U'\U0001FA6F') { - return U"CHESS SYMBOLS"; - } - if(_chr >= U'\U0001FA70' && _chr <= U'\U0001FAFF') { - return U"SYMBOLS AND PICTOGRAPHS EXTENDED-A"; - } - if(_chr >= U'\U0001FB00' && _chr <= U'\U0001FBFF') { - return U"SYMBOLS FOR LEGACY COMPUTING"; - } - if(_chr >= U'\U00020000' && _chr <= U'\U0002A6DF') { - return U"CJK UNIFIED IDEOGRAPHS EXTENSION B"; - } - if(_chr >= U'\U0002A700' && _chr <= U'\U0002B73F') { - return U"CJK UNIFIED IDEOGRAPHS EXTENSION C"; - } - if(_chr >= U'\U0002B740' && _chr <= U'\U0002B81F') { - return U"CJK UNIFIED IDEOGRAPHS EXTENSION D"; - } - if(_chr >= U'\U0002B820' && _chr <= U'\U0002CEAF') { - return U"CJK UNIFIED IDEOGRAPHS EXTENSION E"; - } - if(_chr >= U'\U0002CEB0' && _chr <= U'\U0002EBEF') { - return U"CJK UNIFIED IDEOGRAPHS EXTENSION F"; - } - if(_chr >= U'\U0002F800' && _chr <= U'\U0002FA1F') { - return U"CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT"; - } - if(_chr >= U'\U00030000' && _chr <= U'\U0003134F') { - return U"CJK UNIFIED IDEOGRAPHS EXTENSION G"; - } - if(_chr >= U'\U000E0000' && _chr <= U'\U000E007F') { - return U"TAGS"; - } - if(_chr >= U'\U000E0100' && _chr <= U'\U000E1EFF') { - return U"VARIATION SELECTORS SUPPLEMENT"; - } - if(_chr >= U'\U000F0000' && _chr <= U'\U000FFFFF') { - return U"SUPPLEMENTARY PRIVATE USE AREA-A"; - } - if(_chr >= U'\U00100000' && _chr <= U'\U0010FFFF') { - return U"SUPPLEMENTARY PRIVATE USE AREA-B"; - } - return U"UNDEFINED IN UNICODE"; -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/u8c.d/uninm b/u8c/include/u8c/u8c.d/uninm deleted file mode 100644 index 3fc67f3..0000000 --- a/u8c/include/u8c/u8c.d/uninm +++ /dev/null @@ -1,2697 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_EW3CUEOMiNBCpImA) -#define u8c_key_EW3CUEOMiNBCpImA - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::uninm(char32_t const _chr) -> u8c::str{ - switch(_chr) { - [[unlikely]] default: - if(_chr > u8c::unimax) [[unlikely]] { - throw std::out_of_range("Character out of range."); - } - return U"UNDEFINED IN UNICODE"; - /* BASIC LATIN: */ - case U'\u0000': - return U"NULL"; - case U'\u0001': - return U"START OF HEADING"; - case U'\u0002': - return U"START OF TEXT"; - case U'\u0003': - return U"END OF TEXT"; - case U'\u0004': - return U"END OF TRANSMISSION"; - case U'\u0005': - return U"ENQUIRY"; - case U'\u0006': - return U"ACKNOWLEDGE"; - case U'\u0007': - return U"BELL"; - case U'\u0008': - return U"BACKSPACE"; - case U'\u0009': - return U"HORIZONTAL TABULATION"; - case U'\u000A': - return U"NEW LINE"; - case U'\u000B': - return U"VERTICAL TABULATION"; - case U'\u000C': - return U"FORM FEED"; - case U'\u000D': - return U"CARRIAGE RETURN"; - case U'\u000E': - return U"SHIFT OUT"; - case U'\u000F': - return U"SHIFT IN"; - case 
U'\u0010': - return U"DATA LINK ESCAPE"; - case U'\u0011': - return U"DEVICE CONTROL ONE"; - case U'\u0012': - return U"DEVICE CONTROL TWO"; - case U'\u0013': - return U"DEVICE CONTROL THREE"; - case U'\u0014': - return U"DEVICE CONTROL FOUR"; - case U'\u0015': - return U"NEGATIVE ACKNOWLEDGE"; - case U'\u0016': - return U"SYNCHRONOUS IDLE"; - case U'\u0017': - return U"END OF TRANSMISSION BLOCk"; - case U'\u0018': - return U"CANCEL"; - case U'\u0019': - return U"END OF MEDIUM"; - case U'\u001A': - return U"SUBSTITUTE"; - case U'\u001B': - return U"ESCAPE"; - case U'\u001C': - return U"FILE SEPERATOR"; - case U'\u001D': - return U"GROUP SEPERATOR"; - case U'\u001E': - return U"RECORD SEPERATOR"; - case U'\u001F': - return U"UNIT SEPERATOR"; - case U'\u0020': - return U"SPACE"; - case U'\u0021': - return U"EXCLAMATION MARK"; - case U'\u0022': - return U"QUOTATION MARK"; - case U'\u0023': - return U"NUMBER SIGN"; - case U'\u0024': - return U"DOLLAR SIGN"; - case U'\u0025': - return U"PERCENT SIGN"; - case U'\u0026': - return U"AMPERSAND"; - case U'\u0027': - return U"APOSTROPHE"; - case U'\u0028': - return U"LEFT PARANTHESIS"; - case U'\u0029': - return U"RIGHT PARANTHESIS"; - case U'\u002A': - return U"ASTERISK"; - case U'\u002B': - return U"PLUS SIGN"; - case U'\u002C': - return U"COMMA"; - case U'\u002D': - return U"HYPHEN-MINUS"; - case U'\u002E': - return U"FULL STOP"; - case U'\u002F': - return U"SOLIDUS"; - case U'\u0030': - return U"DIGIT ZERO"; - case U'\u0031': - return U"DIGIT ONE"; - case U'\u0032': - return U"DIGIT TWO"; - case U'\u0033': - return U"DIGIT THREE"; - case U'\u0034': - return U"DIGIT FOUR"; - case U'\u0035': - return U"DIGIT FIVE"; - case U'\u0036': - return U"DIGIT SIX"; - case U'\u0037': - return U"DIGIT SEVEN"; - case U'\u0038': - return U"DIGIT EIGHT"; - case U'\u0039': - return U"DIGIT NINE"; - case U'\u003A': - return U"COLON"; - case U'\u003B': - return U"SEMICOLON"; - case U'\u003C': - return U"LESS-THAN SIGN"; - case U'\u003D': - 
return U"EQUALS SIGN"; - case U'\u003E': - return U"GREATER-THAN SIGN"; - case U'\u003F': - return U"QUESTION MARK"; - case U'\u0040': - return U"COMMERCIAL AT"; - case U'\u0041': - return U"LATIN CAPITAL LETTER A"; - case U'\u0042': - return U"LATIN CAPITAL LETTER B"; - case U'\u0043': - return U"LATIN CAPITAL LETTER C"; - case U'\u0044': - return U"LATIN CAPITAL LETTER D"; - case U'\u0045': - return U"LATIN CAPITAL LETTER E"; - case U'\u0046': - return U"LATIN CAPITAL LETTER F"; - case U'\u0047': - return U"LATIN CAPITAL LETTER G"; - case U'\u0048': - return U"LATIN CAPITAL LETTER H"; - case U'\u0049': - return U"LATIN CAPITAL LETTER I"; - case U'\u004A': - return U"LATIN CAPITAL LETTER J"; - case U'\u004B': - return U"LATIN CAPITAL LETTER K"; - case U'\u004C': - return U"LATIN CAPITAL LETTER L"; - case U'\u004D': - return U"LATIN CAPITAL LETTER M"; - case U'\u004E': - return U"LATIN CAPITAL LETTER N"; - case U'\u004F': - return U"LATIN CAPITAL LETTER O"; - case U'\u0050': - return U"LATIN CAPITAL LETTER P"; - case U'\u0051': - return U"LATIN CAPITAL LETTER Q"; - case U'\u0052': - return U"LATIN CAPITAL LETTER R"; - case U'\u0053': - return U"LATIN CAPITAL LETTER S"; - case U'\u0054': - return U"LATIN CAPITAL LETTER T"; - case U'\u0055': - return U"LATIN CAPITAL LETTER U"; - case U'\u0056': - return U"LATIN CAPITAL LETTER V"; - case U'\u0057': - return U"LATIN CAPITAL LETTER W"; - case U'\u0058': - return U"LATIN CAPITAL LETTER X"; - case U'\u0059': - return U"LATIN CAPITAL LETTER Y"; - case U'\u005A': - return U"LATIN CAPITAL LETTER Z"; - case U'\u005B': - return U"LEFT SQUARE BRACKET"; - case U'\u005C': - return U"REVERSE SOLIDUS"; - case U'\u005D': - return U"RIGHT SQUARE BRACKET"; - case U'\u005E': - return U"CIRCUMFLEX ACCENT"; - case U'\u005F': - return U"LOW LINE"; - case U'\u0060': - return U"GRAVE ACCENT"; - case U'\u0061': - return U"LATIN SMALL LETTER A"; - case U'\u0062': - return U"LATIN SMALL LETTER B"; - case U'\u0063': - return U"LATIN SMALL 
LETTER C"; - case U'\u0064': - return U"LATIN SMALL LETTER D"; - case U'\u0065': - return U"LATIN SMALL LETTER E"; - case U'\u0066': - return U"LATIN SMALL LETTER F"; - case U'\u0067': - return U"LATIN SMALL LETTER G"; - case U'\u0068': - return U"LATIN SMALL LETTER H"; - case U'\u0069': - return U"LATIN SMALL LETTER I"; - case U'\u006A': - return U"LATIN SMALL LETTER J"; - case U'\u006B': - return U"LATIN SMALL LETTER K"; - case U'\u006C': - return U"LATIN SMALL LETTER L"; - case U'\u006D': - return U"LATIN SMALL LETTER M"; - case U'\u006E': - return U"LATIN SMALL LETTER N"; - case U'\u006F': - return U"LATIN SMALL LETTER O"; - case U'\u0070': - return U"LATIN SMALL LETTER P"; - case U'\u0071': - return U"LATIN SMALL LETTER Q"; - case U'\u0072': - return U"LATIN SMALL LETTER R"; - case U'\u0073': - return U"LATIN SMALL LETTER S"; - case U'\u0074': - return U"LATIN SMALL LETTER T"; - case U'\u0075': - return U"LATIN SMALL LETTER U"; - case U'\u0076': - return U"LATIN SMALL LETTER V"; - case U'\u0077': - return U"LATIN SMALL LETTER W"; - case U'\u0078': - return U"LATIN SMALL LETTER X"; - case U'\u0079': - return U"LATIN SMALL LETTER Y"; - case U'\u007A': - return U"LATIN SMALL LETTER Z"; - case U'\u007B': - return U"LEFT CURLY BRACKET"; - case U'\u007C': - return U"VERTICAL LINE"; - case U'\u007D': - return U"RIGHT CURLY BRACKET"; - case U'\u007E': - return U"TILDE"; - case U'\u007F': - return U"DELETE"; - /* LATIN-1 SUPPLEMENT: */ - case U'\u0080': - return U"PADDING CHARACTER"; - case U'\u0081': - return U"HIGH OCTET PRESET"; - case U'\u0082': - return U"BREAK PERMITTED HERE"; - case U'\u0083': - return U"NO BREAK HERE"; - case U'\u0084': - return U"INDEX"; - case U'\u0085': - return U"NEXT LINE"; - case U'\u0086': - return U"START OF SELECTED AREA"; - case U'\u0087': - return U"END OF SELECTED AREA"; - case U'\u0088': - return U"CHARACTER TABULATION SET"; - case U'\u0089': - return U"CHARACTER TABULATION WITH JUSTIFICATION"; - case U'\u008A': - return U"LINE 
TABULATION SET"; - case U'\u008B': - return U"PARTIAL LINE FORWARD"; - case U'\u008C': - return U"PARTIAL LINE BACKWARD"; - case U'\u008D': - return U"REVERSE LINE FEED"; - case U'\u008E': - return U"SINGLE SHIFT TWO"; - case U'\u008F': - return U"SINGLE SHIFT THREE"; - case U'\u0090': - return U"DEVICE CONTROL STRING"; - case U'\u0091': - return U"PRIVATE USE ONE"; - case U'\u0092': - return U"PRIVATE USE TWO"; - case U'\u0093': - return U"SET TRANSMIT STATE"; - case U'\u0094': - return U"CANCEL CHARACTER"; - case U'\u0095': - return U"MESSAGE WAITING"; - case U'\u0096': - return U"START OF GUARDED AREA"; - case U'\u0097': - return U"END OF GUARDED AREA"; - case U'\u0098': - return U"START OF STRING"; - case U'\u0099': - return U"SINGLE GRAPHIC CHARACTER INTRODUCER"; - case U'\u009A': - return U"SINGLE CHARACTER INTRODUCER"; - case U'\u009B': - return U"CONTROL SEQUENCE INTRODUCER"; - case U'\u009C': - return U"STRING TERMINATOR"; - case U'\u009D': - return U"OPERATING SYSTEM COMMAND"; - case U'\u009E': - return U"PRIVACY MESSAGE"; - case U'\u009F': - return U"APPLICATION PROGRAM COMMAND"; - case U'\u00A0': - return U"NO-BREAK SPACE"; - case U'\u00A1': - return U"INVERTED EXCLAMATION MARK"; - case U'\u00A2': - return U"CENT SIGN"; - case U'\u00A3': - return U"POUND SIGN"; - case U'\u00A4': - return U"CURRENCY SIGN"; - case U'\u00A5': - return U"YEN SIGN"; - case U'\u00A6': - return U"BROKEN BAR"; - case U'\u00A7': - return U"SECTION SIGN"; - case U'\u00A8': - return U"DIAERESIS"; - case U'\u00A9': - return U"COPYRIGHT SIGN"; - case U'\u00AA': - return U"FEMININE ORDINAL INDICATOR"; - case U'\u00AB': - return U"LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"; - case U'\u00AC': - return U"NOT SIGN"; - case U'\u00AD': - return U"SOFT HYPHEN"; - case U'\u00AE': - return U"REGISTERED SIGN"; - case U'\u00AF': - return U"MACRON"; - case U'\u00B0': - return U"DEGREE SIGN"; - case U'\u00B1': - return U"PLUS MINUS SYMBOL"; - case U'\u00B2': - return U"SUPERSCRIPT TWO"; - case 
U'\u00B3': - return U"SUPERSCRIPT THREE"; - case U'\u00B4': - return U"ACUTE ACCENT"; - case U'\u00B5': - return U"MICRO SIGN"; - case U'\u00B6': - return U"PILCROW SIGN"; - case U'\u00B7': - return U"MIDDLE DOT"; - case U'\u00B8': - return U"CEDILLA"; - case U'\u00B9': - return U"SUPERSCRIPT ONE"; - case U'\u00BA': - return U"MASCULINE ORDINAL INDICATOR"; - case U'\u00BB': - return U"RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"; - case U'\u00BC': - return U"VULGAR FRACTION ONE QUARTER"; - case U'\u00BD': - return U"VULGAR FRACTION ONE HALF"; - case U'\u00BE': - return U"VULGAR FRACTION THREE QUARTERS"; - case U'\u00BF': - return U"INVERTED QUESTION MARK"; - case U'\u00C0': - return U"LATIN CAPITAL LETTER A WITH GRAVE"; - case U'\u00C1': - return U"LATIN CAPITAL LETTER A WITH ACUTE"; - case U'\u00C2': - return U"LATIN CAPITAL LETTER A WITH CIRCUMFLEX"; - case U'\u00C3': - return U"LATIN CAPITAL LETTER A WITH TILDE"; - case U'\u00C4': - return U"LATIN CAPITAL LETTER A WITH DIAERESIS"; - case U'\u00C5': - return U"LATIN CAPITAL LETTER A WITH RING ABOVE"; - case U'\u00C6': - return U"LATIN CAPITAL LETTER AE"; - case U'\u00C7': - return U"LATIN CAPITAL LETTER C WITH CEDILLA"; - case U'\u00C8': - return U"LATIN CAPITAL LETTER E WITH GRAVE"; - case U'\u00C9': - return U"LATIN CAPITAL LETTER E WITH ACUTE"; - case U'\u00CA': - return U"LATIN CAPITAL LETTER E WITH CIRCUMFLEX"; - case U'\u00CB': - return U"LATIN CAPITAL LETTER E WITH DIAERESIS"; - case U'\u00CC': - return U"LATIN CAPITAL LETTER I WITH GRAVE"; - case U'\u00CD': - return U"LATIN CAPITAL LETTER I WITH ACUTE"; - case U'\u00CE': - return U"LATIN CAPITAL LETTER I WITH CIRCUMFLEX"; - case U'\u00CF': - return U"LATIN CAPITAL LETTER I WITH DIAERESIS"; - case U'\u00D0': - return U"LATIN CAPITAL LETTER ETH"; - case U'\u00D1': - return U"LATIN CAPITAL LETTER N WITH TILDE"; - case U'\u00D2': - return U"LATIN CAPITAL LETTER O WITH GRAVE"; - case U'\u00D3': - return U"LATIN CAPITAL LETTER O WITH ACUTE"; - case U'\u00D4': - 
return U"LATIN CAPITAL LETTER O WITH CIRCUMFLEX"; - case U'\u00D5': - return U"LATIN CAPITAL LETTER O WITH TILDE"; - case U'\u00D6': - return U"LATIN CAPITAL LETTER O WITH DIAERESIS"; - case U'\u00D7': - return U"MULTIPLICATION SIGN"; - case U'\u00D8': - return U"LATIN CAPITAL LETTER O WITH STROKE"; - case U'\u00D9': - return U"LATIN CAPITAL LETTER U WITH GRAVE"; - case U'\u00DA': - return U"LATIN CAPITAL LETTER U WITH ACUTE"; - case U'\u00DB': - return U"LATIN CAPITAL LETTER U WITH CIRCUMFLEX"; - case U'\u00DC': - return U"LATIN CAPITAL LETTER U WITH DIAERESIS"; - case U'\u00DD': - return U"LATIN CAPITAL LETTER Y WITH ACUTE"; - case U'\u00DE': - return U"LATIN CAPITAL LETTER THORN"; - case U'\u00DF': - return U"LATIN SMALL LETTER SHARP S"; - case U'\u00E0': - return U"LATIN SMALL LETTER A WITH GRAVE"; - case U'\u00E1': - return U"LATIN SMALL LETTER A WITH ACUTE"; - case U'\u00E2': - return U"LATIN SMALL LETTER A WITH CIRCUMFLEX"; - case U'\u00E3': - return U"LATIN SMALL LETTER A WITH TILDE"; - case U'\u00E4': - return U"LATIN SMALL LETTER A WITH DIAERESIS"; - case U'\u00E5': - return U"LATIN SMALL LETTER A WITH RING ABOVE"; - case U'\u00E6': - return U"LATIN SMALL LETTER AE"; - case U'\u00E7': - return U"LATIN SMALL LETTER C WITH CEDILLA"; - case U'\u00E8': - return U"LATIN SMALL LETTER E WITH GRAVE"; - case U'\u00E9': - return U"LATIN SMALL LETTER E WITH ACUTE"; - case U'\u00EA': - return U"LATIN SMALL LETTER E WITH CIRCUMFLEX"; - case U'\u00EB': - return U"LATIN SMALL LETTER E WITH DIAERESIS"; - case U'\u00EC': - return U"LATIN SMALL LETTER I WITH GRAVE"; - case U'\u00ED': - return U"LATIN SMALL LETTER I WITH ACUTE"; - case U'\u00EE': - return U"LATIN SMALL LETTER I WITH CIRCUMFLEX"; - case U'\u00EF': - return U"LATIN SMALL LETTER I WITH DIAERESIS"; - case U'\u00F0': - return U"LATIN SMALL LETTER ETH"; - case U'\u00F1': - return U"LATIN SMALL LETTER N WITH TILDE"; - case U'\u00F2': - return U"LATIN SMALL LETTER O WITH GRAVE"; - case U'\u00F3': - return U"LATIN 
SMALL LETTER O WITH ACUTE"; - case U'\u00F4': - return U"LATIN SMALL LETTER O WITH CIRCUMFLEX"; - case U'\u00F5': - return U"LATIN SMALL LETTER O WITH TILDE"; - case U'\u00F6': - return U"LATIN SMALL LETTER O WITH DIAERESIS"; - case U'\u00F7': - return U"DIVISION SIGN"; - case U'\u00F8': - return U"LATIN SMALL LETTER O WITH STROKE"; - case U'\u00F9': - return U"LATIN SMALL LETTER U WITH GRAVE"; - case U'\u00FA': - return U"LATIN SMALL LETTER U WITH ACUTE"; - case U'\u00FB': - return U"LATIN SMALL LETTER U WITH CIRCUMFLEX"; - case U'\u00FC': - return U"U WITH TWO DOTS"; - case U'\u00FD': - return U"LATIN SMALL LETTER Y WITH ACUTE"; - case U'\u00FE': - return U"LATIN SMALL LETTER THORN"; - case U'\u00FF': - return U"LATIN SMALL LETTER Y WITH DIAERESIS"; - /* LATIN EXTENDED-A: */ - case U'\u0100': - return U"LATIN CAPITAL LETTER A WITH MACRON"; - case U'\u0101': - return U"LATIN SMALL LETTER A WITH MACRON"; - case U'\u0102': - return U"LATIN CAPITAL LETTER A WITH BREVE"; - case U'\u0103': - return U"LATIN SMALL LETTER A WITH BREVE"; - case U'\u0104': - return U"LATIN CAPITAL LETTER A WITH OGONEK"; - case U'\u0105': - return U"LATIN SMALL LETTER A WITH OGONEK"; - case U'\u0106': - return U"LATIN CAPITAL LETTER C WITH ACUTE"; - case U'\u0107': - return U"LATIN SMALL LETTER C WITH ACUTE"; - case U'\u0108': - return U"LATIN CAPITAL LETTER C WITH CIRCUMFLEX"; - case U'\u0109': - return U"LATIN SMALL LETTER C WITH CIRCUMFLEX"; - case U'\u010A': - return U"LATIN CAPITAL LETTER C WITH DOT ABOVE"; - case U'\u010B': - return U"LATIN SMALL LETTER C WITH DOT ABOVE"; - case U'\u010C': - return U"LATIN CAPITAL LETTER C WITH CARON"; - case U'\u010D': - return U"LATIN SMALL LETTER C WITH CARON"; - case U'\u010E': - return U"LATIN CAPITAL LETTER D WITH CARON"; - case U'\u010F': - return U"LATIN SMALL LETTER D WITH CARON"; - case U'\u0110': - return U"LATIN CAPITAL LETTER D WITH STROKE"; - case U'\u0111': - return U"LATIN SMALL LETTER D WITH STROKE"; - case U'\u0112': - return U"LATIN 
CAPITAL LETTER E WITH MACRON"; - case U'\u0113': - return U"LATIN SMALL LETTER E WITH MACRON"; - case U'\u0114': - return U"LATIN CAPITAL LETTER E WITH BREVE"; - case U'\u0115': - return U"LATIN SMALL LETTER E WITH BREVE"; - case U'\u0116': - return U"LATIN CAPITAL LETTER E WITH DOT ABOVE"; - case U'\u0117': - return U"LATIN SMALL LETTER E WITH DOT ABOVE"; - case U'\u0118': - return U"LATIN CAPITAL LETTER E WITH OGONEK"; - case U'\u0119': - return U"LATIN SMALL LETTER E WITH OGONEK"; - case U'\u011A': - return U"LATIN CAPITAL LETTER E WITH CARON"; - case U'\u011B': - return U"LATIN SMALL LETTER E WITH CARON"; - case U'\u011C': - return U"LATIN CAPITAL LETTER G WITH CIRCUMFLEX"; - case U'\u011D': - return U"LATIN SMALL LETTER G WITH CIRCUMFLEX"; - case U'\u011E': - return U"LATIN CAPITAL LETTER G WITH BREVE"; - case U'\u011F': - return U"LATIN SMALL LETTER G WITH BREVE"; - case U'\u0120': - return U"LATIN CAPITAL LETTER G WITH DOT ABOVE"; - case U'\u0121': - return U"LATIN SMALL LETTER G WITH DOT ABOVE"; - case U'\u0122': - return U"LATIN CAPITAL LETTER G WITH CEDILLA"; - case U'\u0123': - return U"LATIN SMALL LETTER G WITH CEDILLA"; - case U'\u0124': - return U"LATIN CAPITAL LETTER H WITH CIRCUMFLEX"; - case U'\u0125': - return U"LATIN SMALL LETTER H WITH CIRCUMFLEX"; - case U'\u0126': - return U"LATIN CAPITAL LETTER H WITH STROKE"; - case U'\u0127': - return U"LATIN SMALL LETTER H WITH STROKE"; - case U'\u0128': - return U"LATIN CAPITAL LETTER I WITH TILDE"; - case U'\u0129': - return U"LATIN SMALL LETTER I WITH TILDE"; - case U'\u012A': - return U"LATIN CAPITAL LETTER I WITH MACRON"; - case U'\u012B': - return U"LATIN SMALL LETTER I WITH MACRON"; - case U'\u012C': - return U"LATIN CAPITAL LETTER I WITH BREVE"; - case U'\u012D': - return U"LATIN SMALL LETTER I WITH BREVE"; - case U'\u012E': - return U"LATIN CAPITAL LETTER I WITH OGONEK"; - case U'\u012F': - return U"LATIN SMALL LETTER I WITH OGONEK"; - case U'\u0130': - return U"LATIN CAPITAL LETTER I WITH DOT 
ABOVE"; - case U'\u0131': - return U"LATIN SMALL LETTER DOTLESS I"; - case U'\u0132': - return U"LATIN CAPITAL LIGATURE IJ"; - case U'\u0133': - return U"LATIN SMALL LIGATURE IJ"; - case U'\u0134': - return U"LATIN CAPITAL LETTER J WITH CIRCUMFLEX"; - case U'\u0135': - return U"LATIN SMALL LETTER J WITH CIRCUMFLEX"; - case U'\u0136': - return U"LATIN CAPITAL LETTER K WITH CEDILLA"; - case U'\u0137': - return U"LATIN SMALL LETTER K WITH CEDILLA"; - case U'\u0138': - return U"LATIN SMALL LETTER KRA"; - case U'\u0139': - return U"LATIN CAPITAL LETTER L WITH ACUTE"; - case U'\u013A': - return U"LATIN SMALL LETTER L WITH ACUTE"; - case U'\u013B': - return U"LATIN CAPITAL LETTER L WITH CEDILLA"; - case U'\u013C': - return U"LATIN SMALL LETTER L WITH CEDILLA"; - case U'\u013D': - return U"LATIN CAPITAL LETTER L WITH CARON"; - case U'\u013E': - return U"LATIN SMALL LETTER L WITH CARON"; - case U'\u013F': - return U"LATIN CAPITAL LETTER L WITH MDDLE DOT"; - case U'\u0140': - return U"LATIN SMALL LETTER L WITH MIDDLE DOT"; - case U'\u0150': - return U"LATIN CAPITAL LETTER O WITH DOUBLE ACUTE"; - case U'\u0160': - return U"LATIN CAPITAL LETTER S WITH CARON"; - case U'\u0170': - return U"LATIN CAPITAL LETTER U WITH DOUBLE ACUTE"; - /* LATIN EXTENDED-B: */ - case U'\u0180': - return U"LATIN SMALL LETTER B WITH STROKE"; - case U'\u0190': - return U"LATIN CAPITAL LETTER OPEN E"; - case U'\u01A0': - return U"LATIN CAPITAL LETTER O WITH HORN"; - case U'\u01B0': - return U"LATIN SMALL LETTER U WITH HORN"; - case U'\u01C0': - return U"LATIN LETTER DENTAL CLICK"; - case U'\u01D0': - return U"LATIN SMALL LETTER I WITH CARON"; - case U'\u01E0': - return U"LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON"; - case U'\u01F0': - return U"LATIN SMALL LETTER J WITH CARON"; - case U'\u0200': - return U"LATIN CAPITAL LETTER A WITH DOUBLE GRAVE"; - case U'\u0210': - return U"LATIN CAPITAL LETTER R WITH DOUBLE GRAVE"; - case U'\u0220': - return U"LATIN CAPITAL LETTER N WITH LONG RIGHT LEG"; - 
case U'\u0230': - return U"LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON"; - case U'\u0240': - return U"LATIN SMALL LETTER Z WITH SWASH TAIL"; - /* IPA EXTENSIONS: */ - case U'\u0250': - return U"LATIN SMALL LETTER TURNED A"; - case U'\u0251': - return U"LATIN SMALL LETTER ALPHA"; - case U'\u0252': - return U"LATIN SMALL LETTER TURNED ALPHA"; - case U'\u0253': - return U"LATIN SMALL LETTER B WITH HOOK"; - case U'\u0254': - return U"LATIN SMALL LETTER OPEN O"; - case U'\u0255': - return U"LATIN SMALL LETTER C WITH CURL"; - case U'\u0256': - return U"LATIN SMALL LETTER D WITH TAIL"; - case U'\u0257': - return U"LATIN SMALL LETTER D WITH HOOK"; - case U'\u0258': - return U"LATIN SMALL LETTER REVERSED E"; - case U'\u0259': - return U"LATIN SMALL LETTER SCHWA"; - case U'\u025A': - return U"LATIN SMALL LETTER SCHWA WITH HOOK"; - case U'\u025B': - return U"LATIN SMALL LETTER OPEN E"; - case U'\u025C': - return U"LATIN SMALL LETTER REVERSED OPEN E"; - case U'\u025D': - return U"LATIN SMALL LETTER REVERSED OPEN E WITH HOOK"; - case U'\u025E': - return U"LATIN SMALL LETTER CLOSED REVERSED OPEN E"; - case U'\u025F': - return U"LATIN SMALL LETTER DOTLESS J WITH STROKE"; - case U'\u0260': - return U"LATIN SMALL LETTER G WITH HOOK"; - case U'\u0261': - return U"LATIN SMALL LETTER SCRIPT G"; - case U'\u0262': - return U"LATIN LETTER SMALL CAPITAL G"; - case U'\u0263': - return U"LATIN SMALL LETTER GAMMA"; - case U'\u0264': - return U"LATIN SMALL LETTER RAMS HORN"; - case U'\u0265': - return U"LATIN SMALL LETTER TURNED H"; - case U'\u0266': - return U"LATIN SMALL LETTER H WITH HOOK"; - case U'\u0267': - return U"LATIN SMALL LETTER HENG WITH HOOK"; - case U'\u0268': - return U"LATIN SMALL LETTER I WITH STROKE"; - case U'\u0269': - return U"LATIN SMALL LETTER IOTA"; - case U'\u026A': - return U"LATIN LETTER SMALL CAPITAL I"; - case U'\u026B': - return U"LATIN SMALL LETTER L WITH MIDDLE TILDE"; - case U'\u026C': - return U"LATIN SMALL LETTER L WITH BELT"; - case U'\u026D': - 
return U"LATIN SMALL LETTER L WITH RETROFLEX HOOK"; - case U'\u026E': - return U"LATIN SMALL LETTER LEZH"; - case U'\u026F': - return U"LATIN SMALL LETTER TURNED M"; - case U'\u0270': - return U"LATIN SMALL LETTER TURNED M WITH LONG LEG"; - case U'\u0271': - return U"LATIN SMALL LETTER M WITH HOOK"; - case U'\u0272': - return U"LATIN SMALL LETTER N WITH LEFT HOOK"; - case U'\u0273': - return U"LATIN SMALL LETTER N WITH RETROFLEX HOOK"; - case U'\u0274': - return U"LATIN LETTER SMALL CAPITAL N"; - case U'\u0275': - return U"LATIN SMALL LETTER BARRED O"; - case U'\u0276': - return U"LATIN LETTER SMALL CAPITAL OE"; - case U'\u0277': - return U"LATIN SMALL LETTER CLOSED OMEGA"; - case U'\u0278': - return U"LATIN SMALL LETTER PHI"; - case U'\u0279': - return U"LATIN SMALL LETTER TURNED R"; - case U'\u027A': - return U"LATIN SMALL LETTER TURNED R WITH LONG LEG"; - case U'\u027B': - return U"LATIN SMALL LETTER TURNED R WITH HOOK"; - case U'\u027C': - return U"LATIN SMALL LETTER R WITH LONG LEG"; - case U'\u027D': - return U"LATIN SMALL LETTER R WITH TAIL"; - case U'\u027E': - return U"LATIN SMALL LETTER R WITH FISHHOOK"; - case U'\u027F': - return U"LATIN SMALL LETTER REVERSED R WITH FISHHOOK"; - case U'\u0280': - return U"LATIN LETTER SMALL CAPITAL R"; - case U'\u0281': - return U"LATIN LETTER SMALL CAPITAL INVERTED R"; - case U'\u0282': - return U"LATIN SMALL LETTER S WITH HOOK"; - case U'\u0283': - return U"LATIN SMALL LETTER ESH"; - case U'\u0284': - return U"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK"; - case U'\u0285': - return U"LATIN SMALL LETTER SQUAT REVERSED ESH"; - case U'\u0286': - return U"LATIN SMALL LETTER SH WITH CURL"; - case U'\u0287': - return U"LATIN SMALL LETTER TURNED T"; - case U'\u0288': - return U"LATIN SMALL LETTER T WITH RETROFLEX HOOK"; - case U'\u0289': - return U"LATIN SMALL LETTER U BAR"; - case U'\u028A': - return U"LATIN SMALL LETTER UPSILON"; - case U'\u028B': - return U"LATIN SMALL LETTER V WTIH HOOK"; - case U'\u028C': - return 
U"LATIN SMALL LETTER TURNED V"; - case U'\u028D': - return U"LATIN SMALL LETTER TURNED W"; - case U'\u028E': - return U"LATIN SMALL LETTER TURNED Y"; - case U'\u028F': - return U"LATIN LETTER SMALL CAPITAL Y"; - case U'\u0290': - return U"LATIN SMALL LETTER Z WITH RETROFLEX HOOK"; - case U'\u0291': - return U"LATIN SMALL LETTER Z WITH RETROFLEX"; - case U'\u0292': - return U"LATIN SMALL LETTER EZH"; - case U'\u0293': - return U"LATIN SMALL LETTER EZH WITH CURL"; - case U'\u0294': - return U"LATIN LETTER GLOTTAL STOP"; - case U'\u0295': - return U"LATIN LETTER PHARYNGEAL VOICED FRICATIVE"; - case U'\u0296': - return U"LATIN LETTER INVERTED GLOTTAL STOP"; - case U'\u0297': - return U"LATIN LETTER STRETCHED C"; - case U'\u0298': - return U"LATIN LETTER BILABIAL CLICK"; - case U'\u0299': - return U"LATIN LETTER SMALL CAPITAL B"; - case U'\u029A': - return U"LATIN SMALL LETTER CLOSED OPEN E"; - case U'\u029B': - return U"LATIN LETTER SMALL CAPITAL G WITH HOOK"; - case U'\u029C': - return U"LATIN LETTER SMALL CAPITAL H"; - case U'\u029D': - return U"LATIN SMALL LETTER J WITH CROSSED-TAIL"; - case U'\u029E': - return U"LATIN SMALL LETTER TURNED K"; - case U'\u029F': - return U"LATIN LETTER SMALL CAPITAL L"; - case U'\u02A0': - return U"LATIN SMALL LETTER Q WITH HOOK"; - case U'\u02A1': - return U"LATIN LETTER GLOTTAL STOP WITH STROKE"; - case U'\u02A2': - return U"LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE"; - case U'\u02A3': - return U"LATIN SMALL LETTER DZ DIGRAPH"; - case U'\u02A4': - return U"LATIN SMALL LETTER DEZH DIGRAPH"; - case U'\u02A5': - return U"LATIN SMALL LETTER DZ DIGRAPH WITH CURL"; - case U'\u02A6': - return U"LATIN SMALL LETTER TS DIGRAPH"; - case U'\u02A7': - return U"LATIN SMALL LETTER TESH DIGRAPH"; - case U'\u02A8': - return U"LATIN SMALL LETTER TC DIGRAPH WITH CURL"; - case U'\u02A9': - return U"LATIN SMALL LETTER FENG DIGRAPH"; - case U'\u02AA': - return U"LATIN SMALL LETTER LS DIGRAPH"; - case U'\u02AB': - return U"LATIN SMALL LETTER LZ 
DIGRAPH"; - case U'\u02AC': - return U"LATIN LETTER BILABIAL PERCUSSIVE"; - case U'\u02AD': - return U"LATIN LETTER BIDENTAL PERCUSSIVE"; - case U'\u02AE': - return U"LATIN SMALL LETTER TURNED H WITH FISHHOOK"; - case U'\u02AF': - return U"LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL"; - /* SPACING MODIFIER LETTERS: */ - case U'\u02B0': - return U"MODIFIER LETTER SMALL H"; - case U'\u02B1': - return U"MODIFIER LETTER SMALL H WITH HOOK"; - case U'\u02B2': - return U"MODIFIER LETTER SMALL J"; - case U'\u02B3': - return U"MODIFIER LETTER SMALL R"; - case U'\u02B4': - return U"MODIFIER LETTER SMALL TURNED R"; - case U'\u02B5': - return U"MODIFIER LETTER SMALL TURNED R WITH HOOK"; - case U'\u02B6': - return U"MODIFIER LETTER SMALL CAPITAL INVERTED R"; - case U'\u02B7': - return U"MODIFIER LETTER SMALL W"; - case U'\u02B8': - return U"MODIFIER LETTER SMALL Y"; - case U'\u02B9': - return U"MODIFIER LETTER PRIME"; - case U'\u02BA': - return U"MODIFIER LETTER DOUBLE PRIME"; - case U'\u02BB': - return U"MODIFIER LETTER TURNED COMMA"; - case U'\u02BC': - return U"MODIFIER LETTER APOSTROPHE"; - case U'\u02BD': - return U"MODIFIER LETTER REVERSED COMMA"; - case U'\u02BE': - return U"MODIFIER LETTER RIGHT HALF RING"; - case U'\u02BF': - return U"MODIFIER LETTER LEFT HALF RING"; - case U'\u02C0': - return U"MODIFIER LETTER GLOTTAL STOP"; - case U'\u02C1': - return U"MODIFIER LETTER REVERSED GLOTTAL STOP"; - case U'\u02C2': - return U"MODIFIER LETTER LEFT ARROWHEAD"; - case U'\u02C3': - return U"MODIFIER LETTER RIGHT ARROWHEAD"; - case U'\u02C4': - return U"MODIFIER LETTER UP ARROWHEAD"; - case U'\u02C5': - return U"MODIFIER LETTER DOWN ARROWHEAD"; - case U'\u02C6': - return U"MODIFIER LETTER CIRCUMFLEX"; - case U'\u02C7': - return U"CARON"; - case U'\u02C8': - return U"MODIFIER LETTER VERTICAL LINE"; - case U'\u02C9': - return U"MODIFIER LETTER MACRON"; - case U'\u02CA': - return U"MODIFIER LETTER ACUTE ACCENT"; - case U'\u02CB': - return U"MODIFIER LETTER GRAVE ACCENT"; - 
case U'\u02CC': - return U"MODIFIER LETTER LOW VERTICAL LINE"; - case U'\u02CD': - return U"MODIFIER LETTER LOW MACRON"; - case U'\u02CE': - return U"MODIFIER LETTER LOW GRAVE ACCENT"; - case U'\u02CF': - return U"MODIFIER LETTER LOW ACUTE ACCENT"; - case U'\u02D0': - return U"MODIFIER LETTER TRIANGULAR COLON"; - case U'\u02D1': - return U"MODIFIER LETTER HALF TRIANGULAR COLON"; - case U'\u02D2': - return U"MODIFIER LETTER CENTRED RIGHT HALF RING"; - case U'\u02D3': - return U"MODIFIER LETTER CENTRED LEFT HALF RING"; - case U'\u02D4': - return U"MODIFIER LETTER UP TACK"; - case U'\u02D5': - return U"MODIFIER LETTER DOWN TACK"; - case U'\u02D6': - return U"MODIFIER LETTER PLUS SIGN"; - case U'\u02D7': - return U"MODIFIER LETTER MINUS SIGN"; - case U'\u02D8': - return U"BREVE"; - case U'\u02D9': - return U"DOT ABOVE"; - case U'\u02DA': - return U"RING ABOVE"; - case U'\u02DB': - return U"OGONEK"; - case U'\u02DC': - return U"SMALL TILDE"; - case U'\u02DD': - return U"DOUBLE ACUTE ACCENT"; - case U'\u02DE': - return U"MODIFIER LETTER RHOTIC HOOK"; - case U'\u02DF': - return U"MODIFIER LETTER CROSS ACCENT"; - case U'\u02E0': - return U"MODIFIER LETTER SMALL GAMMA"; - case U'\u02E1': - return U"MODIFIER LETTER SMALL L"; - case U'\u02E2': - return U"MODIFIER LETTER SMALL S"; - case U'\u02E3': - return U"MODIFIER LETTER SMALL X"; - case U'\u02E4': - return U"MODIFIER LETTER SMALL REVERSED GLOTTAL STOP"; - case U'\u02E5': - return U"MODIFIER LETTER EXTRA-HIGH TONE BAR"; - case U'\u02E6': - return U"MODIFIER LETTER HIGH TONE BAR"; - case U'\u02E7': - return U"MODIFIER LETTER MID TONE BAR"; - case U'\u02E8': - return U"MODIFIER LETTER LOW TONE BAR"; - case U'\u02E9': - return U"MODIFIER LETTER EXTRA-LOW TONE BAR"; - case U'\u02EA': - return U"MODIFIER LETTER YIN DEPARTING TONE MARK"; - case U'\u02EB': - return U"MODIFIER LETTER YANG DEPARTING TONE MARK"; - case U'\u02EC': - return U"MODIFIER LETTER VOICING"; - case U'\u02ED': - return U"MODIFIER LETTER UNASPIRATED"; - case 
U'\u02EE': - return U"MODIFIER LETTER DOUBLE APOSTROPHE"; - case U'\u02EF': - return U"MODIFIER LETTER LOW DOWN ARROWHEAD"; - case U'\u02F0': - return U"MODIFIER LETTER LOW UP ARROWHEAD"; - case U'\u02F1': - return U"MODIFIER LETTER LOW LEFT ARROWHEAD"; - case U'\u02F2': - return U"MODIFIER LETTER LOW RIGHT ARROWHEAD"; - case U'\u02F3': - return U"MODIFIER LETTER LOW RING"; - case U'\u02F4': - return U"MODIFIER LETTER MIDDLE GRAVE ACCENT"; - case U'\u02F5': - return U"MODIFIER LETTER MIDDLE DOUBLE GRAVE ACCENT"; - case U'\u02F6': - return U"MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT"; - case U'\u02F7': - return U"MODIFIER LETTER LOW TILDE"; - case U'\u02F8': - return U"MODIFIER LETTER RAISED COLON"; - case U'\u02F9': - return U"MODIFIER LETTER BEGIN HIGH TONE"; - case U'\u02FA': - return U"MODIFIER LETTER END HIGH TONE"; - case U'\u02FB': - return U"MODIFIER LETTER BEGIN LOW TONE"; - case U'\u02FC': - return U"MODIFIER LETTER END LOW TONE"; - case U'\u02FD': - return U"MODIFIER LETTER SHELF"; - case U'\u02FE': - return U"MODIFIER LETTER OPEN SHELF"; - case U'\u02FF': - return U"MODIFIER LETTER LOW LEFT ARROWHEAD"; - /* COMBINING DIACRITICAL MARKS: */ - case U'\u0300': - return U"COMBINING GRAVE ACCENT"; - case U'\u0301': - return U"COMBINING ACUTE ACCENT"; - case U'\u0302': - return U"COMBINING CIRCUMFLEX ACCENT"; - case U'\u0303': - return U"COMBINING TILDE"; - case U'\u0304': - return U"COMBINING MACRON"; - case U'\u0305': - return U"COMBINING OVERLINE"; - case U'\u0306': - return U"COMBINING BREVE"; - case U'\u0307': - return U"COMBINING DOT ABOVE"; - case U'\u0308': - return U"COMBINING DIAERESIS"; - case U'\u0309': - return U"COMBINING HOOK ABOVE"; - case U'\u030A': - return U"COMBINING RING ABOVE"; - case U'\u030B': - return U"COMBINING DOUBLE ACUTE ACCENT"; - case U'\u030C': - return U"COMBINING CARON"; - case U'\u030D': - return U"COMBINING VERTICAL LINE ABOVE"; - case U'\u030E': - return U"COMBINING DOUBLE VERTICAL LINE ABOVE"; - case U'\u030F': - return 
U"COMBINING DOUBLE GRAVE ACCENT"; - case U'\u0310': - return U"COMBINING CANDRABINDU"; - case U'\u0311': - return U"COMBINING INVERTED BREVE"; - case U'\u0312': - return U"COMBINING TURNED COMMA ABOVE"; - case U'\u0313': - return U"COMBINING COMMA ABOVE"; - case U'\u0314': - return U"COMBINING REVERSED COMMA ABOVE"; - case U'\u0315': - return U"COMBINING COMMA ABOVE RIGHT"; - case U'\u0316': - return U"COMBINING GRAVE ACCENT BELOW"; - case U'\u0317': - return U"COMBINING ACUTE ACCENT BELOW"; - case U'\u0318': - return U"COMBINING LEFT TACK BELOW"; - case U'\u0319': - return U"COMBINING RIGHT TACK BELOW"; - case U'\u031A': - return U"COMBINING LEFT ANGLE ABOVE"; - case U'\u031B': - return U"COMBINING HORN"; - case U'\u031C': - return U"COMBINING LEFT HALF RING BELOW"; - case U'\u031D': - return U"COMBINING UP TACK BELOW"; - case U'\u031E': - return U"COMBINING DOWN TACK BELOW"; - case U'\u031F': - return U"COMBINING PLUS SIGN BELOW"; - case U'\u0320': - return U"COMBINING MINUS SIGN BELOW"; - case U'\u0321': - return U"COMBINING PALATALIZED HOOK BELOW"; - case U'\u0322': - return U"COMBINING RETROFLEX HOOK BELOW"; - case U'\u0323': - return U"COMBINING DOT BELOW"; - case U'\u0324': - return U"COMBINING DIAERESIS BELOW"; - case U'\u0325': - return U"COMBINING RING BELOW"; - case U'\u0326': - return U"COMBINING COMMA BELOW"; - case U'\u0327': - return U"COMBINING CEDILLA"; - case U'\u0328': - return U"COMBINING OGONEK"; - case U'\u0329': - return U"COMBINING VERTICAL LINE BELOW"; - case U'\u032A': - return U"COMBINING BRDIGE BELOW"; - case U'\u032B': - return U"COMBINING INVERTED DOUBLE ARCH BELOW"; - case U'\u032C': - return U"COMBINING CARON BELOW"; - case U'\u032D': - return U"COMBINING CIRCUMFLEX ACCENT BELOW"; - case U'\u032E': - return U"COMBINING BREVE BELOW"; - case U'\u032F': - return U"COMBINING INVERTED BREVE BELOW"; - case U'\u0330': - return U"COMBINING TILDE BELOW"; - case U'\u0331': - return U"COMBINING MACRON BELOW"; - case U'\u0332': - return 
U"COMBINING LOW LINE"; - case U'\u0333': - return U"COMBINING DOUBLE LOW LINE"; - case U'\u0334': - return U"COMBINING TILDE OVERLAY"; - case U'\u0335': - return U"COMBINING SHORT STROKE OVERLAY"; - case U'\u0336': - return U"COMBINING LONG STROKE OVERLAY"; - case U'\u0337': - return U"COMBINING SHORT SOLIDUS OVERLAY"; - case U'\u0338': - return U"COMBINING LONG SOLIDUS OVERLAY"; - case U'\u0339': - return U"COMBINING RIGHT HALF RING BELOW"; - case U'\u033A': - return U"COMBINING INVERTED BRIDGE BELOW"; - case U'\u033B': - return U"COMBINING SQUARE BELOW"; - case U'\u033C': - return U"COMBINING SEAGULL BELOW"; - case U'\u033D': - return U"COMBINING X ABOVE"; - case U'\u033E': - return U"COMBINING VERTICAL TILDE"; - case U'\u033F': - return U"COMBINING DOUBLE OVERLINE"; - case U'\u0340': - return U"COMBINING GRAVE TONE MARK"; - case U'\u0341': - return U"COMBINING ACUTE TONE MARK"; - case U'\u0342': - return U"COMBINING GREEK PERISPOMENI"; - case U'\u0343': - return U"COMBINING GREEK KORONIS"; - case U'\u0344': - return U"COMBINING GREEK DIALYTIKA TONOS"; - case U'\u0345': - return U"COMBINING GREEK YPOGEGRAMMENI"; - case U'\u0346': - return U"COMBINING BRIDGE ABOVE"; - case U'\u0347': - return U"COMBINING EQUALS SIGN BELOW"; - case U'\u0348': - return U"COMBINING DOUBLE VERTICAL LINE BELOW"; - case U'\u0349': - return U"COMBINING LEFT ANGLE BELOW"; - case U'\u034A': - return U"COMBINING NOT TILDE ABOVE"; - case U'\u034B': - return U"COMBINING HOMOTHETIC ABOVE"; - case U'\u034C': - return U"COMBINING ALMOST EQUAL TO ABOVE"; - case U'\u034D': - return U"COMBINING LEFT RIGHT ARROW BELOW"; - case U'\u034E': - return U"COMBINING UPWARDS ARROW BELOW"; - case U'\u034F': - return U"COMBINING GRAPHEME JOINER"; - case U'\u0350': - return U"COMBINING RIGHT ARROWHEAD ABOVE"; - case U'\u0351': - return U"COMBINING LEFT HALF RING ABOVE"; - case U'\u0352': - return U"COMBINING FERMATA"; - case U'\u0353': - return U"COMBINING X BELOW"; - case U'\u0354': - return U"COMBINING LEFT 
ARROWHEAD BELOW"; - case U'\u0355': - return U"COMBINING RIGHT ARROWHEAD BELOW"; - case U'\u0356': - return U"COMBINING RIGHT ARROWHEAD AND UP ARROWHEAD BELOW"; - case U'\u0357': - return U"COMBINING RIGHT HALF RING ABOVE"; - case U'\u0358': - return U"COMBINING DOT ABOVE RIGHT"; - case U'\u0359': - return U"COMBINING ASTERISK BELOW"; - case U'\u035A': - return U"COMBINING DOUBLE RING BELOW"; - case U'\u035B': - return U"COMBINING ZIGZAG ABOVE"; - case U'\u035C': - return U"COMBINING DOUBLE BREVE BELOW"; - case U'\u035D': - return U"COMBINING DOUBLE BREVE"; - case U'\u035E': - return U"COMBINING DOUBLE MACRON"; - case U'\u035F': - return U"COMBINING DOUBLE MACRON BELOW"; - case U'\u0360': - return U"COMBINING DOUBLE TILDE"; - case U'\u0361': - return U"COMBINING DOUBLE INVERTED BREVE"; - case U'\u0362': - return U"COMBINING DOUBLE RIGHTWARDS ARROW BELOW"; - case U'\u0363': - return U"COMBINING LATIN SMALL LETTER A"; - case U'\u0364': - return U"COMBINING LATIN SMALL LETTER E"; - case U'\u0365': - return U"COMBINING LATIN SMALL LETTER I"; - case U'\u0366': - return U"COMBINING LATIN SMALL LETTER O"; - case U'\u0367': - return U"COMBINING LATIN SMALL LETTER U"; - case U'\u0368': - return U"COMBINING LATIN SMALL LETTER C"; - case U'\u0369': - return U"COMBINING LATIN SMALL LETTER D"; - case U'\u036A': - return U"COMBINING LATIN SMALL LETTER H"; - case U'\u036B': - return U"COMBINING LATIN SMALL LETTER M"; - case U'\u036C': - return U"COMBINING LATIN SMALL LETTER R"; - case U'\u036D': - return U"COMBINING LATIN SMALL LETTER T"; - case U'\u036E': - return U"COMBINING LATIN SMALL LETTER V"; - case U'\u036F': - return U"COMBINING LATIN SMALL LETTER X"; - /* GREEK AND COPTIC: */ - case U'\u0370': - return U"GREEK CAPITAL LETTER HETA"; - case U'\u0371': - return U"GREEK SMALL LETTER HETA"; - case U'\u0372': - return U"GREEK CAPITAL LETTER ARCHAIC SAMPI"; - case U'\u0373': - return U"GREEK SMALL LETTER ARCHAIC SAMPI"; - case U'\u0374': - return U"GREEK NUMERAL SIGN"; - case 
U'\u0375': - return U"GREEK LOWER NUMERAL SIGN"; - case U'\u0376': - return U"GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA"; - case U'\u0377': - return U"GREEK SMALL LETTER PAMPHYLIAN DIGAMMA"; - case U'\u037A': - return U"GREEK YPOGEGRAMMENI"; - case U'\u037B': - return U"GREEK SMALL REVERSED LUNATE SIGMA SYMBOL"; - case U'\u037C': - return U"GREEK SMALL DOTTED LUNATE SIGMA SYMBOL"; - case U'\u037D': - return U"GREEK SMALL REVERSED DOTTED LUNATE SIGMAL SYMBOL"; - case U'\u037E': - return U"GREEK QUESTION MARK"; - case U'\u037F': - return U"GREEK CAPITAL LETTER YOT"; - case U'\u0384': - return U"GREEK TONOS"; - case U'\u0385': - return U"GREEK DIALYTIKA TONOS"; - case U'\u0386': - return U"GREEK CAPITAL LETTER ALPHA WITH TONOS"; - case U'\u0387': - return U"GREEK ANO TELEIA"; - case U'\u0388': - return U"GREEK CAPITAL LETTER EPSILON WITH TONOS"; - case U'\u0389': - return U"GREEK CAPITAL LETTER ETA WITH TONOS"; - case U'\u038A': - return U"GREEK CAPITAL LETTER IOTA WITH TONOS"; - case U'\u038C': - return U"GREEK CAPITAL LETTER OMICRON WITH TONOS"; - case U'\u038E': - return U"GREEK CAPITAL LETTER USPILON WITH TONOS"; - case U'\u038F': - return U"GREEK CAPITAL LETTER OMEGA WITH TONOS"; - case U'\u0390': - return U"GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"; - case U'\u0391': - return U"GREEK CAPITAL LETTER ALPHA"; - case U'\u0392': - return U"GREEK CAPITAL LETTER BETA"; - case U'\u0393': - return U"GREEK CAPITAL LETTER GAMMA"; - case U'\u0394': - return U"GREEK CAPITAL LETTER DELTA"; - case U'\u0395': - return U"GREEK CAPITAL LETTER EPSILON"; - case U'\u0396': - return U"GREEK CAPITAL LETTER ZETA"; - case U'\u0397': - return U"GREEK CAPITAL LETTER ETA"; - case U'\u0398': - return U"GREEK CAPITAL LETTER THETA"; - case U'\u0399': - return U"GREEK CAPITAL LETTER IOTA"; - case U'\u039A': - return U"GREEK CAPITAL LETTER KAPPA"; - case U'\u039B': - return U"GREEK CAPITAL LETTER LAMBDA"; - case U'\u039C': - return U"GREEK CAPITAL LETTER MU"; - case U'\u039D': - return 
U"GREEK CAPITAL LETTER NU"; - case U'\u039E': - return U"GREEK CAPITAL LETTER XI"; - case U'\u039F': - return U"GREEK CAPITAL LETTER OMICRON"; - case U'\u03A0': - return U"GREEK CAPITAL LETTER PI"; - case U'\u03A1': - return U"GREEK CAPITAL LETTER RHO"; - case U'\u03A3': - return U"GREEK CAPITAL LETTER SIGMA"; - case U'\u03A4': - return U"GREEK CAPITAL LETTER TAU"; - case U'\u03A5': - return U"GREEK CAPITAL LETTER UPSILON"; - case U'\u03A6': - return U"GREEK CAPITAL LETTER PHI"; - case U'\u03A7': - return U"GREEK CAPITAL LETTER CHI"; - case U'\u03A8': - return U"GREEK CAPITAL LETTER PSI"; - case U'\u03A9': - return U"GREEK CAPITAL LETTER OMEGA"; - case U'\u03AA': - return U"GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"; - case U'\u03AB': - return U"GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"; - case U'\u03AC': - return U"GREEK SMALL LETTER ALPHA WITH TONOS"; - case U'\u03AD': - return U"GREEK SMALL LETTER EPSILON WITH TONOS"; - case U'\u03AE': - return U"GREEK SMALL LETTER ETA WITH TONOS"; - case U'\u03AF': - return U"GREEK SMALL LETTER IOTA WITH TONOS"; - case U'\u03B0': - return U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"; - case U'\u03B1': - return U"GREEK SMALL LETTER ALPHA"; - case U'\u03B2': - return U"GREEK SMALL LETTER BETA"; - case U'\u03B3': - return U"GREEK SMALL LETTER GAMMA"; - case U'\u03B4': - return U"GREEK SMALL LETTER DELTA"; - case U'\u03B5': - return U"GREEK SMALL LETTER EPSILON"; - case U'\u03B6': - return U"GREEK SMALL LETTER ZETA"; - case U'\u03B7': - return U"GREEK SMALL LETTER ETA"; - case U'\u03B8': - return U"GREEK SMALL LETTER THETA"; - case U'\u03B9': - return U"GREEK SMALL LETTER IOTA"; - case U'\u03BA': - return U"GREEK SMALL LETTER KAPPA"; - case U'\u03BB': - return U"GREEK SMALL LETTER LAMBDA"; - case U'\u03BC': - return U"GREEK SMALL LETTER MU"; - case U'\u03BD': - return U"GREEK SMALL LETTER NU"; - case U'\u03BE': - return U"GREEK SMALL LETTER XI"; - case U'\u03BF': - return U"GREEK SMALL LETTER OMICRON"; - case U'\u03C0': 
- return U"GREEK SMALL LETTER PI"; - case U'\u03C1': - return U"GREEK SMALL LETTER RHO"; - case U'\u03C2': - return U"GREEK SMALL LETTER FINAL SIGMA"; - case U'\u03C3': - return U"GREEK SMALL LETTER SIGMA"; - case U'\u03C4': - return U"GREEK SMALL LETTER TAU"; - case U'\u03C5': - return U"GREEK SMALL LETTER UPSILON"; - case U'\u03C6': - return U"GREEK SMALL LETTER PHI"; - case U'\u03C7': - return U"GREEK SMALL LETTER CHI"; - case U'\u03C8': - return U"GREEK SMALL LETTER PSI"; - case U'\u03C9': - return U"GREEK SMALL LETTER OMEGA"; - case U'\u03CA': - return U"GREEK SMALL LETTER IOTA WITH DIALYTIKA"; - case U'\u03CB': - return U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA"; - case U'\u03CC': - return U"GREEK SMALL LETTER OMICRON WITH TONOS"; - case U'\u03CD': - return U"GREEK SMALL LETTER UPSILON WITH TONOS"; - case U'\u03CE': - return U"GREEK SMALL LETTER OMEGA WITH TONOS"; - case U'\u03CF': - return U"GREEK CAPITAL KAI SYMBOL"; - case U'\u03D0': - return U"GREEK BETA SYMBOL"; - case U'\u03D1': - return U"GREEK THETA SYMBOL"; - case U'\u03D2': - return U"GREEK UPSILON WITH HOOK SYMBOL"; - case U'\u03D3': - return U"GREEK UPSILON WITH ACUTE AND HOOK SYMBOL"; - case U'\u03D4': - return U"GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL"; - case U'\u03D5': - return U"GREEK PHI SYMBOL"; - case U'\u03D6': - return U"GREEK PI SYMBOL"; - case U'\u03D7': - return U"GREEK KAI SYMBOL"; - case U'\u03D8': - return U"GREEK LETTER ARCHAIC KOPPA"; - case U'\u03D9': - return U"GREEK SMALL LETTER ARCHAIC KOPPA"; - case U'\u03DA': - return U"GREEK LETTER STIGMA"; - case U'\u03DB': - return U"GREEK SMALL LETTER STIGMA"; - case U'\u03DC': - return U"GREEK LETTER DIGAMMA"; - case U'\u03DD': - return U"GREEK SMALL LETTER DIGAMMA"; - case U'\u03DE': - return U"GREEK LETTER KOPPA"; - case U'\u03DF': - return U"GREEK SMALL LETTER KOPPA"; - case U'\u03E0': - return U"GREEK LETTER SAMPI"; - case U'\u03F0': - return U"GREEK KAPPA SYMBOL"; - /* HEBREW: */ - case U'\u05D0': - return U"HEBREW LETTER 
ALEF"; - case U'\u05D1': - return U"HEBREW LETTER BET"; - case U'\u05D2': - return U"HEBREW LETTER GIMEL"; - case U'\u05D3': - return U"HEBREW LETTER DALET"; - case U'\u05D4': - return U"HEBREW LETTER HE"; - case U'\u05D5': - return U"HEBREW LETTER VAV"; - case U'\u05D6': - return U"HEBREW LETTER ZAYIN"; - case U'\u05D7': - return U"HEBREW LETTER HET"; - case U'\u05D8': - return U"HEBREW LETTER TET"; - case U'\u05D9': - return U"HEBREW LETTER YOD"; - case U'\u05DA': - return U"HEBREW LETTER FINAL KAF"; - case U'\u05DB': - return U"HEBREW LETTER KAF"; - case U'\u05DC': - return U"HEBREW LETTER LAMED"; - case U'\u05DD': - return U"HEBREW LETTER FINAL MEM"; - case U'\u05DE': - return U"HEBREW LETTER MEM"; - case U'\u05DF': - return U"HEBREW LETTER FINAL NUN"; - case U'\u05E0': - return U"HEBREW LETTER NUN"; - case U'\u05E1': - return U"HEBREW LETTER SAMEKH"; - case U'\u05E2': - return U"HEBREW LETTER AYIN"; - case U'\u05E3': - return U"HEBREW LETTER FINAL PE"; - case U'\u05E4': - return U"HEBREW LETTER PE"; - case U'\u05E5': - return U"HEBREW LETTER FINAL TSADI"; - case U'\u05E6': - return U"HEBREW LETTER TSADI"; - case U'\u05E7': - return U"HEBREW LETTER QOF"; - case U'\u05E8': - return U"HEBREW LETTER RESH"; - case U'\u05E9': - return U"HEBREW LETTER SHIN"; - case U'\u05EA': - return U"HEBREW LETTER TAV"; - case U'\u05EF': - return U"HEBREW YOD TRIANGLE"; - /* CYRILLIC: */ - case U'\u0400': - return U"CYRILLIC CAPITAL LETTER LE WITH GRAVE"; - case U'\u0401': - return U"CYRILLIC CAPITAL LETTER LO"; - case U'\u0402': - return U"CYRILLIC CAPITAL LETTER DJE"; - case U'\u0403': - return U"CYRILLIC CAPITAL LETTER GJE"; - case U'\u0404': - return U"CYRILLIC CAPITAL LETTER UKRAINIAN LE"; - case U'\u0405': - return U"CYRILLIC CAPITAL LETTER DZE"; - case U'\u0406': - return U"CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I"; - case U'\u0407': - return U"CYRILLIC CAPITAL LETTER YI"; - case U'\u0408': - return U"CYRILLIC CAPITAL LETTER JE"; - case U'\u0409': - return 
U"CYRILLIC CAPITAL LETTER LJE"; - case U'\u040A': - return U"CYRILLIC CAPITAL LETTER NJE"; - case U'\u040B': - return U"CYRILLIC CAPITAL LETTER TSHE"; - case U'\u040C': - return U"CYRILLIC CAPITAL LETTER KJE"; - case U'\u040D': - return U"CYRILLIC CAPITAL LETTER I WITH GRAVE"; - case U'\u040E': - return U"CYRILLIC CAPITAL LETTER SHORT U"; - case U'\u040F': - return U"CYRILLIC CAPITAL LETTER DZHE"; - case U'\u0410': - return U"CYRILLIC CAPITAL LETTER A"; - case U'\u0420': - return U"CYRILLIC CAPITAL LETTER ER"; - case U'\u0430': - return U"CYRILLIC SMALL LETTER A"; - case U'\u0440': - return U"CYRILLIC SMALL LETTER ER"; - case U'\u0450': - return U"CYRILLIC SMALL LETTER LE WITH GRAVE"; - case U'\u0460': - return U"CYRILLIC CAPITAL LETTER OMEGA"; - case U'\u0470': - return U"CYRILLIC CAPITAL LETTER PSI"; - case U'\u0480': - return U"CYRILLIC CAPITAL LETTER KOPPA"; - case U'\u0490': - return U"CYRILLIC CAPITAL LETTER GHE WITH UPTURN"; - case U'\u04A0': - return U"CYRILLIC CAPITAL LETTER BASHKIR KA"; - case U'\u04B0': - return U"CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE"; - case U'\u04C0': - return U"CYRILLIC LETTER PALOCHKA"; - case U'\u04D0': - return U"CYRILLIC CAPITAL LETTER A WITH BREVE"; - case U'\u04E0': - return U"CYRILLIC CAPITAL LETTER ABKHASIAN DZE"; - case U'\u04F0': - return U"CYRILLIC CAPITAL LETTER U WITH DIAERESIS"; - /* SYRIAC SUPPLEMENT: */ - case U'\u0860': - return U"SYRIAC LETTER MALAYALAM NGA"; - case U'\u0861': - return U"SYRIAC LETTER MALAYALAM JA"; - case U'\u0862': - return U"SYRIAC LETTER MALAYALAM NYA"; - case U'\u0863': - return U"SYRIAC LETTER MALAYALAM TTA"; - case U'\u0864': - return U"SYRIAC LETTER MALAYALAM NNA"; - case U'\u0865': - return U"SYRIAC LETTER MALAYALAM NNNA"; - case U'\u0866': - return U"SYRIAC LETTER MALAYALAM BHA"; - case U'\u0867': - return U"SYRIAC LETTER MALAYALAM RA"; - case U'\u0868': - return U"SYRIAC LETTER MALAYALAM LLA"; - case U'\u0869': - return U"SYRIAC LETTER MALAYALAM LLLA"; - case U'\u086A': - return 
U"SYRIAC LETTER MALAYALAM SSA"; - /* RUNIC: */ - case U'\u16A0': - return U"RUNIC LETTER FEHU FEOH FE F"; - case U'\u16A1': - return U"RUNIC LETTER V"; - case U'\u16A2': - return U"RUNIC LETTER URUZ UR U"; - case U'\u16A3': - return U"RUNIC LETTER YR"; - case U'\u16A4': - return U"RUNIC LETTER Y"; - case U'\u16A5': - return U"RUNIC LETTER W"; - case U'\u16A6': - return U"RUNIC LETTER THURISAZ THURS THORN"; - case U'\u16A7': - return U"RUNIC LETTER ETH"; - case U'\u16A8': - return U"RUNIC LETTER ANSUZ A"; - case U'\u16A9': - return U"RUNIC LETTER OS O"; - case U'\u16AA': - return U"RUNIC LETTER AC A"; - case U'\u16AB': - return U"RUNIC LETTER AESC"; - case U'\u16AC': - return U"RUNIC LETTER LONG-BRANCHED-OSS O"; - case U'\u16AD': - return U"RUNIC LETTER SHORT-TWIG-OSS O"; - case U'\u16AE': - return U"RUNIC LETTER O"; - case U'\u16AF': - return U"RUNIC LETTER OE"; - case U'\u16B0': - return U"RUNIC LETTER ON"; - case U'\u16C0': - return U"RUNIC LETTER DOTTED-N"; - case U'\u16D0': - return U"RUNIC LETTER SHORT-TWIG-TYR T"; - case U'\u16E0': - return U"RUNIC LETTER EAR"; - case U'\u16F0': - return U"RUNIC BELGTHOR SYMBOL"; - /* CYRILLIC EXTENDED C: */ - case U'\u1C80': - return U"CYRILLIC SMALL LETTER ROUNDED VE"; - case U'\u1C81': - return U"CYRILLIC SMALL LETTER LONG-LEGGED DE"; - case U'\u1C82': - return U"CYRILLIC SMALL LETTER NARROW O"; - case U'\u1C83': - return U"CYRILLIC SMALL LETTER WIDE ES"; - case U'\u1C84': - return U"CYRILLIC SMALL LETTER TALL TE"; - case U'\u1C85': - return U"CYRILLIC SMALL LETTER THREE-LEGGED TE"; - case U'\u1C86': - return U"CYRILLIC SMALL LETTER TALL HARD SIGN"; - case U'\u1C87': - return U"CYRILLIC SMALL LETTER TALL YAT"; - case U'\u1C88': - return U"CYRILLIC SMALL LETTER UNBLENDED UK"; - /* GENERAL PUNCTUATION: */ - case U'\u2000': - return U"EN QUAD"; - case U'\u2001': - return U"EM QUAD"; - case U'\u2002': - return U"EN SPACE"; - case U'\u2003': - return U"EM SPACE"; - case U'\u2004': - return U"THREE-PER-EM SPACE"; - case 
U'\u2005': - return U"FOUR-PER-EM SPACE"; - case U'\u2006': - return U"SIX-PER-EM SPACE"; - case U'\u2007': - return U"FIGURE SPACE"; - case U'\u2008': - return U"PUNCTUATION SPACE"; - case U'\u2009': - return U"THIN SPACE"; - case U'\u200A': - return U"HAIR SPACE"; - case U'\u203C': - return U"DOUBLE EXCLAMATION MARK"; - case U'\u2047': - return U"DOUBLE QUOTATION MARK"; - case U'\u2048': - return U"QUESTION EXCLAMATION MARK"; - case U'\u2049': - return U"EXCLAMATION QUESTION MARK"; - /* CURRENCY SYMBOLS: */ - case U'\u20A0': - return U"EURO-CURRENCY SIGN"; - case U'\u20A1': - return U"COLON SIGN"; - case U'\u20A2': - return U"CRUZEIRO SIGN"; - case U'\u20A3': - return U"FRENCH FRANC SIGN"; - case U'\u20A4': - return U"LIRA SIGN"; - case U'\u20A5': - return U"MILL SIGN"; - case U'\u20A6': - return U"NAIRA SIGN"; - case U'\u20A7': - return U"PESETA SIGN"; - case U'\u20A8': - return U"RUPEE SIGN"; - case U'\u20A9': - return U"WON SIGN"; - case U'\u20AA': - return U"NEW SHEQEL SIGN"; - case U'\u20AB': - return U"DONG SIGN"; - case U'\u20AC': - return U"EURO SIGN"; - case U'\u20AD': - return U"KIP SIGN"; - case U'\u20AE': - return U"TUGRIK SIGN"; - case U'\u20AF': - return U"DRACHMA SIGN"; - case U'\u20B0': - return U"GERMAN PENNY SIGN"; - case U'\u20B1': - return U"PESO SIGN"; - case U'\u20B2': - return U"GUARANI SIGN"; - case U'\u20B3': - return U"AUSTRAL SIGN"; - case U'\u20B4': - return U"HRYVNIA SIGN"; - case U'\u20B5': - return U"CEDI SIGN"; - case U'\u20B6': - return U"LIVRE TOURNOIS SIGN"; - case U'\u20B7': - return U"SPESMILO SIGN"; - case U'\u20B8': - return U"TENGE SIGN"; - case U'\u20BA': - return U"TURKISH LIRA SIGN"; - case U'\u20BB': - return U"NORDIC MARK SIGN"; - case U'\u20BC': - return U"MANAT SIGN"; - case U'\u20BD': - return U"RUBLE SYMBOL"; - case U'\u20BE': - return U"LARI SIGN"; - case U'\u20BF': - return U"BITCOIN SIGN"; - /* LETTERLIKE SYMBOLS: */ - case U'\u2100': - return U"ACCOUNT OF"; - case U'\u2101': - return U"ADRESSED TO THE SUBJECT"; 
- case U'\u2102': - return U"DOUBLE-STRUCK CAPITAL C"; - case U'\u2103': - return U"DEGREE CELSIUS"; - case U'\u2104': - return U"CENTRE LINE SYMBOL"; - case U'\u2105': - return U"CARE OF"; - case U'\u2106': - return U"CADA UNA"; - case U'\u2107': - return U"EULER CONSTANT"; - case U'\u2108': - return U"SCRUPLE"; - case U'\u2109': - return U"DEGREE FAHRENHEIT"; - case U'\u210A': - return U"SCRIPT SMALL G"; - case U'\u210B': - return U"SCRIPT CAPITAL H"; - case U'\u210C': - return U"BLACK-LETTER CAPITAL H"; - case U'\u210D': - return U"DOUBLE-STRUCK CAPITAL H"; - case U'\u210E': - return U"PLANCK CONSTANT"; - case U'\u210F': - return U"PLANCK CONSTANT OVER TWO PI"; - case U'\u2110': - return U"SCRIPT CAPITAL I"; - case U'\u2111': - return U"BLACK-LETTER CAPITAL I"; - case U'\u2112': - return U"SCRIPT CAPITAL L"; - case U'\u2113': - return U"SCRIPT SMALL L"; - case U'\u2114': - return U"L B BAR SYMBOL"; - case U'\u2115': - return U"DOUBLE-STRUCK CAPITAL N"; - case U'\u2116': - return U"NUMERO SIGN"; - case U'\u2117': - return U"SOUND RECORDING COPYRIGHT"; - case U'\u2118': - return U"SCRIPT CAPITAL P"; - case U'\u2119': - return U"DOUBLE-STRUCK CAPITAL P"; - case U'\u211A': - return U"DOUBLE-STRUCK CAPITAL Q"; - case U'\u211B': - return U"SCRIPT CAPITAL R"; - case U'\u211C': - return U"BLACK-LETTER CAPITAL R"; - case U'\u211D': - return U"DOUBLE-STRUCK CAPITAL R"; - case U'\u211E': - return U"PRESCRIPTION TAKE"; - case U'\u211F': - return U"RESPONSE"; - case U'\u2120': - return U"SERVICE MARK"; - case U'\u2121': - return U"TELEPHONE SIGN"; - case U'\u2122': - return U"TRADE MARK SIGN"; - case U'\u2123': - return U"VERSICLE"; - case U'\u2124': - return U"DOUBLE-STRUCK CAPITAL Z"; - case U'\u2125': - return U"OUNCE SIGN"; - case U'\u2126': - return U"OHM SIGN"; - case U'\u2127': - return U"INVERTED OHM SIGN"; - case U'\u2128': - return U"BLACK-LETTER CAPITAL Z"; - case U'\u2129': - return U"TURNED GREEK SMALL LETTER IOTA"; - case U'\u212A': - return U"KELVIN SIGN"; - 
case U'\u212B': - return U"ANGSTROM SIGN"; - case U'\u212C': - return U"SCRIPT CAPITAL B"; - case U'\u212D': - return U"BLACK-LETTER CAPITAL C"; - case U'\u212E': - return U"ESTIMATED SYMBOL"; - case U'\u212F': - return U"SCRIPT SMALL E"; - case U'\u2130': - return U"SCRIPT CAPITAL E"; - case U'\u2131': - return U"SCRIPT CAPITAL F"; - case U'\u2132': - return U"TURNED CAPITAL F"; - case U'\u2133': - return U"SCRIPT CAPITAL M"; - case U'\u2134': - return U"SCRIPT SMALL O"; - case U'\u2135': - return U"ALEF SYMBOL"; - case U'\u2136': - return U"BET SYMBOL"; - case U'\u2137': - return U"GIMEL SYMBOL"; - case U'\u2138': - return U"DALET SYMBOL"; - case U'\u2139': - return U"INFORMATION SOURCE"; - case U'\u213A': - return U"ROTATED CAPITAL Q"; - case U'\u213B': - return U"FACSIMILE SIGN"; - case U'\u213C': - return U"DOUBLE-STRUCK SMALL PI"; - case U'\u213D': - return U"DOUBLE-STRUCK SMALL GAMMA"; - case U'\u213E': - return U"DOUBLE-STRUCK CAPITAL GAMMA"; - case U'\u213F': - return U"DOUBLE-STRUCK CAPITAL PI"; - case U'\u2140': - return U"DOUBLE-STRUCK N-ARY SUMMATION"; - case U'\u2141': - return U"TURNED SANS-SERIF CAPITAL G"; - case U'\u2142': - return U"TURNED SANS-SERIF CAPITAL L"; - case U'\u2143': - return U"REVERSED SANS-SERIF CAPITAL L"; - case U'\u2144': - return U"TURNED SANS-SERIF CAPITAL Y"; - case U'\u2145': - return U"DOUBLE-STRUCK ITALIC CAPITAL D"; - case U'\u2146': - return U"DOUBLE-STRUCK ITALIC SMALL D"; - case U'\u2147': - return U"DOUBLE-STRUCK ITALIC SMALL E"; - case U'\u2148': - return U"DOUBLE-STRUCK ITALIC SMALL I"; - case U'\u2149': - return U"DOUBLE-STRUCK ITALIC SMALL J"; - case U'\u214A': - return U"PROPERTY LINE"; - case U'\u214B': - return U"TURNED AMPERSAND"; - case U'\u214C': - return U"PER SIGN"; - case U'\u214D': - return U"AKTIESELSKAB"; - case U'\u214E': - return U"TURNED SMALL F"; - case U'\u214F': - return U"SYMBOL FOR SAMARITAN SOURCE"; - /* NUMBER FORMS: */ - case U'\u2150': - return U"VULGAR FRACTION ONE SEVENTH"; - case 
U'\u2151': - return U"VULGAR FRACTION ONE NINTH"; - case U'\u2152': - return U"VULGAR FRACTION ONE TENTH"; - case U'\u2153': - return U"VULGAR FRACTION ONE THIRD"; - case U'\u2154': - return U"VULGAR FRACTION TWO THIRDS"; - case U'\u2155': - return U"VULGAR FRACTION ONE FIFTH"; - case U'\u2156': - return U"VULGAR FRACTION TWO FIFTHS"; - case U'\u2157': - return U"VULGAR FRACTION THREE FIFTHS"; - case U'\u2158': - return U"VULGAR FRACTION FOUR FIFTHS"; - case U'\u2159': - return U"VULGAR FRACTION ONE SIXTH"; - case U'\u215A': - return U"VULGAR FRACTION FIVE SIXTHS"; - case U'\u215B': - return U"VULGAR FRACTION ONE EIGTH"; - case U'\u215C': - return U"VULGAR FRACTION THREE EIGTHS"; - case U'\u215D': - return U"VULGAR FRACTION FIVE EIGHTS"; - case U'\u215E': - return U"VULGAR FRACTION SEVEN EIGTHS"; - case U'\u215F': - return U"FRACTION NUMERATOR ONE"; - case U'\u2160': - return U"ROMAN NUMERAL ONE"; - case U'\u2161': - return U"ROMAN NUMERAL TWO"; - case U'\u2162': - return U"ROMAN NUMERAL THREE"; - case U'\u2163': - return U"ROMAN NUMERAL FOUR"; - case U'\u2164': - return U"ROMAN NUMERAL FIVE"; - case U'\u2165': - return U"ROMAN NUMERAL SIX"; - case U'\u2166': - return U"ROMAN NUMERAL SEVEN"; - case U'\u2167': - return U"ROMAN NUMERAL EIGHT"; - case U'\u2168': - return U"ROMAN NUMERAL NINE"; - case U'\u2169': - return U"ROMAN NUMERAL TEN"; - case U'\u216A': - return U"ROMAN NUMERAL ELEVEN"; - case U'\u216B': - return U"ROMAN NUMERAL TWELVE"; - case U'\u216C': - return U"ROMAN NUMERAL FIFTY"; - case U'\u216D': - return U"ROMAN NUMERAL ONE HUNDRED"; - case U'\u216E': - return U"ROMAN NUMERAL FIVE HUNDRED"; - case U'\u216F': - return U"ROMAN NUMERAL ONE THOUSAND"; - case U'\u2170': - return U"SMALL ROMAN NUMERAL ONE"; - case U'\u2171': - return U"SMALL ROMAN NUMERAL TWO"; - case U'\u2172': - return U"SMALL ROMAN NUMERAL THREE"; - case U'\u2173': - return U"SMALL ROMAN NUMERAL FOUR"; - case U'\u2174': - return U"SMALL ROMAN NUMERAL FIVE"; - case U'\u2175': - return 
U"SMALL ROMAN NUMERAL SIX"; - case U'\u2176': - return U"SMALL ROMAN NUMERAL SEVEN"; - case U'\u2177': - return U"SMALL ROMAN NUMERAL EIGHT"; - case U'\u2178': - return U"SMALL ROMAN NUMERAL NINE"; - case U'\u2179': - return U"SMALL ROMAN NUMERAL TEN"; - case U'\u217A': - return U"SMALL ROMAN NUMERAL ELEVEN"; - case U'\u217B': - return U"SMALL ROMAN NUMERAL TWELVE"; - case U'\u217C': - return U"SMALL ROMAN NUMERAL FIFTY"; - case U'\u217D': - return U"SMALL ROMAN NUMERAL ONE HUNDRED"; - case U'\u217E': - return U"SMALL ROMAN NUMERAL FIVE HUNDRED"; - case U'\u217F': - return U"SMALL ROMAN NUMERAL ONE THOUSAND"; - case U'\u2180': - return U"ROMAN NUMERAL ONE THOUSAND C D"; - case U'\u2181': - return U"ROMAN NUMERAL FIVE THOUSAND"; - case U'\u2182': - return U"ROMAN NUMERAL TEN THOUSAND"; - case U'\u2183': - return U"ROMAN NUMERAL REVERSED ONE HUNDRED"; - case U'\u2184': - return U"LATIN SMALL LETTER REVERSED C"; - case U'\u2185': - return U"ROMAN NUMERAL SIX LATE FORM"; - case U'\u2186': - return U"ROMAN NUMERAL FIFTY EARLY FORM"; - case U'\u2187': - return U"ROMAN NUMERAL FIFTY THOUSAND"; - case U'\u2188': - return U"ROMAN NUMERAL ONE HUNDRED THOUSAND"; - case U'\u2189': - return U"VULGAR FRACTION ZERO THIRDS"; - case U'\u218A': - return U"TURNED DIGIT TWO"; - case U'\u218B': - return U"TURNED DIGIT THREE"; - /* MISCELLANEOUS SYMBOLS: */ - case U'\u2630': - return U"TRIGRAM FOR HEAVEN"; - case U'\u2631': - return U"TRIGRAM FOR LAKE"; - case U'\u2632': - return U"TRIGRAM FOR FIRE"; - case U'\u2633': - return U"TRIGRAM FOR THUNDER"; - case U'\u2634': - return U"TRIGRAM FOR WIND"; - case U'\u2635': - return U"TRIGRAM FOR WATER"; - case U'\u2636': - return U"TRIGRAM FOR MOUNTAIN"; - case U'\u2637': - return U"TRIGRAM FOR EARTH"; - case U'\u2638': - return U"WHEEL OF DHARMA"; - case U'\u2639': - return U"WHITE FROWNING FACE"; - case U'\u263A': - return U"WHITE SMILING FACE"; - case U'\u263B': - return U"BLACK SMILING FACE"; - case U'\u263C': - return U"WHITE SUN WITH 
RAYS"; - case U'\u263D': - return U"FIRST QUARTER MOON"; - case U'\u263E': - return U"LAST QUARTER MOON"; - case U'\u263F': - return U"MERCURY"; - case U'\u2640': - return U"FEMALE SIGN"; - case U'\u2641': - return U"EARTH"; - case U'\u2642': - return U"MALE SIGN"; - case U'\u2643': - return U"JUPITER"; - case U'\u2644': - return U"SATURN"; - case U'\u2645': - return U"URANUS"; - case U'\u2646': - return U"NEPTUNE"; - case U'\u2647': - return U"PLUTO"; - case U'\u2648': - return U"ARIES"; - case U'\u2649': - return U"TAURUS"; - case U'\u264A': - return U"GEMNINI"; - case U'\u264B': - return U"CANCER"; - case U'\u264C': - return U"LEO"; - case U'\u264D': - return U"VIRGO"; - case U'\u264E': - return U"LIBRA"; - case U'\u264F': - return U"SCORPIUS"; - case U'\u2650': - return U"SAGITTARIUS"; - case U'\u2651': - return U"CAPRICORN"; - case U'\u2652': - return U"AQUARIUS"; - case U'\u2653': - return U"PISCES"; - case U'\u2654': - return U"WHITE CHESS KING"; - case U'\u2655': - return U"WHITE CHESS QUEEN"; - case U'\u2656': - return U"WHITE CHESS ROOK"; - case U'\u2657': - return U"WHITE CHESS BISHOP"; - case U'\u2658': - return U"WHITE CHESS KNIGHT"; - case U'\u2659': - return U"WHITE CHESS PAWN"; - case U'\u265A': - return U"BLACK CHESS KING"; - case U'\u265B': - return U"BLACK CHESS QUEEN"; - case U'\u265C': - return U"BLACK CHESS ROOK"; - case U'\u265D': - return U"BLACK CHESS BISHOP"; - case U'\u265E': - return U"BLACK CHESS KNIGHT"; - case U'\u265F': - return U"BLACK CHESS PAWN"; - case U'\u2660': - return U"BLACK SPADE SUIT"; - case U'\u2661': - return U"WHITE HEART SUIT"; - case U'\u2662': - return U"WHITE DIAMOND SUIT"; - case U'\u2663': - return U"BLACK CLUB SUIT"; - case U'\u2664': - return U"WHITE SPADE SUIT"; - case U'\u2665': - return U"BLACK HEART SUIT"; - case U'\u2666': - return U"BLACK DIAMOND SUIT"; - case U'\u2667': - return U"WHITE CLUB SUIT"; - case U'\u2668': - return U"HOT SPRINGS"; - case U'\u2669': - return U"QUARTER NOTE"; - case U'\u266A': - 
return U"EIGHT NOTE"; - case U'\u266B': - return U"BEAMED EIGTH NOTES"; - case U'\u266C': - return U"BEAMED SIXTEENTH NOTES"; - case U'\u266D': - return U"MUSIC FLAT SIGN"; - case U'\u266E': - return U"MUSIC NEUTRAL SIGN"; - case U'\u266F': - return U"MUSIC SHARP SIGN"; - case U'\u2670': - return U"WEST SYRIAC CROSS"; - case U'\u2671': - return U"EAST SYRIAC CROSS"; - case U'\u2672': - return U"UNIVERSAL RECYCLING SYMBOL"; - case U'\u2673': - return U"RECYCLING SYMBOL FOR TYPE-1 PLASTICS"; - case U'\u2674': - return U"RECYCLING SYMBOL FOR TYPE-2 PLASTICS"; - case U'\u2675': - return U"RECYCLING SYMBOL FOR TYPE-3 PLASTICS"; - case U'\u2676': - return U"RECYCLING SYMBOL FOR TYPE-4 PLASTICS"; - case U'\u2677': - return U"RECYCLING SYMBOL FOR TYPE-5 PLASTICS"; - case U'\u2678': - return U"RECYCLING SYMBOL FOR TYPE-6 PLASTICS"; - case U'\u2679': - return U"RECYCLING SYMBOL FOR TYPE-7 PLASTICS"; - case U'\u267A': - return U"RECYCLING SYMBOL FOR GENERIC MATERIALS"; - case U'\u267B': - return U"BLACK UNIVERSAL RECYCLING SYMBOL"; - case U'\u267C': - return U"RECYCLED PAPER SYMBOL"; - case U'\u267D': - return U"PARTIALLY-RECYCLED PAPER SYMBOL"; - case U'\u267E': - return U"PERMANENT PAPER SIGN"; - case U'\u267F': - return U"WHEELCHAIR SYMBOL"; - case U'\u26B9': - return U"SEXTILE"; - /* DINGBATS: */ - case U'\u271D': - return U"LATIN CROSS"; - case U'\u2721': - return U"STAR OF DAVID"; - /* SUPPLEMENTAL PUNCTUATION: */ - case U'\u2E3B': - return U"THREE-EM DASH"; - /* ARABIC PRESENTATION FORMS-A: */ - case U'\uFDFD': - return U"ARABIC LIGATURE BISMILLAH AL-RAHMAN AR-RAHEEM"; - /* ANCIENT SYMBOLS: */ - case U'\U00010190': - return U"ROMAN SEXTANS SIGN"; - case U'\U00010191': - return U"ROMAN UNCIA SIGN"; - case U'\U00010192': - return U"ROMAN SEMUNCIA SIGN"; - case U'\U00010193': - return U"ROMAN SEXTULA SIGN"; - case U'\U00010194': - return U"ROMAN DIMIDIA SEXTULA SIGN"; - case U'\U00010195': - return U"ROMAN SILIQUA SIGN"; - case U'\U00010196': - return U"ROMAN DENARIUS 
SIGN"; - case U'\U00010197': - return U"ROMAN QUINARIUS SIGN"; - case U'\U00010198': - return U"ROMAN SESTERTIUS SIGN"; - case U'\U00010199': - return U"ROMAN DUPONDIUS SIGN"; - case U'\U0001019A': - return U"ROMAN AS SIGN"; - case U'\U0001019B': - return U"ROMAN CENTURIAL SIGN"; - case U'\U0001019C': - return U"ASCIA SIGN"; - /* BRAHMI: */ - case U'\U00011066': - return U"BRAHMI DIGIT ZERO"; - case U'\U00011067': - return U"BRAHMI DIGIT ONE"; - case U'\U00011068': - return U"BRAHMI DIGIT TWO"; - case U'\U00011069': - return U"BRAHMI DIGIT THREE"; - case U'\U0001106A': - return U"BRAHMI DIGIT FOUR"; - case U'\U0001106B': - return U"BRAHMI DIGIT FIVE"; - case U'\U0001106C': - return U"BRAHMI DIGIT SIX"; - case U'\U0001106D': - return U"BRAHMI DIGIT SEVEN"; - case U'\U0001106E': - return U"BRAHMI DIGIT EIGHT"; - case U'\U0001106F': - return U"BRAHMI DIGIT NINE"; - /* CUNEIFORM: */ - case U'\U00012031': - return U"CUNEIFORM SIGN AN PLUS NAGA SQUARED"; - /* CUNEIFORM NUMBERS AND PUNCTUATION: */ - case U'\U0001242B': - return U"CUNEIFORM NUMERIC SIGN NINE SHAR2"; - /* EGYPTIAN HIEROGLYPHS: */ - case U'\U000130B8': - return U"EGYPTIAN HIEROGLYPH D052"; - /* COUNTING ROD NUMERALS: */ - case U'\U0001D372': - return U"IDEOGRAPHIC TALLY MARK ONE"; - case U'\U0001D373': - return U"IDEOGRAPHIC TALLY MARK TWO"; - case U'\U0001D374': - return U"IDEOGRAPHIC TALLY MARK THREE"; - case U'\U0001D375': - return U"IDEOGRAPHIC TALLY MARK FOUR"; - case U'\U0001D376': - return U"IDEOGRAPHIC TALLY MARK FIVE"; - case U'\U0001D377': - return U"TALLY MARK ONE"; - case U'\U0001D378': - return U"TALLY MARK FIVE"; - /* ENCLOSED ALPHANUMERIC SUPPLEMENT: */ - case U'\U0001F10D': - return U"CIRCLED ZERO WITH SLASH"; - case U'\U0001F10E': - return U"CIRCLED ANTICKLOCKWISE ARROW"; - case U'\U0001F10F': - return U"CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH"; - case U'\U0001F12F': - return U"COPYLEFT SYMBOL"; - case U'\U0001F16D': - return U"CIRCLED CC"; - case U'\U0001F16E': - return U"CIRCLED C WITH 
OVERLAID BACKSLASH"; - case U'\U0001F16F': - return U"CIRCLED HUMAN FIGURE"; - /* EMOTICONS: */ - case U'\U0001F600': - return U"GRINNING FACE"; - case U'\U0001F601': - return U"GRINNING FACE WITH SMIRKING EYES"; - case U'\U0001F602': - return U"FACE WITH TEARS OF JOY"; - case U'\U0001F603': - return U"SMILING FACE WITH OPEN MOUTH"; - case U'\U0001F604': - return U"SMILING FACE WITH OPEN MOUTH AND SMILING EYES"; - case U'\U0001F605': - return U"SMILING FACE WITH OPEN MOUTH AND COULD SWEAT"; - case U'\U0001F606': - return U"SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES"; - case U'\U0001F607': - return U"SMILING FACE WITH HALO"; - case U'\U0001F608': - return U"SMILING FACE WITH HORNS"; - case U'\U0001F609': - return U"WINKING FACE"; - case U'\U0001F60A': - return U"SMILING FACE WITH SMILING EYES"; - case U'\U0001F60B': - return U"FACE SAVOURING DELICIOUS FOOD"; - case U'\U0001F60C': - return U"RELIEVED FACE"; - case U'\U0001F60D': - return U"SMILLING FACE HEART-SHAPED EYES"; - case U'\U0001F60E': - return U"SMILLING FACE WITH SUNGLASSES"; - case U'\U0001F60F': - return U"SMIRKING FACE"; - case U'\U0001F610': - return U"NEUTRAL FACE"; - case U'\U0001F611': - return U"EXPRESSIONLESS FACE"; - case U'\U0001F612': - return U"UNAMUSED FACE"; - case U'\U0001F613': - return U"FACE WITH COLD SWEAT"; - case U'\U0001F614': - return U"PENSIVE FACE"; - case U'\U0001F615': - return U"CONFUSED FACE"; - case U'\U0001F616': - return U"CONFOUNDED FACE"; - case U'\U0001F617': - return U"KISSING FACE"; - case U'\U0001F618': - return U"FACE THROWING A KISS"; - case U'\U0001F619': - return U"KISSING FACE WITH SMILLING EYES"; - case U'\U0001F61A': - return U"KISSING FACE WITH CLOSED EYES"; - case U'\U0001F61B': - return U"FACE WITH STUCK-OUT TONGUE"; - case U'\U0001F61C': - return U"FACE WITH STUCK-OUT TONGUE AND WINKING EYE"; - case U'\U0001F61D': - return U"FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES"; - case U'\U0001F61E': - return U"DISSAPOINTED FACE"; - case 
U'\U0001F61F': - return U"WORRIED FACE"; - case U'\U0001F620': - return U"ANGRY FACE"; - case U'\U0001F621': - return U"POUTING FACE"; - case U'\U0001F622': - return U"CRYING FACE"; - case U'\U0001F623': - return U"PERSEVERING FACE"; - case U'\U0001F624': - return U"FACE WITH LOOK OF TRIUMPH"; - case U'\U0001F625': - return U"DISSAPOINTED BUT RELIEVED FACE"; - case U'\U0001F626': - return U"FROWNING FACE WITH OPEN MOUTH"; - case U'\U0001F627': - return U"ANGUISHED FACE"; - case U'\U0001F628': - return U"FEARFUL FACE"; - case U'\U0001F629': - return U"WEARY FACE"; - case U'\U0001F62A': - return U"SLEEPY FACE"; - case U'\U0001F62B': - return U"TIRED FACE"; - case U'\U0001F62C': - return U"GRIMACING FACE"; - case U'\U0001F62D': - return U"LOUDLY CRYING FACE"; - case U'\U0001F62E': - return U"FACE WITH OPEN MOUTH"; - case U'\U0001F62F': - return U"HUSHED FACE"; - case U'\U0001F630': - return U"FACE WITH OPEN MOUTH AND COLD SWEAT"; - case U'\U0001F631': - return U"FACE SCREAMING IN FEAR"; - case U'\U0001F632': - return U"ASTONISHED FACE"; - case U'\U0001F633': - return U"FLUSHED FACE"; - case U'\U0001F634': - return U"SLEEPING FACE"; - case U'\U0001F635': - return U"DIZZY FACE"; - case U'\U0001F636': - return U"FACE WITHOUT MOUTH"; - case U'\U0001F637': - return U"FACE WITH MEDICAL MASK"; - case U'\U0001F641': - return U"SLIGHTLY FROWNING FACE"; - case U'\U0001F642': - return U"SLIGHTLY SMILING FACE"; - case U'\U0001F643': - return U"UPSIDE-DOWN FACE"; - case U'\U0001F644': - return U"FACE WITH ROLLING EYES"; - /* ORNAMENTAL DINGBATS: */ - case U'\U0001F670': - return U"SCRIPT LIGATURE ET ORNAMENT"; - case U'\U0001F671': - return U"HEAVY SCRIPT LIGATURE ET ORNAMENT"; - case U'\U0001F672': - return U"LIGATURE OPEN ET ORNAMENT"; - case U'\U0001F673': - return U"HEAVY LIGATURE OPEN ET ORNAMENT"; - case U'\U0001F674': - return U"HEAVY AMPERSAND ORNAMENT"; - case U'\U0001F675': - return U"SWASH AMPERSAND ORNAMENT"; - case U'\U0001F676': - return U"SANS-SERIF HEAVY DOUBLE 
TURNED COMMA QUOTATION MARK ORNAMENT"; - case U'\U0001F677': - return U"SANS-SERIF HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT"; - case U'\U0001F678': - return U"SANS-SERIF HEAVY LOW DOUBLE QUOTATION MARK ORNAMENT"; - case U'\U0001F679': - return U"HEAVY INTERROBANG ORNAMENT"; - case U'\U0001F67A': - return U"SANS-SERIF INTERROBANG ORNAMENT"; - case U'\U0001F67B': - return U"HEAVY SANS-SERIF INTERROBANG ORNAMENT"; - case U'\U0001F67C': - return U"VERY HEAVY SOLIDUS"; - case U'\U0001F67D': - return U"VERY HEAVY REVERSE SOLIDUS"; - case U'\U0001F67E': - return U"CHECKER BOARD"; - case U'\U0001F67F': - return U"REVERSE CHECKER BOARD"; - /* CJK UNIFIED IDEOGRAPHS EXTENSION G: */ - case U'\U0003106C': - return U"CJK UNIFIED IDEOGRAPH-3106C"; - /* TAGS: */ - case U'\U000E0001': - return U"LANGUAGE TAG"; - case U'\U000E0020': - return U"TAG SPACE"; - case U'\U000E0021': - return U"TAG EXCLAMATION MARK"; - case U'\U000E0022': - return U"TAG QUOTATION MARK"; - case U'\U000E0023': - return U"TAG NUMBER SIGN"; - case U'\U000E0024': - return U"TAG DOLLAR SIGN"; - case U'\U000E0025': - return U"TAG PERCENT SIGN"; - case U'\U000E0026': - return U"TAG AMPERSAND"; - case U'\U000E0027': - return U"TAG APOSTROPHE"; - case U'\U000E0028': - return U"TAG LEFT PARANTHESIS"; - case U'\U000E0029': - return U"TAG RIGHT PARANTHESIS"; - case U'\U000E002A': - return U"TAG ASTERISK"; - case U'\U000E002B': - return U"TAG PLUS SIGN"; - case U'\U000E002C': - return U"TAG COMMA"; - case U'\U000E002D': - return U"TAG HYPHEN-MINUS"; - case U'\U000E002E': - return U"TAG FULL STOP"; - case U'\U000E002F': - return U"TAG SOLIDUS"; - case U'\U000E0030': - return U"TAG DIGIT ZERO"; - case U'\U000E0031': - return U"TAG DIGIT ONE"; - case U'\U000E0032': - return U"TAG DIGIT TWO"; - case U'\U000E0033': - return U"TAG DIGIT THREE"; - case U'\U000E0034': - return U"TAG DIGIT FOUR"; - case U'\U000E0035': - return U"TAG DIGIT FIVE"; - case U'\U000E0036': - return U"TAG DIGIT SIX"; - case U'\U000E0037': - return 
U"TAG DIGIT SEVEN"; - case U'\U000E0038': - return U"TAG DIGIT EIGHT"; - case U'\U000E0039': - return U"TAG DIGIT NINE"; - case U'\U000E003A': - return U"TAG COLON"; - case U'\U000E003B': - return U"TAG SEMICOLON"; - case U'\U000E003C': - return U"TAG LESS-THAN SIGN"; - case U'\U000E003D': - return U"TAG EQUALS SIGN"; - case U'\U000E003E': - return U"TAG GREATER-THAN SIGN"; - case U'\U000E003F': - return U"TAG QUESTION MARK"; - case U'\U000E0040': - return U"TAG COMMERCIAL AT"; - case U'\U000E0041': - return U"TAG LATIN CAPITAL LETTER A"; - case U'\U000E0042': - return U"TAG LATIN CAPITAL LETTER B"; - case U'\U000E0043': - return U"TAG LATIN CAPITAL LETTER C"; - case U'\U000E0044': - return U"TAG LATIN CAPITAL LETTER D"; - case U'\U000E0045': - return U"TAG LATIN CAPITAL LETTER E"; - case U'\U000E0046': - return U"TAG LATIN CAPITAL LETTER F"; - case U'\U000E0047': - return U"TAG LATIN CAPITAL LETTER G"; - case U'\U000E0048': - return U"TAG LATIN CAPITAL LETTER H"; - case U'\U000E0049': - return U"TAG LATIN CAPITAL LETTER I"; - case U'\U000E004A': - return U"TAG LATIN CAPITAL LETTER J"; - case U'\U000E004B': - return U"TAG LATIN CAPITAL LETTER K"; - case U'\U000E004C': - return U"TAG LATIN CAPITAL LETTER L"; - case U'\U000E004D': - return U"TAG LATIN CAPITAL LETTER M"; - case U'\U000E004E': - return U"TAG LATIN CAPITAL LETTER N"; - case U'\U000E004F': - return U"TAG LATIN CAPITAL LETTER O"; - case U'\U000E0050': - return U"TAG LATIN CAPITAL LETTER P"; - case U'\U000E0051': - return U"TAG LATIN CAPITAL LETTER Q"; - case U'\U000E0052': - return U"TAG LATIN CAPITAL LETTER R"; - case U'\U000E0053': - return U"TAG LATIN CAPITAL LETTER S"; - case U'\U000E0054': - return U"TAG LATIN CAPITAL LETTER T"; - case U'\U000E0055': - return U"TAG LATIN CAPITAL LETTER U"; - case U'\U000E0056': - return U"TAG LATIN CAPITAL LETTER V"; - case U'\U000E0057': - return U"TAG LATIN CAPITAL LETTER W"; - case U'\U000E0058': - return U"TAG LATIN CAPITAL LETTER X"; - case U'\U000E0059': - 
return U"TAG LATIN CAPITAL LETTER Y"; - case U'\U000E005A': - return U"TAG LATIN CAPITAL LETTER Z"; - case U'\U000E005B': - return U"TAG LEFT SQUARE BRACKET"; - case U'\U000E005C': - return U"TAG REVERSE SOLIDUS"; - case U'\U000E005D': - return U"TAG RIGHT SQUARE BRACKET"; - case U'\U000E005E': - return U"TAG CIRCUMFLEX ACCENT"; - case U'\U000E005F': - return U"TAG LOW LINE"; - case U'\U000E0060': - return U"TAG GRAVE ACCENT"; - case U'\U000E0061': - return U"TAG LATIN SMALL LETTER A"; - case U'\U000E0062': - return U"TAG LATIN SMALL LETTER B"; - case U'\U000E0063': - return U"TAG LATIN SMALL LETTER C"; - case U'\U000E0064': - return U"TAG LATIN SMALL LETTER D"; - case U'\U000E0065': - return U"TAG LATIN SMALL LETTER E"; - case U'\U000E0066': - return U"TAG LATIN SMALL LETTER F"; - case U'\U000E0067': - return U"TAG LATIN SMALL LETTER G"; - case U'\U000E0068': - return U"TAG LATIN SMALL LETTER H"; - case U'\U000E0069': - return U"TAG LATIN SMALL LETTER I"; - case U'\U000E006A': - return U"TAG LATIN SMALL LETTER J"; - case U'\U000E006B': - return U"TAG LATIN SMALL LETTER K"; - case U'\U000E006C': - return U"TAG LATIN SMALL LETTER L"; - case U'\U000E006D': - return U"TAG LATIN SMALL LETTER M"; - case U'\U000E006E': - return U"TAG LATIN SMALL LETTER N"; - case U'\U000E006F': - return U"TAG LATIN SMALL LETTER O"; - case U'\U000E0070': - return U"TAG LATIN SMALL LETTER P"; - case U'\U000E0071': - return U"TAG LATIN SMALL LETTER Q"; - case U'\U000E0072': - return U"TAG LATIN SMALL LETTER R"; - case U'\U000E0073': - return U"TAG LATIN SMALL LETTER S"; - case U'\U000E0074': - return U"TAG LATIN SMALL LETTER T"; - case U'\U000E0075': - return U"TAG LATIN SMALL LETTER U"; - case U'\U000E0076': - return U"TAG LATIN SMALL LETTER V"; - case U'\U000E0077': - return U"TAG LATIN SMALL LETTER W"; - case U'\U000E0078': - return U"TAG LATIN SMALL LETTER X"; - case U'\U000E0079': - return U"TAG LATIN SMALL LETTER Y"; - case U'\U000E007A': - return U"TAG LATIN SMALL LETTER Z"; - case 
U'\U000E007B': - return U"TAG LEFT CURLY BRACKET"; - case U'\U000E007C': - return U"TAG VERTICAL LINE"; - case U'\U000E007D': - return U"TAG RIGHT CURLY BRACKET"; - case U'\U000E007E': - return U"TAG TILDE"; - case U'\U000E007F': - return U"CANCEL TAG"; - } -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/u8c.h b/u8c/include/u8c/u8c.h new file mode 100644 index 0000000..94791e3 --- /dev/null +++ b/u8c/include/u8c/u8c.h @@ -0,0 +1,91 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. + + u8c is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU + Lesser General Public License along with u8c. If + not, see <https://www.gnu.org/licenses/>. +*/ + +/* + Greater Header Dependencies: + + u8c →┬→ character + └→ format +*/ + +#ifndef u8c_HEADER_U8C +#define u8c_HEADER_U8C + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +#if defined(__GNUC__) || defined(__clang__) + +#define u8c_RESTRICT __restrict__ + +#elif defined(_MSC_VER) + +#define u8c_RESTRICT __restrict + +#else + +#define u8c_RESTRICT + +#endif +#else + +#define u8c_RESTRICT restrict + +#endif + +#ifdef __GNUC__ + +#define u8c_ALWAYS_INLINE __attribute__ ((__always_inline__)) +#define u8c_NO_DISCARD __attribute__ ((__warn_unused_result__)) +#define u8c_NO_THROW __attribute__ ((__nothrow__)) +#define u8c_UNSEQUENCED __attribute__ ((__const__)) + +#elif __STDC_VERSION__ >= 202311 + +#define u8c_ALWAYS_INLINE +#define u8c_NO_DISCARD [[nodiscard]] +#define u8c_NO_THROW +#define u8c_UNSEQUENCED [[unsequenced]] + +#elif __cplusplus >= 201703 + +#define u8c_ALWAYS_INLINE +#define u8c_NO_DISCARD [[nodiscard]] +#define u8c_NO_THROW +#define u8c_UNSEQUENCED + +#else + +#define u8c_ALWAYS_INLINE +#define u8c_NO_DISCARD +#define u8c_NO_THROW +#define 
u8c_UNSEQUENCED + +#endif + +#define u8c_VERSION ((uint_least32_t)+UINT32_C(0x1D)) + +#define u8c_MAXIMUM_CODE_POINT ((uint_least32_t)+UINT32_C(0x0010FFFF)) + +#endif diff --git a/u8c/include/u8c/utf b/u8c/include/u8c/utf deleted file mode 100644 index 15bdc44..0000000 --- a/u8c/include/u8c/utf +++ /dev/null @@ -1,51 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_xtmbVPu5vGoJz4tw) -#define u8c_key_xtmbVPu5vGoJz4tw - -#include <u8c/arr> - -namespace u8c { - template<u8c::utf T,u8c::utf T0> [[nodiscard]] constexpr auto cnv( T0 const * begin,T0 const * end) -> u8c::arr<T>; - [[nodiscard,u8c_attr_const]] constexpr auto isalnum( char32_t chr) -> bool; - [[nodiscard,u8c_attr_const]] constexpr auto isalpha( char32_t chr) -> bool; - [[nodiscard,u8c_attr_const]] constexpr auto iscntrl( char32_t chr) -> bool; - [[nodiscard,u8c_attr_const]] constexpr auto isdigit( char32_t chr) -> bool; - [[nodiscard,u8c_attr_const]] constexpr auto islower( char32_t chr) -> bool; - [[nodiscard,u8c_attr_const]] constexpr auto ispunct( char32_t chr) -> bool; - [[nodiscard,u8c_attr_const]] constexpr auto isspace( char32_t chr) -> bool; - [[nodiscard,u8c_attr_const]] constexpr auto issurro( char32_t chr) -> bool; - [[nodiscard,u8c_attr_const]] constexpr auto isupper( char32_t chr) -> bool; - [[nodiscard,u8c_attr_const]] constexpr auto isxdigit(char32_t chr) -> bool; -} - -#include <u8c/utf.d/cnv> -#include <u8c/utf.d/isalnum> -#include <u8c/utf.d/isalpha> -#include <u8c/utf.d/iscntrl> -#include <u8c/utf.d/isdigit> -#include <u8c/utf.d/islower> -#include <u8c/utf.d/ispunct> -#include <u8c/utf.d/isspace> -#include <u8c/utf.d/issurro> -#include <u8c/utf.d/isupper> -#include <u8c/utf.d/isxdigit> - -#endif diff --git a/u8c/include/u8c/utf.d/cnv b/u8c/include/u8c/utf.d/cnv deleted file mode 100644 index 95b66e9..0000000 --- a/u8c/include/u8c/utf.d/cnv +++ /dev/null @@ -1,116 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. 
- - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_nVkgRbXZfcq3BG8J) -#define u8c_key_nVkgRbXZfcq3BG8J - -#include <algorithm> /* std::copy */ -#include <cstddef> /* u8c::size */ -#include <stdexcept> /* std::invalid_argument, std::out_of_range */ -#include <type_traits> /* std::is_same_v */ -#include <vector> /* std::vector */ - -template<u8c::utf T,u8c::utf T0> constexpr auto u8c::cnv(T0 const * const u8c_restr _begin,T0 const * const u8c_restr _end) -> u8c::arr<T> { - if (_begin == nullptr || _end == nullptr) [[unlikely]] { - throw std::invalid_argument("Null pointer provided as parameter."); - } - u8c::arr<T0> in(_begin,_end); - u8c::arr<T> out; - if constexpr (std::is_same_v<T0,T>) { - out.alloc(static_cast<u8c::size>(_end - _begin)); - std::copy(_begin,_end,out.begin()); - return out; - } - else { - if constexpr (std::is_same_v<T0,char16_t>) { - } - else if constexpr (std::is_same_v<T0,char32_t>) { - for (auto const tmp : in) { - if constexpr (std::is_same_v<T,char16_t>) { - if (tmp >= u8c_uint32c(0x10000)) { /* Two hextets. */ - char16_t const tmp0 = tmp - u8c_uint16c(0x10000); - out.app((tmp0 / u8c_uint16c(0x400) + u8c_uint16c(0xD800))); - out.app((tmp0 % u8c_uint16c(0x400) + u8c_uint16c(0xDC00))); - } - else { - /* One hextet. */ - out.app((static_cast<char16_t>(tmp))); - } - } - else { - if (tmp >= u8c_uint32c(0x10000)) { /* Four octets. 
*/ - out.app((u8c_ubytec(0b11110000) + static_cast<char8_t>(tmp >> u8c_uint32c(0x12) & u8c_uint32c(0b00000111)))); - out.app((u8c_ubytec(0b10000000) + static_cast<char8_t>(tmp >> u8c_uint32c(0xC) & u8c_uint32c(0b00111111)))); - out.app((u8c_ubytec(0b10000000) + static_cast<char8_t>(tmp >> u8c_uint32c(0x6) & u8c_uint32c(0b00111111)))); - out.app((u8c_ubytec(0b10000000) + static_cast<char8_t>(tmp & u8c_uint32c(0b00111111)))); - } - else if (tmp >= U'\u0800') { /* Three octets. */ - out.app((u8c_ubytec(0xE0) + static_cast<char8_t>(tmp >> u8c_uint32c(0xC) & u8c_uint32c(0b00001111)))); - out.app((u8c_ubytec(0x80) + static_cast<char8_t>(tmp >> u8c_uint32c(0x6) & u8c_uint32c(0b00111111)))); - out.app((u8c_ubytec(0x80) + static_cast<char8_t>(tmp & u8c_uint32c(0b00111111)))); - } - else if (tmp >= U'\u0080') { /* Two octets. */ - out.app((u8c_ubytec(0xC0) + static_cast<char8_t>(tmp >> u8c_uint32c(0x6) & u8c_uint32c(0b00111111)))); - out.app((u8c_ubytec(0x80) + static_cast<char8_t>(tmp & u8c_uint32c(0b00111111)))); - } - else { - /* One octet. */ - out.app(static_cast<char8_t>(tmp)); - } - } - } - return out; - } - else { - if constexpr (std::is_same_v<T,char16_t>) { - } - else { - for (u8c::size n = 0x0uz;n < in.sz();n += 0x1uz) { - auto const tmp = in[n]; - auto chr = U'\u0000'; - if (tmp >= u8c_ubytec(0b11110000)) { /* Four octets. */ - chr = (tmp ^ u8c_uint32c(0b11110000)) << u8c_uint32c(0x12); - chr += (in[n + 0x1uz] ^ u8c_uint32c(0b10000000)) << u8c_uint32c(0xC); - chr += (in[n + 0x2uz] ^ u8c_uint32c(0b10000000)) << u8c_uint32c(0x6); - chr += in[n + 0x3uz] ^ u8c_uint32c(0b10000000); - n += 0x3uz; - } - else if (tmp >= u8c_ubytec(0b11100000)) { /* Three octets. */ - chr = (tmp ^ u8c_uint32c(0b11100000)) << u8c_uint32c(0xC); - chr += (in[n + 0x1uz] ^ u8c_uint32c(0b10000000)) << u8c_uint32c(0x6); - chr += in[n + 0x2uz] ^ u8c_uint32c(0b10000000); - n += 0x2uz; - } - else if (tmp >= u8c_ubytec(0b11000000)) { /* Two octets. 
*/ - chr = (tmp ^ u8c_uint32c(0b11000000)) << u8c_uint32c(0x6); - chr += in[n + 0x1uz] ^ u8c_uint32c(0b10000000); - n += 0x1uz; - } - else { - /* One octet. */ - chr = tmp; - } - out.app(chr); - } - } - } - } -} - -#endif diff --git a/u8c/include/u8c/utf.d/isalnum b/u8c/include/u8c/utf.d/isalnum deleted file mode 100644 index 0960e57..0000000 --- a/u8c/include/u8c/utf.d/isalnum +++ /dev/null @@ -1,190 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. 
-*/ - -#if !defined(u8c_key_C8fUI0HFQi6fZDUx) -#define u8c_key_C8fUI0HFQi6fZDUx - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::isalpha(char32_t const _chr) -> bool { - if(_chr > u8c::unimax) [[unlikely]] { - throw std::domain_error("Unicode codepoint too big!"); - } - if(u8c::islower(_chr) || u8c::isupper(_chr)) [[unlikely]] { - return true; - } - switch(_chr) { - [[likely]] default: - return false; - case U'\u0297': /* LATIN LETTER GLOTTAL STOP */ - [[fallthrough]]; - case U'\u16A0': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16A1': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16A2': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16A3': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16A4': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16A5': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16A6': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16A7': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16A8': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16A9': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16AA': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16AB': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16AC': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16AD': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16AE': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16AF': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16B0': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16B1': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16B2': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16B3': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16B4': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case 
U'\u16B5': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16B6': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16B7': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16B8': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16B9': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16BA': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16BB': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16BC': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16BD': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16BE': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16BF': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16C0': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16C1': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16C2': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16C3': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16C4': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16C5': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16C6': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16C7': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16C8': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16C9': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16CA': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16CB': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16CC': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16CD': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16CE': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16CF': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16D0': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16D1': 
/* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16D2': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16D3': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16D4': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16D5': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16D6': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16D7': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16D8': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16D9': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16DA': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16DB': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16DC': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16DD': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16DE': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16DF': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16E0': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16E1': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16E2': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16E3': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16E4': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16E5': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16E6': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16E7': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16E8': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16E9': /* RUNIC LETTER FEHU FEOH FE F */ - [[fallthrough]]; - case U'\u16EA': /* RUNIC LETTER FEHU FEOH FE F */ - return true; - } -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isalpha b/u8c/include/u8c/utf.d/isalpha deleted file mode 100644 index 3a0bb9d..0000000 --- a/u8c/include/u8c/utf.d/isalpha +++ /dev/null @@ -1,29 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_YnTiCcefC1wLH21w) -#define u8c_key_YnTiCcefC1wLH21w - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::isalnum(char32_t const _chr) -> bool { - return u8c::isalpha(_chr) || u8c::isdigit(_chr); -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/iscntrl b/u8c/include/u8c/utf.d/iscntrl deleted file mode 100644 index 083bf52..0000000 --- a/u8c/include/u8c/utf.d/iscntrl +++ /dev/null @@ -1,180 +0,0 @@ -// --C++-- -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_KkxufRi4dPQDAbxV) -#define u8c_key_KkxufRi4dPQDAbxV - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::iscntrl(char32_t const _chr) -> bool { - if(_chr > u8c::unimax) [[unlikely]] { - throw std::domain_error("Unicode codepoint too big!"); - } - switch(_chr) { - [[likely]] default: - return false; - case U'\u0000': /* NULL */ - [[fallthrough]]; - case U'\u0001': /* START OF HEADING */ - [[fallthrough]]; - case U'\u0002': /* START OF TEXT */ - [[fallthrough]]; - case U'\u0003': /* END OF TEXT */ - [[fallthrough]]; - case U'\u0004': /* END OF TRANSMISSION */ - [[fallthrough]]; - case U'\u0005': /* ENQUIRY */ - [[fallthrough]]; - case U'\u0006': /* ACKNOWLEDGE */ - [[fallthrough]]; - case U'\a': /* BELL */ - [[fallthrough]]; - case U'\b': /* BACKSPACE */ - [[fallthrough]]; - case U'\t': /* HORIZONTAL TABULATION */ - [[fallthrough]]; - case U'\n': /* NEW LINE */ - [[fallthrough]]; - case U'\v': /* VERTICAL TABULATION */ - [[fallthrough]]; - case U'\f': /* FORM FEED */ - [[fallthrough]]; - case U'\r': /* CARRIAGE 
RETURN */ - [[fallthrough]]; - case U'\u000E': /* SHIFT OUT */ - [[fallthrough]]; - case U'\u000F': /* SHIFT IN */ - [[fallthrough]]; - case U'\x10': /* DATA LINK ESCAPE */ - [[fallthrough]]; - case U'\x11': /* DEVICE CONTROL ONE */ - [[fallthrough]]; - case U'\x12': /* DEVICE CONTROL TWO */ - [[fallthrough]]; - case U'\x13': /* DEVICE CONTROL THREE */ - [[fallthrough]]; - case U'\x14': /* DEVICE CONTROL FOUR */ - [[fallthrough]]; - case U'\x15': /* NEGATIVE ACKNOWLEDGE */ - [[fallthrough]]; - case U'\x16': /* SYNCHRONOUS IDLE */ - [[fallthrough]]; - case U'\x17': /* END OF TRANSMISSION BLOCK */ - [[fallthrough]]; - case U'\x18': /* CANCEL */ - [[fallthrough]]; - case U'\x19': /* END OF MEDIUM */ - [[fallthrough]]; - case U'\x1A': /* SUBSTITUTE */ - [[fallthrough]]; - case U'\u001B': /* ESCAPE */ - [[fallthrough]]; - case U'\x1C': /* FILE SEPERATOR */ - [[fallthrough]]; - case U'\x1D': /* GROUP SEPERATOR */ - [[fallthrough]]; - case U'\x1E': /* RECORD SEPERATOR */ - [[fallthrough]]; - case U'\x1F': /* UNIT SEPERATOR */ - [[fallthrough]]; - case U'\x7F': /* DELETE */ - [[fallthrough]]; - case U'\x80': /* <CONTROL> */ - [[fallthrough]]; - case U'\x81': /* <CONTROL */ - [[fallthrough]]; - case U'\x82': /* BREAK PERMITTED HERE */ - [[fallthrough]]; - case U'\x83': /* NO BREAK HERE */ - [[fallthrough]]; - case U'\x84': /* INDEX */ - [[fallthrough]]; - case U'\x85': /* NEXT LINE */ - [[fallthrough]]; - case U'\x86': /* START OF SELECTED AREA */ - [[fallthrough]]; - case U'\x87': /* END OF SELECTED AREA */ - [[fallthrough]]; - case U'\x88': /* CHARACTER TABULATION SET */ - [[fallthrough]]; - case U'\x89': /* CHARACTER TABULATION SET WITH JUSTIFICATION */ - [[fallthrough]]; - case U'\x8A': /* LINE TABULATION SET */ - [[fallthrough]]; - case U'\x8B': /* PARTIAL LINE FORWARD */ - [[fallthrough]]; - case U'\x8C': /* PARTIAL LINE BACKWARD */ - [[fallthrough]]; - case U'\x8D': /* REVERSE LINE FEED */ - [[fallthrough]]; - case U'\x8E': /* SINGLE SHIFT TWO */ - [[fallthrough]]; - 
case U'\x8F': /* SINGLE SHIFT THREE */ - [[fallthrough]]; - case U'\x90': /* DEVICE CONTROL STRING */ - [[fallthrough]]; - case U'\x91': /* PRIVATE USE ONE */ - [[fallthrough]]; - case U'\x92': /* PRIVATE USE TWO */ - [[fallthrough]]; - case U'\x93': /* SET TRANSMIT STATE */ - [[fallthrough]]; - case U'\x94': /* CANCEL CHARACTER */ - [[fallthrough]]; - case U'\x95': /* MESSAGE WAITING */ - [[fallthrough]]; - case U'\x96': /* START OF GUARDED AREA */ - [[fallthrough]]; - case U'\x97': /* END OF GUARDED AREA */ - [[fallthrough]]; - case U'\x98': /* START OF STRING */ - [[fallthrough]]; - case U'\x99': /* <CONTROL> */ - [[fallthrough]]; - case U'\x9A': /* SINGLE CHARACTER INTRODUCER */ - [[fallthrough]]; - case U'\x9B': /* CONTROL SEQUENCE INTRODUCER */ - [[fallthrough]]; - case U'\x9C': /* STRING TERMINATOR */ - [[fallthrough]]; - case U'\x9D': /* OPERATING SYSTEM COMMAND */ - [[fallthrough]]; - case U'\x9E': /* PRIVACY MESSAGE */ - [[fallthrough]]; - case U'\x9F': /* APPLICATION PROGRAM COMMAND */ - [[fallthrough]]; - case U'\xA0': /* NO-BREAK SPACE */ - [[fallthrough]]; - case U'\u2028': /* LINE SEPERATOR */ - [[fallthrough]]; - case U'\u2029': /* PARAGRAPH SEPERATOR */ - [[fallthrough]]; - case U'\u202D': /* LEFT-TO-RIGHT OVERRIDE */ - [[fallthrough]]; - case U'\u202E': /* RIGHT-TO-LEFT OVERRIDE */ - [[fallthrough]]; - case U'\u2068': /* FIRST STRONG ISOLATE */ - [[fallthrough]]; - case U'\u2069': /* POP DIRECTIONAL ISOLATE */ - return true; - } -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isdigit b/u8c/include/u8c/utf.d/isdigit deleted file mode 100644 index 84179e6..0000000 --- a/u8c/include/u8c/utf.d/isdigit +++ /dev/null @@ -1,59 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_8r8RPCDLujofbg3k) -#define u8c_key_8r8RPCDLujofbg3k - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::isdigit(char32_t const _chr) -> bool { - if(_chr > u8c::unimax) [[unlikely]] { - throw std::domain_error("Unicode codepoint too big!"); - } - switch(_chr) { - [[likely]] default: - return false; - case U'\u0030': /* DIGIT ZERO */ - [[fallthrough]]; - case U'\u0031': /* DIGIT ONE */ - [[fallthrough]]; - case U'\u0032': /* DIGIT TWO */ - [[fallthrough]]; - case U'\u0033': /* DIGIT THREE */ - [[fallthrough]]; - case U'\u0034': /* DIGIT FOUR */ - [[fallthrough]]; - case U'\u0035': /* DIGIT FIVE */ - [[fallthrough]]; - case U'\u0036': /* DIGIT SIX */ - [[fallthrough]]; - case U'\u0037': /* DIGIT SEVEN */ - [[fallthrough]]; - case U'\u0038': /* DIGIT EIGHT */ - [[fallthrough]]; - case U'\u0039': /* DIGIT NINE */ - [[fallthrough]]; - case U'\u218A': /* TURNED DIGIT TWO */ - [[fallthrough]]; - case U'\u218B': /* TURNED DIGIT THREE */ - return true; - } -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/islower b/u8c/include/u8c/utf.d/islower deleted file mode 100644 index 9fd40e6..0000000 --- a/u8c/include/u8c/utf.d/islower +++ /dev/null @@ -1,345 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_AtxlGqMDj7uXYyKb) -#define u8c_key_AtxlGqMDj7uXYyKb - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::islower(char32_t const _chr) -> bool { - if(_chr > u8c::unimax) [[unlikely]] { - throw std::domain_error("Unicode codepoint too big!"); - } - switch(_chr) { - [[likely]] default: - return false; - case U'\u0061': /* LATIN SMALL LETTER A */ - [[fallthrough]]; - case U'\u0062': /* LATIN SMALL LETTER B */ - [[fallthrough]]; - case U'\u0063': /* LATIN SMALL LETTER C */ - [[fallthrough]]; - case U'\u0064': /* LATIN SMALL LETTER D */ - [[fallthrough]]; - case U'\u0065': /* LATIN SMALL LETTER E */ - [[fallthrough]]; - case U'\u0066': /* LATIN SMALL LETTER F */ - [[fallthrough]]; - case U'\u0067': /* LATIN SMALL LETTER G */ - [[fallthrough]]; - case U'\u0068': /* LATIN SMALL LETTER H */ - [[fallthrough]]; - case U'\u0069': /* LATIN SMALL LETTER I */ - [[fallthrough]]; - case U'\u006A': /* LATIN SMALL LETTER J */ - [[fallthrough]]; - case U'\u006B': /* LATIN SMALL LETTER K */ - [[fallthrough]]; - case U'\u006C': /* LATIN SMALL 
LETTER L */ - [[fallthrough]]; - case U'\u006D': /* LATIN SMALL LETTER M */ - [[fallthrough]]; - case U'\u006E': /* LATIN SMALL LETTER N */ - [[fallthrough]]; - case U'\u006F': /* LATIN SMALL LETTER O */ - [[fallthrough]]; - case U'\u0070': /* LATIN SMALL LETTER P */ - [[fallthrough]]; - case U'\u0071': /* LATIN SMALL LETTER Q */ - [[fallthrough]]; - case U'\u0072': /* LATIN SMALL LETTER R */ - [[fallthrough]]; - case U'\u0073': /* LATIN SMALL LETTER S */ - [[fallthrough]]; - case U'\u0074': /* LATIN SMALL LETTER T */ - [[fallthrough]]; - case U'\u0075': /* LATIN SMALL LETTER U */ - [[fallthrough]]; - case U'\u0076': /* LATIN SMALL LETTER V */ - [[fallthrough]]; - case U'\u0077': /* LATIN SMALL LETTER W */ - [[fallthrough]]; - case U'\u0078': /* LATIN SMALL LETTER X */ - [[fallthrough]]; - case U'\u0079': /* LATIN SMALL LETTER Y */ - [[fallthrough]]; - case U'\u007A': /* LATIN SMALL LETTER Z */ - [[fallthrough]]; - case U'\u00DF': /* LATIN SMALL LETTER SHARP S */ - [[fallthrough]]; - case U'\u00E0': /* LATIN SMALL LETTER A WITH GRAVE */ - [[fallthrough]]; - case U'\u00E1': /* LATIN SMALL LETTER A WITH ACUTE */ - [[fallthrough]]; - case U'\u00E2': /* LATIN SMALL LETTER A WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u00E3': /* LATIN SMALL LETTER A WITH TILDE */ - [[fallthrough]]; - case U'\u00E4': /* LATIN SMALL LETTER A WITH DIAERESIS */ - [[fallthrough]]; - case U'\u00E5': /* LATIN SMALL LETTER A WITH RING ABOVE */ - [[fallthrough]]; - case U'\u00E6': /* LATIN SMALL LETTER AE */ - [[fallthrough]]; - case U'\u00E7': /* LATIN SMALL LETTER C WITH CEDILLA */ - [[fallthrough]]; - case U'\u00E8': /* LATIN SMALL LETTER E WITH GRAVE */ - [[fallthrough]]; - case U'\u00E9': /* LATIN SMALL LETTER E WITH ACUTE */ - [[fallthrough]]; - case U'\u00EA': /* LATIN SMALL LETTER E WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u00EB': /* LATIN SMALL LETTER E WITH DIAERESIS */ - [[fallthrough]]; - case U'\u00EC': /* LATIN SMALL LETTER I WITH GRAVE */ - [[fallthrough]]; - case 
U'\u00ED': /* LATIN SMALL LETTER I WITH ACUTE */ - [[fallthrough]]; - case U'\u00EE': /* LATIN SMALL LETTER I WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u00EF': /* LATIN SMALL LETTER I WITH DIAERESIS */ - [[fallthrough]]; - case U'\u00F0': /* LATIN SMALL LETTER ETH */ - [[fallthrough]]; - case U'\u00F1': /* LATIN SMALL LETTER N WITH TILDE */ - [[fallthrough]]; - case U'\u00F2': /* LATIN SMALL LETTER O WITH GRAVE */ - [[fallthrough]]; - case U'\u00F3': /* LATIN SMALL LETTER O WITH ACUTE */ - [[fallthrough]]; - case U'\u00F4': /* LATIN SMALL LETTER O WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u00F5': /* LATIN SMALL LETTER O WITH TILDE */ - [[fallthrough]]; - case U'\u00F6': /* LATIN SMALL LETTER O WITH DIAERESIS */ - [[fallthrough]]; - case U'\u00F8': /* LATIN SMALL LETTER O WITH STROKE */ - [[fallthrough]]; - case U'\u00F9': /* LATIN SMALL LETTER U WITH GRAVE */ - [[fallthrough]]; - case U'\u00FA': /* LATIN SMALL LETTER U WITH ACUTE */ - [[fallthrough]]; - case U'\u00FB': /* LATIN SMALL LETTER U WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u00FC': /* U WITH TWO DOTS */ - [[fallthrough]]; - case U'\u00FD': /* LATIN SMALL LETTER Y WITH ACUTE */ - [[fallthrough]]; - case U'\u00FE': /* LATIN SMALL LETTER THORN */ - [[fallthrough]]; - case U'\u00FF': /* LATIN SMALL LETTER Y WITH DIAERESIS */ - [[fallthrough]]; - case U'\u0105': /* LATIN SMALL LETTER A WITH OGONEK */ - [[fallthrough]]; - case U'\u0107': /* LATIN SMALL LETTER C WITH ACUTE */ - [[fallthrough]]; - case U'\u010D': /* LATIN SMALL LETTER C WITH CARON */ - [[fallthrough]]; - case U'\u010F': /* LATIN SMALL LETTER D WITH CARON */ - [[fallthrough]]; - case U'\u0119': /* LATIN SMALL LETTER E WITH OGONEK */ - [[fallthrough]]; - case U'\u011B': /* LATIN SMALL LETTER E WITH CARON */ - [[fallthrough]]; - case U'\u011F': /* LATIN SMALL LETTER G WITH BREVE */ - [[fallthrough]]; - case U'\u0131': /* LATIN SMALL LETTER DOTLESS I */ - [[fallthrough]]; - case U'\u0133': /* LATIN SMALL LIGATURE IJ */ - 
[[fallthrough]]; - case U'\u0138': /* LATIN SMALL LETTER KRA */ - [[fallthrough]]; - case U'\u0142': /* LATIN SMALL LETTER L WITH STROKE */ - [[fallthrough]]; - case U'\u0144': /* LATIN SMALL LETTER N WITH ACUTE */ - [[fallthrough]]; - case U'\u0148': /* LATIN SMALL LETTER N WITH CARON */ - [[fallthrough]]; - case U'\u014B': /* LATIN SMALL LETTER ENG */ - [[fallthrough]]; - case U'\u0153': /* LATIN SMALL LIGATURE OE */ - [[fallthrough]]; - case U'\u0159': /* LATIN SMALL LETTER R WITH CARON */ - [[fallthrough]]; - case U'\u015B': /* LATIN SMALL LETTER S WITH ACUTE */ - [[fallthrough]]; - case U'\u015F': /* LATIN SMALL LETTER S WITH CEDILLA */ - [[fallthrough]]; - case U'\u0161': /* LATIN SMALL LETTER S WITH CARON */ - [[fallthrough]]; - case U'\u0165': /* LATIN SMALL LETTER T WITH CARON */ - [[fallthrough]]; - case U'\u016F': /* LATIN SMALL LETTER U WITH RING ABOVE */ - [[fallthrough]]; - case U'\u017A': /* LATIN SMALL LETTER Z WITH ACUTE */ - [[fallthrough]]; - case U'\u017C': /* LATIN SMALL LETTER Z WITH DOT ABOVE */ - [[fallthrough]]; - case U'\u017E': /* LATIN SMALL LETTER Z WITH CARON */ - [[fallthrough]]; - case U'\u01BF': /* LATIN LETTER WYNN */ - [[fallthrough]]; - case U'\u01DD': /* LATIN SMALL LETTER TURNED E */ - [[fallthrough]]; - case U'\u021D': /* LATIN SMALL LETTER YOGH */ - [[fallthrough]]; - case U'\u0242': /* LATIN SMALL LETTER GLOTTAL STOP */ - [[fallthrough]]; - case U'\u0250': /* LATIN SMALL LETTER TURNED A */ - [[fallthrough]]; - case U'\u0251': /* LATIN SMALL LETTER ALPHA */ - [[fallthrough]]; - case U'\u0252': /* LATIN SMALL LETTER TURNED ALPHA */ - [[fallthrough]]; - case U'\u0253': /* LATIN SMALL LETTER B WITH HOOk */ - [[fallthrough]]; - case U'\u0254': /* LATIN SMALL LETTER OPEN O */ - [[fallthrough]]; - case U'\u0255': /* LATIN SMALL LETTER C WITH CURL */ - [[fallthrough]]; - case U'\u0256': /* LATIN SMALL LETTER D WITH TAIL */ - [[fallthrough]]; - case U'\u0257': /* LATIN SMALL LETTER D WITH HOOk */ - [[fallthrough]]; - case U'\u0258': 
/* LATIN SMALL LETTER REVERSED E */ - [[fallthrough]]; - case U'\u0259': /* LATIN SMALL LETTER SCHWA */ - [[fallthrough]]; - case U'\u025A': /* LATIN SMALL LETTER SCHWA WITH HOOK */ - [[fallthrough]]; - case U'\u025B': /* LATIN SMALL LETTER OPEN E */ - [[fallthrough]]; - case U'\u025C': /* LATIN SMALL LETTER REVERSED OPEN E */ - [[fallthrough]]; - case U'\u025D': /* LATIN SMALL LETTER REVERSED OPEN E WITH HOOK */ - [[fallthrough]]; - case U'\u025E': /* LATIN SMALL LETTER CLOSED REVERSED OPEN E */ - [[fallthrough]]; - case U'\u025F': /* LATIN SMALL LETTER DOTLESS J WITH STROKE */ - [[fallthrough]]; - case U'\u0260': /* LATIN SMALL LETTER G WITH HOOK */ - [[fallthrough]]; - case U'\u0261': /* LATIN SMALL LETTER SCRIPT G */ - [[fallthrough]]; - case U'\u0262': /* LATIN LETTER SMALL CAPITAL G */ - [[fallthrough]]; - case U'\u0263': /* LATIN SMALL LETTER GAMMA */ - [[fallthrough]]; - case U'\u0264': /* LATIN SMALL LETTER RAMS HORN */ - [[fallthrough]]; - case U'\u0265': /* LATIN SMALL LETTER TURNED H */ - [[fallthrough]]; - case U'\u0266': /* LATIN SMALL LETTER H WITH HOOK */ - [[fallthrough]]; - case U'\u0267': /* LATIN SMALL LETTER HENG WITH HOOK */ - [[fallthrough]]; - case U'\u0268': /* LATIN SMALL LETTER I WITH STROKE */ - [[fallthrough]]; - case U'\u0269': /* LATIN SMALL LETTER IOTA */ - [[fallthrough]]; - case U'\u026A': /* LATIN LETTER SMALL CAPITAL I */ - [[fallthrough]]; - case U'\u026B': /* LATIN SMALL LETTER L WITH MIDDLE TILDE */ - [[fallthrough]]; - case U'\u026C': /* LATIN SMALL LETTER L WITH BELT */ - [[fallthrough]]; - case U'\u026D': /* LATIN SMALL LETTER L WITH RETROFLEX HOOK */ - [[fallthrough]]; - case U'\u026E': /* LATIN SMALL LETTER LEZH */ - [[fallthrough]]; - case U'\u026F': /* LATIN SMALL LETTER TURNED M */ - [[fallthrough]]; - case U'\u0270': /* LATIN SMALL LETTER TURNED M WITH LONG LEG */ - [[fallthrough]]; - case U'\u0271': /* LATIN SMALL LETTER M WITH HOOK */ - [[fallthrough]]; - case U'\u0272': /* LATIN SMALL LETTER N WITH LEFT HOOK */ - 
[[fallthrough]]; - case U'\u0273': /* LATIN SMALL LETTER N WITH RETROFLEX HOOK */ - [[fallthrough]]; - case U'\u0283': /* LATIN SMALL LETTER ESH */ - [[fallthrough]]; - case U'\u028A': /* LATIN SMALL LETTER UPSILON */ - [[fallthrough]]; - case U'\u028B': /* LATIN SMALL LETTER V WITH HOOK */ - [[fallthrough]]; - case U'\u0292': /* LATIN SMALL LETTER EZH */ - [[fallthrough]]; - case U'\u0294': /* LATIN SMALL LETTER GLOTTAL STOP */ - [[fallthrough]]; - case U'\u03B1': /* GREEK SMALL LETTER ALPHA */ - [[fallthrough]]; - case U'\u03B2': /* GREEK SMALL LETTER BETA */ - [[fallthrough]]; - case U'\u03B3': /* GREEK SMALL LETTER GAMMA */ - [[fallthrough]]; - case U'\u03B4': /* GREEK SMALL LETTER DELTA */ - [[fallthrough]]; - case U'\u03B5': /* GREEK SMALL LETTER EPSILON */ - [[fallthrough]]; - case U'\u03B6': /* GREEK SMALL LETTER ZETA */ - [[fallthrough]]; - case U'\u03B7': /* GREEK SMALL LETTER ETA */ - [[fallthrough]]; - case U'\u03B8': /* GREEK SMALL LETTER THETA */ - [[fallthrough]]; - case U'\u03B9': /* GREEK SMALL LETTER IOTA */ - [[fallthrough]]; - case U'\u03BA': /* GREEK SMALL LETTER KAPPA */ - [[fallthrough]]; - case U'\u03BB': /* GREEK SMALL LETTER LAMBDA */ - [[fallthrough]]; - case U'\u03BC': /* GREEK SMALL LETTER MU */ - [[fallthrough]]; - case U'\u03BD': /* GREEK SMALL LETTER NU */ - [[fallthrough]]; - case U'\u03BE': /* GREEK SMALL LETTER XI */ - [[fallthrough]]; - case U'\u03BF': /* GREEK SMALL LETTER OMICRON */ - [[fallthrough]]; - case U'\u03C0': /* GREEK SMALL LETTER PI */ - [[fallthrough]]; - case U'\u03C1': /* GREEK SMALL LETTER RHO */ - [[fallthrough]]; - case U'\u03C2': /* GREEK SMALL LETTER FINAL SIGMA */ - [[fallthrough]]; - case U'\u03C3': /* GREEK SMALL LETTER SIGMA */ - [[fallthrough]]; - case U'\u03C4': /* GREEK SMALL LETTER TAU */ - [[fallthrough]]; - case U'\u03C5': /* GREEK SMALL LETTER UPSILON */ - [[fallthrough]]; - case U'\u03C6': /* GREEK SMALL LETTER PHI */ - [[fallthrough]]; - case U'\u03C7': /* GREEK SMALL LETTER CHI */ - 
[[fallthrough]]; - case U'\u03C8': /* GREEK SMALL LETTER PSI */ - [[fallthrough]]; - case U'\u03C9': /* GREEK SMALL LETTER OMEGA */ - [[fallthrough]]; - case U'\u1D79': /* LATIN SMALL LETTER INSULAR G */ - [[fallthrough]]; - case U'\uA7B7': /* LATIN SMALL LETTER OMEGA */ - [[fallthrough]]; - case U'\uFB00': /* LATIN SMALL LIGATURE FF */ - return true; - } -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/ispunct b/u8c/include/u8c/utf.d/ispunct deleted file mode 100644 index f82f11e..0000000 --- a/u8c/include/u8c/utf.d/ispunct +++ /dev/null @@ -1,329 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_tmcwLOAAPKkIUthE) -#define u8c_key_tmcwLOAAPKkIUthE - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::ispunct(char32_t const _chr) -> bool { - if(_chr > u8c::unimax) [[unlikely]] { - throw std::domain_error("Unicode codepoint too big!"); - } - switch(_chr) { - [[likely]] default: - return false; - case U'\u0021': /* EXCLAMATION MARK */ - [[fallthrough]]; - case U'\u0022': /* QUOTATION MARK */ - [[fallthrough]]; - case U'\u0023': /* NUMBER SIGN */ - [[fallthrough]]; - case U'\u0024': /* DOLLAR SIGN */ - [[fallthrough]]; - case U'\u0025': /* PERCENT SIGN */ - [[fallthrough]]; - case U'\u0026': /* AMPERSAND */ - [[fallthrough]]; - case U'\u0027': /* APOSTROPHE */ - [[fallthrough]]; - case U'\u0028': /* LEFT PARANTHESIS */ - [[fallthrough]]; - case U'\u0029': /* RIGHT PARANTHESIS */ - [[fallthrough]]; - case U'\u002A': /* ASTERISK */ - [[fallthrough]]; - case U'\u002B': /* PLUS SIGN */ - [[fallthrough]]; - case U'\u002C': /* COMMA */ - [[fallthrough]]; - case U'\u002D': /* HYPHEN-MINUS */ - [[fallthrough]]; - case U'\u002E': 
/* FULL STOP */ - [[fallthrough]]; - case U'\u002F': /* SOLIDUS */ - [[fallthrough]]; - case U'\u003A': /* COLON */ - [[fallthrough]]; - case U'\u003B': /* SEMICOLON */ - [[fallthrough]]; - case U'\u003C': /* LESS-THAN SIGN */ - [[fallthrough]]; - case U'\u003D': /* EQUALS SIGN */ - [[fallthrough]]; - case U'\u003E': /* GREATER-THAN SIGN */ - [[fallthrough]]; - case U'\u003F': /* QUESTION MARK */ - [[fallthrough]]; - case U'\u0040': /* COMMERCIAL AT */ - [[fallthrough]]; - case U'\u005B': /* LEFT SQUARE BRACKET */ - [[fallthrough]]; - case U'\u005C': /* REVERSE SOLIDUS */ - [[fallthrough]]; - case U'\u005D': /* RIGHT SQUARE BRACKET */ - [[fallthrough]]; - case U'\u005E': /* CIRCUMFLEX ACCENT */ - [[fallthrough]]; - case U'\u005F': /* LOW LINE */ - [[fallthrough]]; - case U'\u0060': /* GRAVE ACCENT */ - [[fallthrough]]; - case U'\u007B': /* LEFT CURLY BRACKET */ - [[fallthrough]]; - case U'\u007C': /* VERTICAL LINE */ - [[fallthrough]]; - case U'\u007D': /* RIGHT CURLY BRACKET */ - [[fallthrough]]; - case U'\u007E': /* TILDE */ - [[fallthrough]]; - case U'\u00A1': /* INVERT EXCLAMATION MARK */ - [[fallthrough]]; - case U'\u00A2': /* CENT SIGN */ - [[fallthrough]]; - case U'\u00A3': /* POUND SIGN */ - [[fallthrough]]; - case U'\u00A4': /* CURRENCY SIGN */ - [[fallthrough]]; - case U'\u00A5': /* YEN SIGN */ - [[fallthrough]]; - case U'\u00A6': /* BROKEN BAR */ - [[fallthrough]]; - case U'\u00A7': /* SECTION SIGN */ - [[fallthrough]]; - case U'\u00A8': /* DIAERESIS */ - [[fallthrough]]; - case U'\u00A9': /* COPYRIGHT SIGN */ - [[fallthrough]]; - case U'\u00AA': /* FEMININE ORDINAL INDICATOR */ - [[fallthrough]]; - case U'\u00AB': /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ - [[fallthrough]]; - case U'\u00AC': /* NOT SIGN */ - [[fallthrough]]; - case U'\u00AE': /* REGISTERED SIGN */ - [[fallthrough]]; - case U'\u00AF': /* MACRON */ - [[fallthrough]]; - case U'\u00B0': /* DEGREE SIGN */ - [[fallthrough]]; - case U'\u00B1': /* PLUS MINUS SYMBOL */ - [[fallthrough]]; - 
case U'\u00B2': /* SUPERSCRIPT TWO */ - [[fallthrough]]; - case U'\u00B3': /* SUPERSCRIPT THREE */ - [[fallthrough]]; - case U'\u00B4': /* ACUTE ACCENT */ - [[fallthrough]]; - case U'\u00B5': /* MICRO SIGN */ - [[fallthrough]]; - case U'\u00B6': /* PILCROW SIGN */ - [[fallthrough]]; - case U'\u00B7': /* MIDDLE DOT */ - [[fallthrough]]; - case U'\u00B8': /* CEDILLA */ - [[fallthrough]]; - case U'\u00B9': /* SUPERSCRIPT ONE */ - [[fallthrough]]; - case U'\u00BA': /* MASCULINE ORDINAL INDICATOR */ - [[fallthrough]]; - case U'\u00BB': /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ - [[fallthrough]]; - case U'\u00BC': /* VULGAR FRACTION ONE QUARTER */ - [[fallthrough]]; - case U'\u00BD': /* VULGAR FRACTION ONE HALF */ - [[fallthrough]]; - case U'\u00BE': /* VULGAR FRACTION THREE QUARTERS */ - [[fallthrough]]; - case U'\u00BF': /* INVERT QUESTION MARK */ - [[fallthrough]]; - case U'\u00D7': /* MULTIPLICATION SIGN */ - [[fallthrough]]; - case U'\u00F7': /* DIVISION SIGN */ - [[fallthrough]]; - case U'\u2010': /* HYPHEN */ - [[fallthrough]]; - case U'\u2013': /* EN DASH */ - [[fallthrough]]; - case U'\u2014': /* EM DASH */ - [[fallthrough]]; - case U'\u2018': /* LEFT SINGLE QUOTATION MARK */ - [[fallthrough]]; - case U'\u2019': /* RIGHT SINGLE QUOTATION MARK */ - [[fallthrough]]; - case U'\u201C': /* LEFT DOUBLE QUOTATION MARK */ - [[fallthrough]]; - case U'\u201D': /* RIGHT DOUBLE QUOTATION MARK */ - [[fallthrough]]; - case U'\u2026': /* HORIZONTAL ELLIPSIS */ - [[fallthrough]]; - case U'\u2030': /* PER MILLE SIGN */ - [[fallthrough]]; - case U'\u2031': /* PER TEN THOUSAND SIGN */ - [[fallthrough]]; - case U'\u2032': /* PRIME */ - [[fallthrough]]; - case U'\u2033': /* DOUBLE PRIME */ - [[fallthrough]]; - case U'\u2034': /* TRIPLE PRIME */ - [[fallthrough]]; - case U'\u2035': /* REVERSED PRIME */ - [[fallthrough]]; - case U'\u2036': /* REVERSED DOUBLE PRIME */ - [[fallthrough]]; - case U'\u2037': /* REVERSED TRIPLE PRIME */ - [[fallthrough]]; - case U'\u203C': /* DOUBLE 
EXCLAMATION MARK */ - [[fallthrough]]; - case U'\u203D': /* INTERROBANG */ - [[fallthrough]]; - case U'\u2047': /* DOUBLE QUOTATION MARK */ - [[fallthrough]]; - case U'\u2048': /* QUESTION EXCLAMATION MARK */ - [[fallthrough]]; - case U'\u2049': /* EXCLAMATION QUESTION MARK */ - [[fallthrough]]; - case U'\u20A3': /* FRENCH FRANC SIGN */ - [[fallthrough]]; - case U'\u20A4': /* LIRA SIGN */ - [[fallthrough]]; - case U'\u20A8': /* RUPEE SIGN */ - [[fallthrough]]; - case U'\u20A9': /* WON SIGN */ - [[fallthrough]]; - case U'\u20AC': /* EURO SIGN */ - [[fallthrough]]; - case U'\u20B9': /* INDIAN RUPEE SIGN */ - [[fallthrough]]; - case U'\u20BF': /* BITCOIN SIGN */ - [[fallthrough]]; - case U'\u2103': /* DEGREE CELSIUS */ - [[fallthrough]]; - case U'\u2107': /* EULER CONSTANT */ - [[fallthrough]]; - case U'\u2109': /* DEGREE FAHRENHEIT */ - [[fallthrough]]; - case U'\u210E': /* PLANCK CONSTANT */ - [[fallthrough]]; - case U'\u2117': /* SOUND RECORDING COPYRIGHT */ - [[fallthrough]]; - case U'\u2122': /* TRADE MARK SIGN */ - [[fallthrough]]; - case U'\u2125': /* OUNCE SIGN */ - [[fallthrough]]; - case U'\u2126': /* OHM SIGN */ - [[fallthrough]]; - case U'\u212A': /* KELVIN SIGN */ - [[fallthrough]]; - case U'\u214D': /* AKTIESELSKAB */ - [[fallthrough]]; - case U'\u2205': /* EMPTY SET */ - [[fallthrough]]; - case U'\u2212': /* MINUS SIGN */ - [[fallthrough]]; - case U'\u221A': /* SQUARE ROOT */ - [[fallthrough]]; - case U'\u221B': /* CUBE ROOT */ - [[fallthrough]]; - case U'\u221C': /* FOURTH ROOT */ - [[fallthrough]]; - case U'\u221E': /* INFINITY */ - [[fallthrough]]; - case U'\u2228': /* LOGICAL OR */ - [[fallthrough]]; - case U'\u2248': /* ALMOST EQUAL TO */ - [[fallthrough]]; - case U'\u2260': /* NOT EQUAL TO */ - [[fallthrough]]; - case U'\u2264': /* LESS-THAN OR EQUAL TO */ - [[fallthrough]]; - case U'\u2265': /* GREATER-THAN OR EQUAL TO */ - [[fallthrough]]; - case U'\u2609': /* SUN */ - [[fallthrough]]; - case U'\u263F': /* MERCURY */ - [[fallthrough]]; - case 
U'\u2640': /* FEMALE SIGN */ - [[fallthrough]]; - case U'\u2641': /* EARTH */ - [[fallthrough]]; - case U'\u2642': /* MALE SIGN */ - [[fallthrough]]; - case U'\u2643': /* JUPITER */ - [[fallthrough]]; - case U'\u2644': /* SATURN */ - [[fallthrough]]; - case U'\u2645': /* URANUS */ - [[fallthrough]]; - case U'\u2646': /* NEPTUNE */ - [[fallthrough]]; - case U'\u2647': /* PLUTO */ - [[fallthrough]]; - case U'\u26A2': /* DOUBLED FEMALE SIGN */ - [[fallthrough]]; - case U'\u26A3': /* DOUBLED MALE SIGN */ - [[fallthrough]]; - case U'\u26A4': /* INTERLOCKED FEMALE AND MALE SIGN */ - [[fallthrough]]; - case U'\u26A5': /* MALE AND FEMALE SIGN */ - [[fallthrough]]; - case U'\u26B3': /* CERES */ - [[fallthrough]]; - case U'\u26B4': /* PALLAS */ - [[fallthrough]]; - case U'\u26B5': /* JUNO */ - [[fallthrough]]; - case U'\u26B6': /* VESTA */ - [[fallthrough]]; - case U'\u26B7': /* CHIRON */ - [[fallthrough]]; - case U'\u2BD8': /* PROSERPINA */ - [[fallthrough]]; - case U'\u2BD9': /* ASTRAEA */ - [[fallthrough]]; - case U'\u2BDA': /* HYGIEA */ - [[fallthrough]]; - case U'\u2BDB': /* PHOLOS */ - [[fallthrough]]; - case U'\u2BDC': /* NESSUS */ - [[fallthrough]]; - case U'\u2E2E': /* INVERTED QUESTION MARK */ - [[fallthrough]]; - case U'\u33D7': /* SQUARE PH */ - [[fallthrough]]; - case U'\uFDFC': /* RIAL SIGN */ - [[fallthrough]]; - case U'\U0001F10D': /* CIRCLED ZERO WITH SLASH */ - [[fallthrough]]; - case U'\U0001F10E': /* CIRCLED ANTICKLOCKWISE ARROW */ - [[fallthrough]]; - case U'\U0001F10F': /* CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH */ - [[fallthrough]]; - case U'\U0001F12F': /* COPYLEFT SYMBOL */ - [[fallthrough]]; - case U'\U0001F16D': /* CIRCLED CC */ - [[fallthrough]]; - case U'\U0001F16E': /* CIRCLED C WITH OVERLAID BACKSLASH */ - [[fallthrough]]; - case U'\U0001F16F': /* CIRCLED HUMAN FIGURE */ - return true; - } -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isspace b/u8c/include/u8c/utf.d/isspace deleted file mode 100644 index ccf191a..0000000 --- a/u8c/include/u8c/utf.d/isspace +++ /dev/null @@ -1,47 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_fRbwVyyBwfrm0Slq) -#define u8c_key_fRbwVyyBwfrm0Slq - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::isspace(char32_t const _chr) -> bool { - if(_chr > u8c::unimax) [[unlikely]] { - throw std::domain_error("Unicode codepoint too big!"); - } - switch(_chr) { - [[likely]] default: - return false; - case U'\u0009': /* HORIZONTAL TABULATION */ - [[fallthrough]]; - case U'\u000A': /* NEW LINE */ - [[fallthrough]]; - case U'\u000B': /* VERTICAL TABULATION */ - [[fallthrough]]; - case U'\u000C': /* FORM FEED */ - [[fallthrough]]; - case U'\u000D': /* CARRIAGE RETURN */ - [[fallthrough]]; - case U'\u0020': /* SPACE */ - return true; - } -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/issurro b/u8c/include/u8c/utf.d/issurro deleted file mode 100644 index 119c2e5..0000000 --- a/u8c/include/u8c/utf.d/issurro +++ /dev/null @@ -1,35 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_f4d3TezxF0FFmbn4) -#define u8c_key_f4d3TezxF0FFmbn4 - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::issurro(char32_t const _chr) -> bool { - if(_chr > u8c::unimax) [[unlikely]] { - throw std::domain_error("Unicode codepoint too big!"); - } - if(_chr >= U'\xD800' && _chr <= U'\xDFFF') [[unlikely]] { - return true; - } - return false; -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isupper b/u8c/include/u8c/utf.d/isupper deleted file mode 100644 index 4a18fd5..0000000 --- a/u8c/include/u8c/utf.d/isupper +++ /dev/null @@ -1,265 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_CNx6iimb2pI6RXGS) -#define u8c_key_CNx6iimb2pI6RXGS - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::isupper(char32_t const _chr) -> bool { - if(_chr > u8c::unimax) [[unlikely]] { - throw std::domain_error("Unicode codepoint too big!"); - } - switch(_chr) { - [[likely]] default: - return false; - case U'\u0041': /* LATIN CAPITAL LETTER A */ - [[fallthrough]]; - case U'\u0042': /* LATIN CAPITAL LETTER B */ - [[fallthrough]]; - case U'\u0043': /* LATIN CAPITAL LETTER C */ - [[fallthrough]]; - case U'\u0044': /* LATIN CAPITAL LETTER D */ - [[fallthrough]]; - case U'E': /* LATIN CAPITAL LETTER E */ - [[fallthrough]]; - case U'F': /* LATIN CAPITAL LETTER F */ - [[fallthrough]]; - case U'G': /* LATIN CAPITAL LETTER G */ - [[fallthrough]]; - case U'H': /* LATIN CAPITAL LETTER H */ - [[fallthrough]]; - case U'I': /* LATIN CAPITAL LETTER I */ - [[fallthrough]]; - case U'J': /* LATIN CAPITAL LETTER J */ - [[fallthrough]]; - case U'K': /* LATIN CAPITAL LETTER K */ - [[fallthrough]]; - case U'L': /* LATIN CAPITAL LETTER L */ - 
[[fallthrough]]; - case U'M': /* LATIN CAPITAL LETTER M */ - [[fallthrough]]; - case U'N': /* LATIN CAPITAL LETTER N */ - [[fallthrough]]; - case U'O': /* LATIN CAPITAL LETTER O */ - [[fallthrough]]; - case U'P': /* LATIN CAPITAL LETTER P */ - [[fallthrough]]; - case U'Q': /* LATIN CAPITAL LETTER Q */ - [[fallthrough]]; - case U'R': /* LATIN CAPITAL LETTER R */ - [[fallthrough]]; - case U'S': /* LATIN CAPITAL LETTER S */ - [[fallthrough]]; - case U'T': /* LATIN CAPITAL LETTER T */ - [[fallthrough]]; - case U'U': /* LATIN CAPITAL LETTER U */ - [[fallthrough]]; - case U'V': /* LATIN CAPITAL LETTER V */ - [[fallthrough]]; - case U'X': /* LATIN CAPITAL LETTER Y */ - [[fallthrough]]; - case U'W': /* LATIN CAPITAL LETTER X */ - [[fallthrough]]; - case U'Y': /* LATIN CAPITAL LETTER Y */ - [[fallthrough]]; - case U'Z': /* LATIN CAPITAL LETTER Z */ - [[fallthrough]]; - case U'\u00C0': /* LATIN CAPITAL LETTER A WITH GRAVE */ - [[fallthrough]]; - case U'\u00C1': /* LATIN CAPITAL LETTER A WITH ACUTE */ - [[fallthrough]]; - case U'\u00C2': /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u00C3': /* LATIN CAPITAL LETTER A WITH TILDE */ - [[fallthrough]]; - case U'\u00C4': /* LATIN CAPITAL LETTER A WITH DIAERESIS */ - [[fallthrough]]; - case U'\u00C5': /* LATIN CAPITAL LETTER A WITH RING ABOVE */ - [[fallthrough]]; - case U'\u00C6': /* LATIN CAPITAL LETTER AE */ - [[fallthrough]]; - case U'\u00C7': /* LATIN CAPITAL LETTER C WITH CEDILLA */ - [[fallthrough]]; - case U'\u00C8': /* LATIN CAPITAL LETTER E WITH GRAVE */ - [[fallthrough]]; - case U'\u00C9': /* LATIN CAPITAL LETTER E WITH ACUTE */ - [[fallthrough]]; - case U'\u00CA': /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u00CB': /* LATIN CAPITAL LETTER E WITH DIAERESIS */ - [[fallthrough]]; - case U'\u00CC': /* LATIN CAPITAL LETTER I WITH GRAVE */ - [[fallthrough]]; - case U'\u00CD': /* LATIN CAPITAL LETTER I WITH ACUTE */ - [[fallthrough]]; - case U'\u00CE': /* LATIN CAPITAL 
LETTER I WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u00CF': /* LATIN CAPITAL LETTER I WITH DIAERESIS */ - [[fallthrough]]; - case U'\u00D0': /* LATIN CAPITAL LETTER ETH */ - [[fallthrough]]; - case U'\u00D1': /* LATIN CAPITAL LETTER N WITH TILDE */ - [[fallthrough]]; - case U'\u00D2': /* LATIN CAPITAL LETTER O WITH GRAVE */ - [[fallthrough]]; - case U'\u00D3': /* LATIN CAPITAL LETTER O WITH ACUTE */ - [[fallthrough]]; - case U'\u00D4': /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u00D5': /* LATIN CAPITAL LETTER O WITH TILDE */ - [[fallthrough]]; - case U'\u00D6': /* LATIN CAPITAL LETTER O WITH DIAERESIS */ - [[fallthrough]]; - case U'\u00D8': /* LATIN CAPITAL LETTER O WITH STROKE */ - [[fallthrough]]; - case U'\u00D9': /* LATIN CAPITAL LETTER U WITH GRAVE */ - [[fallthrough]]; - case U'\u00DA': /* LATIN CAPITAL LETTER U WITH STROKE */ - [[fallthrough]]; - case U'\u00DB': /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u00DC': /* LATIN CAPITAL LETTER U WITH DIAERESIS */ - [[fallthrough]]; - case U'\u00DD': /* LATIN CAPITAL LETTER Y WITH ACUTE */ - [[fallthrough]]; - case U'\u00DE': /* LATIN CAPITAL LETTER THORN */ - [[fallthrough]]; - case U'\u0100': /* LATIN CAPITAL LETTER A WITH MACRON */ - [[fallthrough]]; - case U'\u0102': /* LATIN CAPITAL LETTER A WITH BREVE */ - [[fallthrough]]; - case U'\u0104': /* LATIN CAPITAL LETTER A WITH OGONEK */ - [[fallthrough]]; - case U'\u0106': /* LATIN CAPITAL LETTER C WITH ACUTE */ - [[fallthrough]]; - case U'\u0108': /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u010A': /* LATIN CAPITAL LETTER C WITH DOT ABOVE */ - [[fallthrough]]; - case U'\u010C': /* LATIN CAPITAL LETTER C WITH CARON */ - [[fallthrough]]; - case U'\u010E': /* LATIN CAPITAL LETTER D WITH CARON */ - [[fallthrough]]; - case U'\u0110': /* LATIN CAPITAL LETTER D WITH STROKE */ - [[fallthrough]]; - case U'\u0112': /* LATIN CAPITAL LETTER E WITH MACRON */ - [[fallthrough]]; - case 
U'\u0114': /* LATIN CAPITAL LETTER E WITH BREVE */ - [[fallthrough]]; - case U'\u0116': /* LATIN CAPITAL LETTER E WITH DOT ABOVE */ - [[fallthrough]]; - case U'\u0118': /* LATIN CAPITAL LETTER E WITH OGONEK */ - [[fallthrough]]; - case U'\u011A': /* LATIN CAPITAL LETTER E WITH CARON */ - [[fallthrough]]; - case U'\u011C': /* LATIN CAPITAL LETTER G WITH CIRCUMFLEX */ - [[fallthrough]]; - case U'\u014A': /* LATIN CAPITAL LETTER ENG */ - [[fallthrough]]; - case U'\u0152': /* LATIN CAPITAL LIGATURE OE */ - [[fallthrough]]; - case U'\u0186': /* LATIN CAPITAL LETTER OPEN O */ - [[fallthrough]]; - case U'\u018E': /* LATIN CAPITAL LETTER REVERSED E */ - [[fallthrough]]; - case U'\u018F': /* LATIN CAPITAL LETTER SCHWA */ - [[fallthrough]]; - case U'\u0190': /* LATIN CAPITAL LETTER OPEN E */ - [[fallthrough]]; - case U'\u0194': /* LATIN CAPITAL LETTER GAMMA */ - [[fallthrough]]; - case U'\u0196': /* LATIN CAPITAL LETTER IOTA */ - [[fallthrough]]; - case U'\u01A9': /* LATIN CAPITAL LETTER ESH */ - [[fallthrough]]; - case U'\u01B1': /* LATIN CAPITAL LETTER UPSILON */ - [[fallthrough]]; - case U'\u01B2': /* LATIN CAPITAL LETTER V WITH HOOk */ - [[fallthrough]]; - case U'\u01B7': /* LATIN CAPITAL LETTER EZH */ - [[fallthrough]]; - case U'\u01F7': /* LATIN CAPITAL LETTER WYNN */ - [[fallthrough]]; - case U'\u021C': /* LATIN CAPITAL LETTER YOGH */ - [[fallthrough]]; - case U'\u0241': /* LATIN CAPITAL LETTER GLOTTAL STOP */ - [[fallthrough]]; - case U'\u0391': /* GREEK CAPITAL LETTER ALPHA */ - [[fallthrough]]; - case U'\u0392': /* GREEK CAPITAL LETTER BETA */ - [[fallthrough]]; - case U'\u0393': /* GREEK CAPITAL LETTER GAMMA */ - [[fallthrough]]; - case U'\u0394': /* GREEK CAPITAL LETTER DELTA */ - [[fallthrough]]; - case U'\u0395': /* GREEK CAPITAL LETTER EPSILON */ - [[fallthrough]]; - case U'\u0396': /* GREEK CAPITAL LETTER ZETA */ - [[fallthrough]]; - case U'\u0397': /* GREEK CAPITAL LETTER ETA */ - [[fallthrough]]; - case U'\u0398': /* GREEK CAPITAL LETTER THETA */ - 
[[fallthrough]]; - case U'\u0399': /* GREEK CAPITAL LETTER IOTA */ - [[fallthrough]]; - case U'\u039A': /* GREEK CAPITAL LETTER KAPPA */ - [[fallthrough]]; - case U'\u039B': /* GREEK CAPITAL LETTER LAMBDA */ - [[fallthrough]]; - case U'\u039C': /* GREEK CAPITAL LETTER MU */ - [[fallthrough]]; - case U'\u039D': /* GREEK CAPITAL LETTER NU */ - [[fallthrough]]; - case U'\u039E': /* GREEK CAPITAL LETTER XI */ - [[fallthrough]]; - case U'\u039F': /* GREEK CAPITAL LETTER OMICRON */ - [[fallthrough]]; - case U'\u03A0': /* GREEK CAPITAL LETTER PI */ - [[fallthrough]]; - case U'\u03A1': /* GREEK CAPITAL LETTER RHO */ - [[fallthrough]]; - case U'\u03A3': /* GREEK CAPITAL LETTER SIGMA */ - [[fallthrough]]; - case U'\u03A4': /* GREEK CAPITAL LETTER TAU */ - [[fallthrough]]; - case U'\u03A5': /* GREEK CAPITAL LETTER UPSILON */ - [[fallthrough]]; - case U'\u03A6': /* GREEK CAPITAL LETTER PHI */ - [[fallthrough]]; - case U'\u03A7': /* GREEK CAPITAL LETTER CHI */ - [[fallthrough]]; - case U'\u03A8': /* GREEK CAPITAL LETTER PSI */ - [[fallthrough]]; - case U'\u03A9': /* GREEK CAPITAL LETTER OMEGA */ - [[fallthrough]]; - case U'\u1E9E': /* LATIN CAPITAL LETTER SHARP S */ - [[fallthrough]]; - case U'\u2C6D': /* LATIN CAPITAL LETTER ALPHA */ - [[fallthrough]]; - case U'\uA77D': /* LATIN CAPITAL LETTER INSULAR G */ - [[fallthrough]]; - case U'\uA7B4': /* LATIN CAPITAL LETTER BETA */ - [[fallthrough]]; - case U'\uA7B6': /* LATIN CAPITAL LETTER OMEGA */ - return true; - } -} - -#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isxdigit b/u8c/include/u8c/utf.d/isxdigit deleted file mode 100644 index cc73526..0000000 --- a/u8c/include/u8c/utf.d/isxdigit +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#if !defined(u8c_key_NdERYC9ToUZX0vHE) -#define u8c_key_NdERYC9ToUZX0vHE - -#include <stdexcept> /* std::domain_error */ - -constexpr auto u8c::isxdigit(char32_t const _chr) -> bool { - if(_chr > u8c::unimax) [[unlikely]] { - throw std::domain_error("Unicode codepoint too big!"); - } - switch(_chr) { - [[likely]] default: - return false; - break; - case U'\u0030': /* DIGIT ZERO */ - [[fallthrough]]; - case U'\u0031': /* DIGIT ONE */ - [[fallthrough]]; - case U'\u0032': /* DIGIT TWO */ - [[fallthrough]]; - case U'\u0033': /* DIGIT THREE */ - [[fallthrough]]; - case U'\u0034': /* DIGIT FOUR */ - [[fallthrough]]; - case U'\u0035': /* DIGIT FIVE */ - [[fallthrough]]; - case U'\u0036': /* DIGIT SIX */ - [[fallthrough]]; - case U'\u0037': /* DIGIT SEVEN */ - [[fallthrough]]; - case U'\u0038': /* DIGIT EIGHT */ - [[fallthrough]]; - case U'\u0039': /* DIGIT NINE */ - [[fallthrough]]; - case U'\u0041': /* LATIN CAPITAL LETTER A */ - [[fallthrough]]; - case U'\u0042': /* LATIN CAPITAL LETTER B */ - [[fallthrough]]; - case U'\u0043': /* LATIN CAPITAL LETTER C */ - 
[[fallthrough]]; - case U'\u0044': /* LATIN CAPITAL LETTER D */ - [[fallthrough]]; - case U'\u0045': /* LATIN CAPITAL LETTER E */ - [[fallthrough]]; - case U'\u0046': /* LATIN CAPITAL LETTER F */ - return true; - } -} - -#endif
\ No newline at end of file diff --git a/u8c/source/character/is_alphabetic.c b/u8c/source/character/is_alphabetic.c new file mode 100644 index 0000000..551c93e --- /dev/null +++ b/u8c/source/character/is_alphabetic.c @@ -0,0 +1,117 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. + + u8c is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU + Lesser General Public License along with u8c. If + not, see <https://www.gnu.org/licenses/>. +*/ + +#include <u8c/character.h> + +#include <stdbool.h> +#include <stdint.h> + +u8c_NO_DISCARD u8c_NO_THROW u8c_UNSEQUENCED static bool u8c_is_alphabetic_internal(uint_least32_t code_point); + +bool u8c_is_alphabetic(uint_least32_t const code_point) { + return u8c_is_minuscule(code_point) || u8c_is_majuscule(code_point) || u8c_is_alphabetic_internal(code_point); +} + +static bool u8c_is_alphabetic_internal(uint_least32_t const code_point) { + switch(code_point) { + default: + return false; + + case UINT32_C(0x16A0): // RUNIC LETTER FEHU FEOH FE F + case UINT32_C(0x16A1): // RUNIC LETTER V + case UINT32_C(0x16A2): // RUNIC LETTER URUZ UR U + case UINT32_C(0x16A3): // RUNIC LETTER YR + case UINT32_C(0x16A4): // RUNIC LETTER Y + case UINT32_C(0x16A5): // RUNIC LETTER W + case UINT32_C(0x16A6): // RUNIC LETTER THURISAZ THURS THORN + case UINT32_C(0x16A7): // RUNIC LETTER ETH + case UINT32_C(0x16A8): // RUNIC LETTER ANSUZ A + case UINT32_C(0x16A9): // RUNIC LETTER OS O + case UINT32_C(0x16AA): // RUNIC LETTER AC A + case 
UINT32_C(0x16AB): // RUNIC LETTER AESC + case UINT32_C(0x16AC): // RUNIC LETTER LONG-BRANCH-OSS O + case UINT32_C(0x16AD): // RUNIC LETTER SHORT-TWIG-OSS O + case UINT32_C(0x16AE): // RUNIC LETTER O + case UINT32_C(0x16AF): // RUNIC LETTER OE + case UINT32_C(0x16B0): // RUNIC LETTER ON + case UINT32_C(0x16B1): // RUNIC LETTER RAIDO RAD REID R + case UINT32_C(0x16B2): // RUNIC LETTER KAUNA + case UINT32_C(0x16B3): // RUNIC LETTER CEN + case UINT32_C(0x16B4): // RUNIC LETTER KAUN K + case UINT32_C(0x16B5): // RUNIC LETTER G + case UINT32_C(0x16B6): // RUNIC LETTER ENG + case UINT32_C(0x16B7): // RUNIC LETTER GEBO GYFU G + case UINT32_C(0x16B8): // RUNIC LETTER GAR + case UINT32_C(0x16B9): // RUNIC LETTER WUNJO WYNN W + case UINT32_C(0x16BA): // RUNIC LETTER HAGLAZ H + case UINT32_C(0x16BB): // RUNIC LETTER HAEGL H + case UINT32_C(0x16BC): // RUNIC LETTER LONG-BRANCH-HAGALL H + case UINT32_C(0x16BD): // RUNIC LETTER SHORT-TWIG-HAGALL H + case UINT32_C(0x16BE): // RUNIC LETTER NAUDIZ NYD NAUD N + case UINT32_C(0x16BF): // RUNIC LETTER SHORT-TWIG-NAUD N + case UINT32_C(0x16C0): // RUNIC LETTER DOTTED-N + case UINT32_C(0x16C1): // RUNIC LETTER ISAZ IS ISS I + case UINT32_C(0x16C2): // RUNIC LETTER E + case UINT32_C(0x16C3): // RUNIC LETTER JERAN J + case UINT32_C(0x16C4): // RUNIC LETTER GER + case UINT32_C(0x16C5): // RUNIC LETTER LONG-BRANCH-AR AE + case UINT32_C(0x16C6): // RUNIC LETTER SHORT-TWIG-AR A + case UINT32_C(0x16C7): // RUNIC LETTER IWAZ EOH + case UINT32_C(0x16C8): // RUNIC LETTER PERTHO PEORTH P + case UINT32_C(0x16C9): // RUNIC LETTER ALGIZ EOLHX + case UINT32_C(0x16CA): // RUNIC LETTER SOWILO S + case UINT32_C(0x16CB): // RUNIC LETTER SIGEL LONG-BRANCH-SOL S + case UINT32_C(0x16CC): // RUNIC LETTER SHORT-TWIG-SOL S + case UINT32_C(0x16CD): // RUNIC LETTER C + case UINT32_C(0x16CE): // RUNIC LETTER Z + case UINT32_C(0x16CF): // RUNIC LETTER TIWAZ TIR TYR T + case UINT32_C(0x16D0): // RUNIC LETTER SHORT-TWIG-TYR T + case UINT32_C(0x16D1): // RUNIC LETTER D 
+ case UINT32_C(0x16D2): // RUNIC LETTER BERKANAN BEORC BJARKAN B + case UINT32_C(0x16D3): // RUNIC LETTER SHORT-TWIG-BJARKAN B + case UINT32_C(0x16D4): // RUNIC LETTER DOTTED-P + case UINT32_C(0x16D5): // RUNIC LETTER OPEN-P + case UINT32_C(0x16D6): // RUNIC LETTER EHWAZ EH E + case UINT32_C(0x16D7): // RUNIC LETTER MANNAZ MAN M + case UINT32_C(0x16D8): // RUNIC LETTER LONG-BRANCH-MADR M + case UINT32_C(0x16D9): // RUNIC LETTER SHORT-TWIG-MADR M + case UINT32_C(0x16DA): // RUNIC LETTER LAUKAZ LAGU LOGR L + case UINT32_C(0x16DB): // RUNIC LETTER DOTTED-L + case UINT32_C(0x16DC): // RUNIC LETTER INGWAZ + case UINT32_C(0x16DD): // RUNIC LETTER ING + case UINT32_C(0x16DE): // RUNIC LETTER DAGAZ DAEG D + case UINT32_C(0x16DF): // RUNIC LETTER OTHALAN ETHEL O + case UINT32_C(0x16E0): // RUNIC LETTER EAR + case UINT32_C(0x16E1): // RUNIC LETTER IOR + case UINT32_C(0x16E2): // RUNIC LETTER CWEORTH + case UINT32_C(0x16E3): // RUNIC LETTER CALC + case UINT32_C(0x16E4): // RUNIC LETTER CEALC + case UINT32_C(0x16E5): // RUNIC LETTER STAN + case UINT32_C(0x16E6): // RUNIC LETTER LONG-BRANCH-YR + case UINT32_C(0x16E7): // RUNIC LETTER SHORT-TWIG-YR + case UINT32_C(0x16E8): // RUNIC LETTER ICELANDIC-YR + case UINT32_C(0x16E9): // RUNIC LETTER Q F + case UINT32_C(0x16EA): // RUNIC LETTER X + return true; + } +} diff --git a/u8c/source/character/is_control.c b/u8c/source/character/is_control.c new file mode 100644 index 0000000..f6a0ddd --- /dev/null +++ b/u8c/source/character/is_control.c @@ -0,0 +1,108 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. 
// Reports whether code_point belongs to the library's control set.
//
// The set is made of five contiguous spans:
//   U+0000..U+001F  C0 controls (NULL through UNIT SEPARATOR)
//   U+007F..U+00A0  DELETE, the C1 controls, and NO-BREAK SPACE
//   U+2028..U+2029  LINE SEPARATOR and PARAGRAPH SEPARATOR
//   U+202D..U+202E  LEFT-TO-RIGHT OVERRIDE and RIGHT-TO-LEFT OVERRIDE
//   U+2068..U+2069  FIRST STRONG ISOLATE and POP DIRECTIONAL ISOLATE
//
// Returns true for members of those spans and false for everything else.
bool u8c_is_control(uint_least32_t const code_point) {
	// C0 controls.
	if (code_point <= UINT32_C(0x001F)) {
		return true;
	}

	// DELETE, C1 controls, NO-BREAK SPACE.
	if (code_point >= UINT32_C(0x007F) && code_point <= UINT32_C(0x00A0)) {
		return true;
	}

	// LINE SEPARATOR, PARAGRAPH SEPARATOR.
	if (code_point == UINT32_C(0x2028) || code_point == UINT32_C(0x2029)) {
		return true;
	}

	// LEFT-TO-RIGHT OVERRIDE, RIGHT-TO-LEFT OVERRIDE.
	if (code_point == UINT32_C(0x202D) || code_point == UINT32_C(0x202E)) {
		return true;
	}

	// FIRST STRONG ISOLATE, POP DIRECTIONAL ISOLATE.
	return code_point == UINT32_C(0x2068) || code_point == UINT32_C(0x2069);
}
// Reports whether code_point is an ASCII hexadecimal digit.
//
// Accepted code points are DIGIT ZERO through DIGIT NINE, LATIN CAPITAL
// LETTER A through F, and LATIN SMALL LETTER A through F. Both letter cases
// are accepted so that ordinary lowercase hexadecimal text (e.g. "ff") is
// recognised the same way as its uppercase spelling.
//
// Returns true for those 22 code points and false for everything else.
bool u8c_is_hexadecimal_numeric(uint_least32_t const code_point) {
	switch(code_point) {
	default:
		return false;

	case UINT32_C(0x0030): // DIGIT ZERO
	case UINT32_C(0x0031): // DIGIT ONE
	case UINT32_C(0x0032): // DIGIT TWO
	case UINT32_C(0x0033): // DIGIT THREE
	case UINT32_C(0x0034): // DIGIT FOUR
	case UINT32_C(0x0035): // DIGIT FIVE
	case UINT32_C(0x0036): // DIGIT SIX
	case UINT32_C(0x0037): // DIGIT SEVEN
	case UINT32_C(0x0038): // DIGIT EIGHT
	case UINT32_C(0x0039): // DIGIT NINE
	case UINT32_C(0x0041): // LATIN CAPITAL LETTER A
	case UINT32_C(0x0042): // LATIN CAPITAL LETTER B
	case UINT32_C(0x0043): // LATIN CAPITAL LETTER C
	case UINT32_C(0x0044): // LATIN CAPITAL LETTER D
	case UINT32_C(0x0045): // LATIN CAPITAL LETTER E
	case UINT32_C(0x0046): // LATIN CAPITAL LETTER F
	case UINT32_C(0x0061): // LATIN SMALL LETTER A
	case UINT32_C(0x0062): // LATIN SMALL LETTER B
	case UINT32_C(0x0063): // LATIN SMALL LETTER C
	case UINT32_C(0x0064): // LATIN SMALL LETTER D
	case UINT32_C(0x0065): // LATIN SMALL LETTER E
	case UINT32_C(0x0066): // LATIN SMALL LETTER F
		return true;
	}
}
// Reports whether code_point is one of the majuscule (capital) letters known
// to the library.
//
// The set is an explicit list: the basic Latin capitals, the Latin-1 capitals
// (U+00C0..U+00DE, excluding U+00D7 MULTIPLICATION SIGN), a selection of
// extended Latin capitals, the Greek capitals, and a few isolated letters.
//
// Returns true for listed code points and false for everything else.
bool u8c_is_majuscule(uint_least32_t const code_point) {
	switch(code_point) {
	default:
		return false;

	case UINT32_C(0x0041): // LATIN CAPITAL LETTER A
	case UINT32_C(0x0042): // LATIN CAPITAL LETTER B
	case UINT32_C(0x0043): // LATIN CAPITAL LETTER C
	case UINT32_C(0x0044): // LATIN CAPITAL LETTER D
	case UINT32_C(0x0045): // LATIN CAPITAL LETTER E
	case UINT32_C(0x0046): // LATIN CAPITAL LETTER F
	case UINT32_C(0x0047): // LATIN CAPITAL LETTER G
	case UINT32_C(0x0048): // LATIN CAPITAL LETTER H
	case UINT32_C(0x0049): // LATIN CAPITAL LETTER I
	case UINT32_C(0x004A): // LATIN CAPITAL LETTER J
	case UINT32_C(0x004B): // LATIN CAPITAL LETTER K
	case UINT32_C(0x004C): // LATIN CAPITAL LETTER L
	case UINT32_C(0x004D): // LATIN CAPITAL LETTER M
	case UINT32_C(0x004E): // LATIN CAPITAL LETTER N
	case UINT32_C(0x004F): // LATIN CAPITAL LETTER O
	case UINT32_C(0x0050): // LATIN CAPITAL LETTER P
	case UINT32_C(0x0051): // LATIN CAPITAL LETTER Q
	case UINT32_C(0x0052): // LATIN CAPITAL LETTER R
	case UINT32_C(0x0053): // LATIN CAPITAL LETTER S
	case UINT32_C(0x0054): // LATIN CAPITAL LETTER T
	case UINT32_C(0x0055): // LATIN CAPITAL LETTER U
	case UINT32_C(0x0056): // LATIN CAPITAL LETTER V
	case UINT32_C(0x0057): // LATIN CAPITAL LETTER W
	case UINT32_C(0x0058): // LATIN CAPITAL LETTER X
	case UINT32_C(0x0059): // LATIN CAPITAL LETTER Y
	case UINT32_C(0x005A): // LATIN CAPITAL LETTER Z
	// U+005B is LEFT SQUARE BRACKET, not a letter; the Latin-1 capitals
	// start at U+00C0.
	case UINT32_C(0x00C0): // LATIN CAPITAL LETTER A WITH GRAVE
	case UINT32_C(0x00C1): // LATIN CAPITAL LETTER A WITH ACUTE
	case UINT32_C(0x00C2): // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
	case UINT32_C(0x00C3): // LATIN CAPITAL LETTER A WITH TILDE
	case UINT32_C(0x00C4): // LATIN CAPITAL LETTER A WITH DIAERESIS
	case UINT32_C(0x00C5): // LATIN CAPITAL LETTER A WITH RING ABOVE
	case UINT32_C(0x00C6): // LATIN CAPITAL LETTER AE
	case UINT32_C(0x00C7): // LATIN CAPITAL LETTER C WITH CEDILLA
	case UINT32_C(0x00C8): // LATIN CAPITAL LETTER E WITH GRAVE
	case UINT32_C(0x00C9): // LATIN CAPITAL LETTER E WITH ACUTE
	case UINT32_C(0x00CA): // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
	case UINT32_C(0x00CB): // LATIN CAPITAL LETTER E WITH DIAERESIS
	case UINT32_C(0x00CC): // LATIN CAPITAL LETTER I WITH GRAVE
	case UINT32_C(0x00CD): // LATIN CAPITAL LETTER I WITH ACUTE
	case UINT32_C(0x00CE): // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
	case UINT32_C(0x00CF): // LATIN CAPITAL LETTER I WITH DIAERESIS
	case UINT32_C(0x00D0): // LATIN CAPITAL LETTER ETH
	case UINT32_C(0x00D1): // LATIN CAPITAL LETTER N WITH TILDE
	case UINT32_C(0x00D2): // LATIN CAPITAL LETTER O WITH GRAVE
	case UINT32_C(0x00D3): // LATIN CAPITAL LETTER O WITH ACUTE
	case UINT32_C(0x00D4): // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
	case UINT32_C(0x00D5): // LATIN CAPITAL LETTER O WITH TILDE
	case UINT32_C(0x00D6): // LATIN CAPITAL LETTER O WITH DIAERESIS
	case UINT32_C(0x00D8): // LATIN CAPITAL LETTER O WITH STROKE
	case UINT32_C(0x00D9): // LATIN CAPITAL LETTER U WITH GRAVE
	case UINT32_C(0x00DA): // LATIN CAPITAL LETTER U WITH ACUTE
	case UINT32_C(0x00DB): // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
	case UINT32_C(0x00DC): // LATIN CAPITAL LETTER U WITH DIAERESIS
	case UINT32_C(0x00DD): // LATIN CAPITAL LETTER Y WITH ACUTE
	case UINT32_C(0x00DE): // LATIN CAPITAL LETTER THORN
	case UINT32_C(0x0100): // LATIN CAPITAL LETTER A WITH MACRON
	case UINT32_C(0x0102): // LATIN CAPITAL LETTER A WITH BREVE
	case UINT32_C(0x0104): // LATIN CAPITAL LETTER A WITH OGONEK
	case UINT32_C(0x0106): // LATIN CAPITAL LETTER C WITH ACUTE
	case UINT32_C(0x0108): // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
	case UINT32_C(0x010A): // LATIN CAPITAL LETTER C WITH DOT ABOVE
	case UINT32_C(0x010C): // LATIN CAPITAL LETTER C WITH CARON
	case UINT32_C(0x010E): // LATIN CAPITAL LETTER D WITH CARON
	case UINT32_C(0x0110): // LATIN CAPITAL LETTER D WITH STROKE
	case UINT32_C(0x0112): // LATIN CAPITAL LETTER E WITH MACRON
	case UINT32_C(0x0114): // LATIN CAPITAL LETTER E WITH BREVE
	case UINT32_C(0x0116): // LATIN CAPITAL LETTER E WITH DOT ABOVE
	case UINT32_C(0x0118): // LATIN CAPITAL LETTER E WITH OGONEK
	case UINT32_C(0x011A): // LATIN CAPITAL LETTER E WITH CARON
	case UINT32_C(0x011C): // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
	case UINT32_C(0x014A): // LATIN CAPITAL LETTER ENG
	case UINT32_C(0x0152): // LATIN CAPITAL LIGATURE OE
	case UINT32_C(0x0186): // LATIN CAPITAL LETTER OPEN O
	case UINT32_C(0x018E): // LATIN CAPITAL LETTER REVERSED E
	case UINT32_C(0x018F): // LATIN CAPITAL LETTER SCHWA
	case UINT32_C(0x0190): // LATIN CAPITAL LETTER OPEN E
	case UINT32_C(0x0194): // LATIN CAPITAL LETTER GAMMA
	case UINT32_C(0x0196): // LATIN CAPITAL LETTER IOTA
	case UINT32_C(0x01A9): // LATIN CAPITAL LETTER ESH
	case UINT32_C(0x01B1): // LATIN CAPITAL LETTER UPSILON
	case UINT32_C(0x01B2): // LATIN CAPITAL LETTER V WITH HOOK
	case UINT32_C(0x01B7): // LATIN CAPITAL LETTER EZH
	case UINT32_C(0x01F7): // LATIN CAPITAL LETTER WYNN
	case UINT32_C(0x021C): // LATIN CAPITAL LETTER YOGH
	case UINT32_C(0x0241): // LATIN CAPITAL LETTER GLOTTAL STOP
	case UINT32_C(0x0391): // GREEK CAPITAL LETTER ALPHA
	case UINT32_C(0x0392): // GREEK CAPITAL LETTER BETA
	case UINT32_C(0x0393): // GREEK CAPITAL LETTER GAMMA
	case UINT32_C(0x0394): // GREEK CAPITAL LETTER DELTA
	case UINT32_C(0x0395): // GREEK CAPITAL LETTER EPSILON
	case UINT32_C(0x0396): // GREEK CAPITAL LETTER ZETA
	case UINT32_C(0x0397): // GREEK CAPITAL LETTER ETA
	case UINT32_C(0x0398): // GREEK CAPITAL LETTER THETA
	case UINT32_C(0x0399): // GREEK CAPITAL LETTER IOTA
	case UINT32_C(0x039A): // GREEK CAPITAL LETTER KAPPA
	case UINT32_C(0x039B): // GREEK CAPITAL LETTER LAMBDA
	case UINT32_C(0x039C): // GREEK CAPITAL LETTER MU
	case UINT32_C(0x039D): // GREEK CAPITAL LETTER NU
	case UINT32_C(0x039E): // GREEK CAPITAL LETTER XI
	case UINT32_C(0x039F): // GREEK CAPITAL LETTER OMICRON
	case UINT32_C(0x03A0): // GREEK CAPITAL LETTER PI
	case UINT32_C(0x03A1): // GREEK CAPITAL LETTER RHO
	case UINT32_C(0x03A3): // GREEK CAPITAL LETTER SIGMA
	case UINT32_C(0x03A4): // GREEK CAPITAL LETTER TAU
	case UINT32_C(0x03A5): // GREEK CAPITAL LETTER UPSILON
	case UINT32_C(0x03A6): // GREEK CAPITAL LETTER PHI
	case UINT32_C(0x03A7): // GREEK CAPITAL LETTER CHI
	case UINT32_C(0x03A8): // GREEK CAPITAL LETTER PSI
	case UINT32_C(0x03A9): // GREEK CAPITAL LETTER OMEGA
	case UINT32_C(0x1E9E): // LATIN CAPITAL LETTER SHARP S
	case UINT32_C(0x2C6D): // LATIN CAPITAL LETTER ALPHA
	case UINT32_C(0xA77D): // LATIN CAPITAL LETTER INSULAR G
	case UINT32_C(0xA7B4): // LATIN CAPITAL LETTER BETA
	case UINT32_C(0xA7B6): // LATIN CAPITAL LETTER OMEGA
		return true;
	}
}
// Reports whether code_point is one of the minuscule (small) letters known to
// the library.
//
// The set is an explicit list: the basic Latin small letters, the Latin-1
// small letters (U+00DF..U+00FF, excluding U+00F7 DIVISION SIGN), a selection
// of extended Latin and IPA letters, the Greek small letters, and a few
// isolated letters.
//
// The basic Latin letters are written as explicit code points rather than
// character literals: a code point is a Unicode value, whereas the value of
// 'a' depends on the execution character set.
//
// Returns true for listed code points and false for everything else.
bool u8c_is_minuscule(uint_least32_t const code_point) {
	switch(code_point) {
	default:
		return false;

	case UINT32_C(0x0061): // LATIN SMALL LETTER A
	case UINT32_C(0x0062): // LATIN SMALL LETTER B
	case UINT32_C(0x0063): // LATIN SMALL LETTER C
	case UINT32_C(0x0064): // LATIN SMALL LETTER D
	case UINT32_C(0x0065): // LATIN SMALL LETTER E
	case UINT32_C(0x0066): // LATIN SMALL LETTER F
	case UINT32_C(0x0067): // LATIN SMALL LETTER G
	case UINT32_C(0x0068): // LATIN SMALL LETTER H
	case UINT32_C(0x0069): // LATIN SMALL LETTER I
	case UINT32_C(0x006A): // LATIN SMALL LETTER J
	case UINT32_C(0x006B): // LATIN SMALL LETTER K
	case UINT32_C(0x006C): // LATIN SMALL LETTER L
	case UINT32_C(0x006D): // LATIN SMALL LETTER M
	case UINT32_C(0x006E): // LATIN SMALL LETTER N
	case UINT32_C(0x006F): // LATIN SMALL LETTER O
	case UINT32_C(0x0070): // LATIN SMALL LETTER P
	case UINT32_C(0x0071): // LATIN SMALL LETTER Q
	case UINT32_C(0x0072): // LATIN SMALL LETTER R
	case UINT32_C(0x0073): // LATIN SMALL LETTER S
	case UINT32_C(0x0074): // LATIN SMALL LETTER T
	case UINT32_C(0x0075): // LATIN SMALL LETTER U
	case UINT32_C(0x0076): // LATIN SMALL LETTER V
	case UINT32_C(0x0077): // LATIN SMALL LETTER W
	case UINT32_C(0x0078): // LATIN SMALL LETTER X
	case UINT32_C(0x0079): // LATIN SMALL LETTER Y
	case UINT32_C(0x007A): // LATIN SMALL LETTER Z
	case UINT32_C(0x00DF): // LATIN SMALL LETTER SHARP S
	case UINT32_C(0x00E0): // LATIN SMALL LETTER A WITH GRAVE
	case UINT32_C(0x00E1): // LATIN SMALL LETTER A WITH ACUTE
	case UINT32_C(0x00E2): // LATIN SMALL LETTER A WITH CIRCUMFLEX
	case UINT32_C(0x00E3): // LATIN SMALL LETTER A WITH TILDE
	case UINT32_C(0x00E4): // LATIN SMALL LETTER A WITH DIAERESIS
	case UINT32_C(0x00E5): // LATIN SMALL LETTER A WITH RING ABOVE
	case UINT32_C(0x00E6): // LATIN SMALL LETTER AE
	case UINT32_C(0x00E7): // LATIN SMALL LETTER C WITH CEDILLA
	case UINT32_C(0x00E8): // LATIN SMALL LETTER E WITH GRAVE
	case UINT32_C(0x00E9): // LATIN SMALL LETTER E WITH ACUTE
	case UINT32_C(0x00EA): // LATIN SMALL LETTER E WITH CIRCUMFLEX
	case UINT32_C(0x00EB): // LATIN SMALL LETTER E WITH DIAERESIS
	case UINT32_C(0x00EC): // LATIN SMALL LETTER I WITH GRAVE
	case UINT32_C(0x00ED): // LATIN SMALL LETTER I WITH ACUTE
	case UINT32_C(0x00EE): // LATIN SMALL LETTER I WITH CIRCUMFLEX
	case UINT32_C(0x00EF): // LATIN SMALL LETTER I WITH DIAERESIS
	case UINT32_C(0x00F0): // LATIN SMALL LETTER ETH
	case UINT32_C(0x00F1): // LATIN SMALL LETTER N WITH TILDE
	case UINT32_C(0x00F2): // LATIN SMALL LETTER O WITH GRAVE
	case UINT32_C(0x00F3): // LATIN SMALL LETTER O WITH ACUTE
	case UINT32_C(0x00F4): // LATIN SMALL LETTER O WITH CIRCUMFLEX
	case UINT32_C(0x00F5): // LATIN SMALL LETTER O WITH TILDE
	case UINT32_C(0x00F6): // LATIN SMALL LETTER O WITH DIAERESIS
	case UINT32_C(0x00F8): // LATIN SMALL LETTER O WITH STROKE
	case UINT32_C(0x00F9): // LATIN SMALL LETTER U WITH GRAVE
	case UINT32_C(0x00FA): // LATIN SMALL LETTER U WITH ACUTE
	case UINT32_C(0x00FB): // LATIN SMALL LETTER U WITH CIRCUMFLEX
	case UINT32_C(0x00FC): // LATIN SMALL LETTER U WITH DIAERESIS
	case UINT32_C(0x00FD): // LATIN SMALL LETTER Y WITH ACUTE
	case UINT32_C(0x00FE): // LATIN SMALL LETTER THORN
	case UINT32_C(0x00FF): // LATIN SMALL LETTER Y WITH DIAERESIS
	case UINT32_C(0x0105): // LATIN SMALL LETTER A WITH OGONEK
	case UINT32_C(0x0107): // LATIN SMALL LETTER C WITH ACUTE
	case UINT32_C(0x010D): // LATIN SMALL LETTER C WITH CARON
	case UINT32_C(0x010F): // LATIN SMALL LETTER D WITH CARON
	case UINT32_C(0x0119): // LATIN SMALL LETTER E WITH OGONEK
	case UINT32_C(0x011B): // LATIN SMALL LETTER E WITH CARON
	case UINT32_C(0x011F): // LATIN SMALL LETTER G WITH BREVE
	case UINT32_C(0x0131): // LATIN SMALL LETTER DOTLESS I
	case UINT32_C(0x0133): // LATIN SMALL LIGATURE IJ
	case UINT32_C(0x0138): // LATIN SMALL LETTER KRA
	case UINT32_C(0x0142): // LATIN SMALL LETTER L WITH STROKE
	case UINT32_C(0x0144): // LATIN SMALL LETTER N WITH ACUTE
	case UINT32_C(0x0148): // LATIN SMALL LETTER N WITH CARON
	case UINT32_C(0x014B): // LATIN SMALL LETTER ENG
	case UINT32_C(0x0153): // LATIN SMALL LIGATURE OE
	case UINT32_C(0x0159): // LATIN SMALL LETTER R WITH CARON
	case UINT32_C(0x015B): // LATIN SMALL LETTER S WITH ACUTE
	case UINT32_C(0x015F): // LATIN SMALL LETTER S WITH CEDILLA
	case UINT32_C(0x0161): // LATIN SMALL LETTER S WITH CARON
	case UINT32_C(0x0165): // LATIN SMALL LETTER T WITH CARON
	case UINT32_C(0x016F): // LATIN SMALL LETTER U WITH RING ABOVE
	case UINT32_C(0x017A): // LATIN SMALL LETTER Z WITH ACUTE
	case UINT32_C(0x017C): // LATIN SMALL LETTER Z WITH DOT ABOVE
	case UINT32_C(0x017E): // LATIN SMALL LETTER Z WITH CARON
	case UINT32_C(0x01BF): // LATIN LETTER WYNN
	case UINT32_C(0x01DD): // LATIN SMALL LETTER TURNED E
	case UINT32_C(0x021D): // LATIN SMALL LETTER YOGH
	case UINT32_C(0x0242): // LATIN SMALL LETTER GLOTTAL STOP
	case UINT32_C(0x0250): // LATIN SMALL LETTER TURNED A
	case UINT32_C(0x0251): // LATIN SMALL LETTER ALPHA
	case UINT32_C(0x0252): // LATIN SMALL LETTER TURNED ALPHA
	case UINT32_C(0x0253): // LATIN SMALL LETTER B WITH HOOK
	case UINT32_C(0x0254): // LATIN SMALL LETTER OPEN O
	case UINT32_C(0x0255): // LATIN SMALL LETTER C WITH CURL
	case UINT32_C(0x0256): // LATIN SMALL LETTER D WITH TAIL
	case UINT32_C(0x0257): // LATIN SMALL LETTER D WITH HOOK
	case UINT32_C(0x0258): // LATIN SMALL LETTER REVERSED E
	case UINT32_C(0x0259): // LATIN SMALL LETTER SCHWA
	case UINT32_C(0x025A): // LATIN SMALL LETTER SCHWA WITH HOOK
	case UINT32_C(0x025B): // LATIN SMALL LETTER OPEN E
	case UINT32_C(0x025C): // LATIN SMALL LETTER REVERSED OPEN E
	case UINT32_C(0x025D): // LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
	case UINT32_C(0x025E): // LATIN SMALL LETTER CLOSED REVERSED OPEN E
	case UINT32_C(0x025F): // LATIN SMALL LETTER DOTLESS J WITH STROKE
	case UINT32_C(0x0260): // LATIN SMALL LETTER G WITH HOOK
	case UINT32_C(0x0261): // LATIN SMALL LETTER SCRIPT G
	case UINT32_C(0x0262): // LATIN LETTER SMALL CAPITAL G
	case UINT32_C(0x0263): // LATIN SMALL LETTER GAMMA
	case UINT32_C(0x0264): // LATIN SMALL LETTER RAMS HORN
	case UINT32_C(0x0265): // LATIN SMALL LETTER TURNED H
	case UINT32_C(0x0266): // LATIN SMALL LETTER H WITH HOOK
	case UINT32_C(0x0267): // LATIN SMALL LETTER HENG WITH HOOK
	case UINT32_C(0x0268): // LATIN SMALL LETTER I WITH STROKE
	case UINT32_C(0x0269): // LATIN SMALL LETTER IOTA
	case UINT32_C(0x026A): // LATIN LETTER SMALL CAPITAL I
	case UINT32_C(0x026B): // LATIN SMALL LETTER L WITH MIDDLE TILDE
	case UINT32_C(0x026C): // LATIN SMALL LETTER L WITH BELT
	case UINT32_C(0x026D): // LATIN SMALL LETTER L WITH RETROFLEX HOOK
	case UINT32_C(0x026E): // LATIN SMALL LETTER LEZH
	case UINT32_C(0x026F): // LATIN SMALL LETTER TURNED M
	case UINT32_C(0x0270): // LATIN SMALL LETTER TURNED M WITH LONG LEG
	case UINT32_C(0x0271): // LATIN SMALL LETTER M WITH HOOK
	case UINT32_C(0x0272): // LATIN SMALL LETTER N WITH LEFT HOOK
	case UINT32_C(0x0273): // LATIN SMALL LETTER N WITH RETROFLEX HOOK
	case UINT32_C(0x0283): // LATIN SMALL LETTER ESH
	case UINT32_C(0x028A): // LATIN SMALL LETTER UPSILON
	case UINT32_C(0x028B): // LATIN SMALL LETTER V WITH HOOK
	case UINT32_C(0x0292): // LATIN SMALL LETTER EZH
	case UINT32_C(0x0294): // LATIN LETTER GLOTTAL STOP
	case UINT32_C(0x03B1): // GREEK SMALL LETTER ALPHA
	case UINT32_C(0x03B2): // GREEK SMALL LETTER BETA
	case UINT32_C(0x03B3): // GREEK SMALL LETTER GAMMA
	case UINT32_C(0x03B4): // GREEK SMALL LETTER DELTA
	case UINT32_C(0x03B5): // GREEK SMALL LETTER EPSILON
	case UINT32_C(0x03B6): // GREEK SMALL LETTER ZETA
	case UINT32_C(0x03B7): // GREEK SMALL LETTER ETA
	case UINT32_C(0x03B8): // GREEK SMALL LETTER THETA
	case UINT32_C(0x03B9): // GREEK SMALL LETTER IOTA
	case UINT32_C(0x03BA): // GREEK SMALL LETTER KAPPA
	case UINT32_C(0x03BB): // GREEK SMALL LETTER LAMBDA
	case UINT32_C(0x03BC): // GREEK SMALL LETTER MU
	case UINT32_C(0x03BD): // GREEK SMALL LETTER NU
	case UINT32_C(0x03BE): // GREEK SMALL LETTER XI
	case UINT32_C(0x03BF): // GREEK SMALL LETTER OMICRON
	case UINT32_C(0x03C0): // GREEK SMALL LETTER PI
	case UINT32_C(0x03C1): // GREEK SMALL LETTER RHO
	case UINT32_C(0x03C2): // GREEK SMALL LETTER FINAL SIGMA
	case UINT32_C(0x03C3): // GREEK SMALL LETTER SIGMA
	case UINT32_C(0x03C4): // GREEK SMALL LETTER TAU
	case UINT32_C(0x03C5): // GREEK SMALL LETTER UPSILON
	case UINT32_C(0x03C6): // GREEK SMALL LETTER PHI
	case UINT32_C(0x03C7): // GREEK SMALL LETTER CHI
	case UINT32_C(0x03C8): // GREEK SMALL LETTER PSI
	case UINT32_C(0x03C9): // GREEK SMALL LETTER OMEGA
	case UINT32_C(0x1D79): // LATIN SMALL LETTER INSULAR G
	case UINT32_C(0xA7B7): // LATIN SMALL LETTER OMEGA
	case UINT32_C(0xFB00): // LATIN SMALL LIGATURE FF
		return true;
	}
}
// Reports whether code_point is a digit known to the library.
//
// Accepted code points are DIGIT ZERO through DIGIT NINE (U+0030..U+0039)
// plus TURNED DIGIT TWO (U+218A) and TURNED DIGIT THREE (U+218B).
//
// Returns true for those twelve code points and false for everything else.
bool u8c_is_numeric(uint_least32_t const code_point) {
	// DIGIT ZERO through DIGIT NINE.
	bool const ascii_digit =
		code_point >= UINT32_C(0x0030) && code_point <= UINT32_C(0x0039);

	// TURNED DIGIT TWO and TURNED DIGIT THREE.
	bool const turned_digit =
		code_point == UINT32_C(0x218A) || code_point == UINT32_C(0x218B);

	return ascii_digit || turned_digit;
}
// Reports whether code_point is one of the punctuation marks or symbols known
// to the library.
//
// Membership is defined by the table of inclusive [first, last] spans below;
// a span whose two ends are equal names a single code point. The notes give
// the characters at the ends of each span.
//
// Returns true when code_point lies inside any span and false otherwise.
bool u8c_is_punctuation(uint_least32_t const code_point) {
	static const struct {
		uint_least32_t first;
		uint_least32_t last;
	} spans[] = {
		{UINT32_C(0x0021), UINT32_C(0x002F)}, // EXCLAMATION MARK .. SOLIDUS
		{UINT32_C(0x003A), UINT32_C(0x0040)}, // COLON .. COMMERCIAL AT
		{UINT32_C(0x005B), UINT32_C(0x0060)}, // LEFT SQUARE BRACKET .. GRAVE ACCENT
		{UINT32_C(0x007B), UINT32_C(0x007E)}, // LEFT CURLY BRACKET .. TILDE
		{UINT32_C(0x00A1), UINT32_C(0x00AC)}, // INVERTED EXCLAMATION MARK .. NOT SIGN
		{UINT32_C(0x00AE), UINT32_C(0x00BF)}, // REGISTERED SIGN .. INVERTED QUESTION MARK
		{UINT32_C(0x00D7), UINT32_C(0x00D7)}, // MULTIPLICATION SIGN
		{UINT32_C(0x00F7), UINT32_C(0x00F7)}, // DIVISION SIGN
		{UINT32_C(0x16EB), UINT32_C(0x16ED)}, // RUNIC SINGLE PUNCTUATION .. RUNIC CROSS PUNCTUATION
		{UINT32_C(0x2010), UINT32_C(0x2010)}, // HYPHEN
		{UINT32_C(0x2013), UINT32_C(0x2014)}, // EN DASH, EM DASH
		{UINT32_C(0x2018), UINT32_C(0x2019)}, // LEFT and RIGHT SINGLE QUOTATION MARK
		{UINT32_C(0x201C), UINT32_C(0x201D)}, // LEFT and RIGHT DOUBLE QUOTATION MARK
		{UINT32_C(0x2026), UINT32_C(0x2026)}, // HORIZONTAL ELLIPSIS
		{UINT32_C(0x2030), UINT32_C(0x2037)}, // PER MILLE SIGN .. REVERSED TRIPLE PRIME
		{UINT32_C(0x203C), UINT32_C(0x203D)}, // DOUBLE EXCLAMATION MARK, INTERROBANG
		{UINT32_C(0x2047), UINT32_C(0x2049)}, // DOUBLE QUESTION MARK .. EXCLAMATION QUESTION MARK
		{UINT32_C(0x20A3), UINT32_C(0x20A4)}, // FRENCH FRANC SIGN, LIRA SIGN
		{UINT32_C(0x20A8), UINT32_C(0x20A9)}, // RUPEE SIGN, WON SIGN
		{UINT32_C(0x20AC), UINT32_C(0x20AC)}, // EURO SIGN
		{UINT32_C(0x20B9), UINT32_C(0x20B9)}, // INDIAN RUPEE SIGN
		{UINT32_C(0x20BF), UINT32_C(0x20BF)}, // BITCOIN SIGN
		{UINT32_C(0x2103), UINT32_C(0x2103)}, // DEGREE CELSIUS
		{UINT32_C(0x2107), UINT32_C(0x2107)}, // EULER CONSTANT
		{UINT32_C(0x2109), UINT32_C(0x2109)}, // DEGREE FAHRENHEIT
		{UINT32_C(0x210E), UINT32_C(0x210E)}, // PLANCK CONSTANT
		{UINT32_C(0x2117), UINT32_C(0x2117)}, // SOUND RECORDING COPYRIGHT
		{UINT32_C(0x2122), UINT32_C(0x2122)}, // TRADE MARK SIGN
		{UINT32_C(0x2125), UINT32_C(0x2126)}, // OUNCE SIGN, OHM SIGN
		{UINT32_C(0x212A), UINT32_C(0x212A)}, // KELVIN SIGN
		{UINT32_C(0x214D), UINT32_C(0x214D)}, // AKTIESELSKAB
		{UINT32_C(0x2205), UINT32_C(0x2205)}, // EMPTY SET
		{UINT32_C(0x2212), UINT32_C(0x2212)}, // MINUS SIGN
		{UINT32_C(0x221A), UINT32_C(0x221C)}, // SQUARE ROOT .. FOURTH ROOT
		{UINT32_C(0x221E), UINT32_C(0x221E)}, // INFINITY
		{UINT32_C(0x2228), UINT32_C(0x2228)}, // LOGICAL OR
		{UINT32_C(0x2248), UINT32_C(0x2248)}, // ALMOST EQUAL TO
		{UINT32_C(0x2260), UINT32_C(0x2260)}, // NOT EQUAL TO
		{UINT32_C(0x2264), UINT32_C(0x2265)}, // LESS-THAN OR EQUAL TO, GREATER-THAN OR EQUAL TO
		{UINT32_C(0x2609), UINT32_C(0x2609)}, // SUN
		{UINT32_C(0x263F), UINT32_C(0x2647)}, // MERCURY .. PLUTO
		{UINT32_C(0x26A2), UINT32_C(0x26A5)}, // DOUBLED FEMALE SIGN .. MALE AND FEMALE SIGN
		{UINT32_C(0x26B3), UINT32_C(0x26B7)}, // CERES .. CHIRON
		{UINT32_C(0x2BD8), UINT32_C(0x2BDC)}, // PROSERPINA .. NESSUS
		{UINT32_C(0x2E2E), UINT32_C(0x2E2E)}, // REVERSED QUESTION MARK
		{UINT32_C(0x33D7), UINT32_C(0x33D7)}, // SQUARE PH
		{UINT32_C(0xFDFC), UINT32_C(0xFDFC)}, // RIAL SIGN
		{UINT32_C(0x0001F10D), UINT32_C(0x0001F10F)}, // CIRCLED ZERO WITH SLASH .. CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH
		{UINT32_C(0x0001F12F), UINT32_C(0x0001F12F)}, // COPYLEFT SYMBOL
		{UINT32_C(0x0001F16D), UINT32_C(0x0001F16F)}, // CIRCLED CC .. CIRCLED HUMAN FIGURE
	};

	for (unsigned index = 0u; index < sizeof (spans) / sizeof (spans[0]); ++index) {
		if (code_point >= spans[index].first && code_point <= spans[index].last) {
			return true;
		}
	}

	return false;
}
// Tells whether the given code point falls inside the surrogate range,
// i.e. U+D800 through U+DFFF with both end points included.
//
// code_point: The value to classify.
//
// Returns true for a surrogate and false for anything else.
bool u8c_is_surrogate(uint_least32_t const code_point) {
	uint_least32_t const first_surrogate = UINT32_C(0xD800);
	uint_least32_t const last_surrogate  = UINT32_C(0xDFFF);

	// Everything below the range is rejected up front.
	if (code_point < first_surrogate) { return false; }

	return code_point <= last_surrogate;
}
// Tells whether the given code point is one of the white space characters
// recognised by this function.
//
// Only the code points listed in the switch below are accepted; every other
// value, including Unicode space characters that are not listed, yields
// false.
//
// code_point: The value to classify.
//
// Returns true if the code point is in the list and false otherwise.
bool u8c_is_whitespace(uint_least32_t const code_point) {
	switch(code_point) {
	default:
		return false;

	case UINT32_C(0x0009): // HORIZONTAL TABULATION
	case UINT32_C(0x000A): // NEW LINE
	case UINT32_C(0x000B): // VERTICAL TABULATION
	case UINT32_C(0x000C): // FORM FEED
	case UINT32_C(0x000D): // CARRIAGE RETURN
	case UINT32_C(0x0020): // SPACE
	case UINT32_C(0x00A0): // NO-BREAK SPACE
	case UINT32_C(0x2002): // EN SPACE
	case UINT32_C(0x2003): // EM SPACE
	case UINT32_C(0x2008): // PUNCTUATION SPACE
	case UINT32_C(0x2009): // THIN SPACE
	// NARROW NO-BREAK SPACE is U+202F. The value U+203F is UNDERTIE, which
	// is a punctuation mark and must not be reported as white space.
	case UINT32_C(0x202F): // NARROW NO-BREAK SPACE
		return true;
	}
}
+*/ + +#include <u8c/character.h> + +#include <stdint.h> +#include <string.h> + +static char const* u8c_get_name(uint_least32_t code_point); + +size_t u8c_unicode_name(char* const restrict buffer, uint_least32_t const code_point) { + char const* const restrict string = u8c_get_name(code_point); + + size_t const length = strlen(string); + + if (buffer == NULL) { goto return_length; } + + memcpy(buffer, string, length); + +return_length:; + return length; +} + +static char const* u8c_get_name(uint_least32_t const code_point) { + switch(code_point) { + default: + return "UNDEFINED IN UNICODE"; + + // BASIC LATIN: + case UINT32_C(0x0000): + return "NULL"; + + case UINT32_C(0x0001): + return "START OF HEADING"; + + case UINT32_C(0x0002): + return "START OF TEXT"; + + case UINT32_C(0x0003): + return "END OF TEXT"; + + case UINT32_C(0x0004): + return "END OF TRANSMISSION"; + + case UINT32_C(0x0005): + return "ENQUIRY"; + + case UINT32_C(0x0006): + return "ACKNOWLEDGE"; + + case UINT32_C(0x0007): + return "BELL"; + + case UINT32_C(0x0008): + return "BACKSPACE"; + + case UINT32_C(0x0009): + return "HORIZONTAL TABULATION"; + + case UINT32_C(0x000A): + return "NEW LINE"; + + case UINT32_C(0x000B): + return "VERTICAL TABULATION"; + + case UINT32_C(0x000C): + return "FORM FEED"; + + case UINT32_C(0x000D): + return "CARRIAGE RETURN"; + + case UINT32_C(0x000E): + return "SHIFT OUT"; + + case UINT32_C(0x000F): + return "SHIFT IN"; + + case UINT32_C(0x0010): + return "DATA LINK ESCAPE"; + + case UINT32_C(0x0011): + return "DEVICE CONTROL ONE"; + + case UINT32_C(0x0012): + return "DEVICE CONTROL TWO"; + + case UINT32_C(0x0013): + return "DEVICE CONTROL THREE"; + + case UINT32_C(0x0014): + return "DEVICE CONTROL FOUR"; + + case UINT32_C(0x0015): + return "NEGATIVE ACKNOWLEDGE"; + + case UINT32_C(0x0016): + return "SYNCHRONOUS IDLE"; + + case UINT32_C(0x0017): + return "END OF TRANSMISSION BLOCk"; + + case UINT32_C(0x0018): + return "CANCEL"; + + case UINT32_C(0x0019): + return "END 
OF MEDIUM"; + + case UINT32_C(0x001A): + return "SUBSTITUTE"; + + case UINT32_C(0x001B): + return "ESCAPE"; + + case UINT32_C(0x001C): + return "FILE SEPERATOR"; + + case UINT32_C(0x001D): + return "GROUP SEPERATOR"; + + case UINT32_C(0x001E): + return "RECORD SEPERATOR"; + + case UINT32_C(0x001F): + return "UNIT SEPERATOR"; + + case UINT32_C(0x0020): + return "SPACE"; + + case UINT32_C(0x0021): + return "EXCLAMATION MARK"; + + case UINT32_C(0x0022): + return "QUOTATION MARK"; + + case UINT32_C(0x0023): + return "NUMBER SIGN"; + + case UINT32_C(0x0024): + return "DOLLAR SIGN"; + + case UINT32_C(0x0025): + return "PERCENT SIGN"; + + case UINT32_C(0x0026): + return "AMPERSAND"; + + case UINT32_C(0x0027): + return "APOSTROPHE"; + + case UINT32_C(0x0028): + return "LEFT PARANTHESIS"; + + case UINT32_C(0x0029): + return "RIGHT PARANTHESIS"; + + case UINT32_C(0x002A): + return "ASTERISK"; + + case UINT32_C(0x002B): + return "PLUS SIGN"; + + case UINT32_C(0x002C): + return "COMMA"; + + case UINT32_C(0x002D): + return "HYPHEN-MINUS"; + + case UINT32_C(0x002E): + return "FULL STOP"; + + case UINT32_C(0x002F): + return "SOLIDUS"; + + case UINT32_C(0x0030): + return "DIGIT ZERO"; + + case UINT32_C(0x0031): + return "DIGIT ONE"; + + case UINT32_C(0x0032): + return "DIGIT TWO"; + + case UINT32_C(0x0033): + return "DIGIT THREE"; + + case UINT32_C(0x0034): + return "DIGIT FOUR"; + + case UINT32_C(0x0035): + return "DIGIT FIVE"; + + case UINT32_C(0x0036): + return "DIGIT SIX"; + + case UINT32_C(0x0037): + return "DIGIT SEVEN"; + + case UINT32_C(0x0038): + return "DIGIT EIGHT"; + + case UINT32_C(0x0039): + return "DIGIT NINE"; + + case UINT32_C(0x003A): + return "COLON"; + + case UINT32_C(0x003B): + return "SEMICOLON"; + + case UINT32_C(0x003C): + return "LESS-THAN SIGN"; + + case UINT32_C(0x003D): + return "EQUALS SIGN"; + + case UINT32_C(0x003E): + return "GREATER-THAN SIGN"; + + case UINT32_C(0x003F): + return "QUESTION MARK"; + + case UINT32_C(0x0040): + return "COMMERCIAL AT"; 
+ + case UINT32_C(0x0041): + return "LATIN CAPITAL LETTER A"; + + case UINT32_C(0x0042): + return "LATIN CAPITAL LETTER B"; + + case UINT32_C(0x0043): + return "LATIN CAPITAL LETTER C"; + + case UINT32_C(0x0044): + return "LATIN CAPITAL LETTER D"; + + case UINT32_C(0x0045): + return "LATIN CAPITAL LETTER E"; + + case UINT32_C(0x0046): + return "LATIN CAPITAL LETTER F"; + + case UINT32_C(0x0047): + return "LATIN CAPITAL LETTER G"; + + case UINT32_C(0x0048): + return "LATIN CAPITAL LETTER H"; + + case UINT32_C(0x0049): + return "LATIN CAPITAL LETTER I"; + + case UINT32_C(0x004A): + return "LATIN CAPITAL LETTER J"; + + case UINT32_C(0x004B): + return "LATIN CAPITAL LETTER K"; + + case UINT32_C(0x004C): + return "LATIN CAPITAL LETTER L"; + + case UINT32_C(0x004D): + return "LATIN CAPITAL LETTER M"; + + case UINT32_C(0x004E): + return "LATIN CAPITAL LETTER N"; + + case UINT32_C(0x004F): + return "LATIN CAPITAL LETTER O"; + + case UINT32_C(0x0050): + return "LATIN CAPITAL LETTER P"; + + case UINT32_C(0x0051): + return "LATIN CAPITAL LETTER Q"; + + case UINT32_C(0x0052): + return "LATIN CAPITAL LETTER R"; + + case UINT32_C(0x0053): + return "LATIN CAPITAL LETTER S"; + + case UINT32_C(0x0054): + return "LATIN CAPITAL LETTER T"; + + case UINT32_C(0x0055): + return "LATIN CAPITAL LETTER "; + + case UINT32_C(0x0056): + return "LATIN CAPITAL LETTER V"; + + case UINT32_C(0x0057): + return "LATIN CAPITAL LETTER W"; + + case UINT32_C(0x0058): + return "LATIN CAPITAL LETTER X"; + + case UINT32_C(0x0059): + return "LATIN CAPITAL LETTER Y"; + + case UINT32_C(0x005A): + return "LATIN CAPITAL LETTER Z"; + + case UINT32_C(0x005B): + return "LEFT SQUARE BRACKET"; + + case UINT32_C(0x005C): + return "REVERSE SOLIDUS"; + + case UINT32_C(0x005D): + return "RIGHT SQUARE BRACKET"; + + case UINT32_C(0x005E): + return "CIRCUMFLEX ACCENT"; + + case UINT32_C(0x005F): + return "LOW LINE"; + + case UINT32_C(0x0060): + return "GRAVE ACCENT"; + + case UINT32_C(0x0061): + return "LATIN SMALL LETTER 
A"; + + case UINT32_C(0x0062): + return "LATIN SMALL LETTER B"; + + case UINT32_C(0x0063): + return "LATIN SMALL LETTER C"; + + case UINT32_C(0x0064): + return "LATIN SMALL LETTER D"; + + case UINT32_C(0x0065): + return "LATIN SMALL LETTER E"; + + case UINT32_C(0x0066): + return "LATIN SMALL LETTER F"; + + case UINT32_C(0x0067): + return "LATIN SMALL LETTER G"; + + case UINT32_C(0x0068): + return "LATIN SMALL LETTER H"; + + case UINT32_C(0x0069): + return "LATIN SMALL LETTER I"; + + case UINT32_C(0x006A): + return "LATIN SMALL LETTER J"; + + case UINT32_C(0x006B): + return "LATIN SMALL LETTER K"; + + case UINT32_C(0x006C): + return "LATIN SMALL LETTER L"; + + case UINT32_C(0x006D): + return "LATIN SMALL LETTER M"; + + case UINT32_C(0x006E): + return "LATIN SMALL LETTER N"; + + case UINT32_C(0x006F): + return "LATIN SMALL LETTER O"; + + case UINT32_C(0x0070): + return "LATIN SMALL LETTER P"; + + case UINT32_C(0x0071): + return "LATIN SMALL LETTER Q"; + + case UINT32_C(0x0072): + return "LATIN SMALL LETTER R"; + + case UINT32_C(0x0073): + return "LATIN SMALL LETTER S"; + + case UINT32_C(0x0074): + return "LATIN SMALL LETTER T"; + + case UINT32_C(0x0075): + return "LATIN SMALL LETTER "; + + case UINT32_C(0x0076): + return "LATIN SMALL LETTER V"; + + case UINT32_C(0x0077): + return "LATIN SMALL LETTER W"; + + case UINT32_C(0x0078): + return "LATIN SMALL LETTER X"; + + case UINT32_C(0x0079): + return "LATIN SMALL LETTER Y"; + + case UINT32_C(0x007A): + return "LATIN SMALL LETTER Z"; + + case UINT32_C(0x007B): + return "LEFT CURLY BRACKET"; + + case UINT32_C(0x007C): + return "VERTICAL LINE"; + + case UINT32_C(0x007D): + return "RIGHT CURLY BRACKET"; + + case UINT32_C(0x007E): + return "TILDE"; + + case UINT32_C(0x007F): + return "DELETE"; + + // LATIN-1 SUPPLEMENT: + case UINT32_C(0x0080): + return "PADDING CHARACTER"; + + case UINT32_C(0x0081): + return "HIGH OCTET PRESET"; + + case UINT32_C(0x0082): + return "BREAK PERMITTED HERE"; + + case UINT32_C(0x0083): + return 
"NO BREAK HERE"; + + case UINT32_C(0x0084): + return "INDEX"; + + case UINT32_C(0x0085): + return "NEXT LINE"; + + case UINT32_C(0x0086): + return "START OF SELECTED AREA"; + + case UINT32_C(0x0087): + return "END OF SELECTED AREA"; + + case UINT32_C(0x0088): + return "CHARACTER TABULATION SET"; + + case UINT32_C(0x0089): + return "CHARACTER TABULATION WITH JUSTIFICATION"; + + case UINT32_C(0x008A): + return "LINE TABULATION SET"; + + case UINT32_C(0x008B): + return "PARTIAL LINE FORWARD"; + + case UINT32_C(0x008C): + return "PARTIAL LINE BACKWARD"; + + case UINT32_C(0x008D): + return "REVERSE LINE FEED"; + + case UINT32_C(0x008E): + return "SINGLE SHIFT TWO"; + + case UINT32_C(0x008F): + return "SINGLE SHIFT THREE"; + + case UINT32_C(0x0090): + return "DEVICE CONTROL STRING"; + + case UINT32_C(0x0091): + return "PRIVATE USE ONE"; + + case UINT32_C(0x0092): + return "PRIVATE USE TWO"; + + case UINT32_C(0x0093): + return "SET TRANSMIT STATE"; + + case UINT32_C(0x0094): + return "CANCEL CHARACTER"; + + case UINT32_C(0x0095): + return "MESSAGE WAITING"; + + case UINT32_C(0x0096): + return "START OF GUARDED AREA"; + + case UINT32_C(0x0097): + return "END OF GUARDED AREA"; + + case UINT32_C(0x0098): + return "START OF STRING"; + + case UINT32_C(0x0099): + return "SINGLE GRAPHIC CHARACTER INTRODUCER"; + + case UINT32_C(0x009A): + return "SINGLE CHARACTER INTRODUCER"; + + case UINT32_C(0x009B): + return "CONTROL SEQUENCE INTRODUCER"; + + case UINT32_C(0x009C): + return "STRING TERMINATOR"; + + case UINT32_C(0x009D): + return "OPERATING SYSTEM COMMAND"; + + case UINT32_C(0x009E): + return "PRIVACY MESSAGE"; + + case UINT32_C(0x009F): + return "APPLICATION PROGRAM COMMAND"; + + case UINT32_C(0x00A0): + return "NO-BREAK SPACE"; + + case UINT32_C(0x00A1): + return "INVERTED EXCLAMATION MARK"; + + case UINT32_C(0x00A2): + return "CENT SIGN"; + + case UINT32_C(0x00A3): + return "POUND SIGN"; + + case UINT32_C(0x00A4): + return "CURRENCY SIGN"; + + case UINT32_C(0x00A5): + 
return "YEN SIGN"; + + case UINT32_C(0x00A6): + return "BROKEN BAR"; + + case UINT32_C(0x00A7): + return "SECTION SIGN"; + + case UINT32_C(0x00A8): + return "DIAERESIS"; + + case UINT32_C(0x00A9): + return "COPYRIGHT SIGN"; + + case UINT32_C(0x00AA): + return "FEMININE ORDINAL INDICATOR"; + + case UINT32_C(0x00AB): + return "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"; + + case UINT32_C(0x00AC): + return "NOT SIGN"; + + case UINT32_C(0x00AD): + return "SOFT HYPHEN"; + + case UINT32_C(0x00AE): + return "REGISTERED SIGN"; + + case UINT32_C(0x00AF): + return "MACRON"; + + case UINT32_C(0x00B0): + return "DEGREE SIGN"; + + case UINT32_C(0x00B1): + return "PLUS MINUS SYMBOL"; + + case UINT32_C(0x00B2): + return "SUPERSCRIPT TWO"; + + case UINT32_C(0x00B3): + return "SUPERSCRIPT THREE"; + + case UINT32_C(0x00B4): + return "ACUTE ACCENT"; + + case UINT32_C(0x00B5): + return "MICRO SIGN"; + + case UINT32_C(0x00B6): + return "PILCROW SIGN"; + + case UINT32_C(0x00B7): + return "MIDDLE DOT"; + + case UINT32_C(0x00B8): + return "CEDILLA"; + + case UINT32_C(0x00B9): + return "SUPERSCRIPT ONE"; + + case UINT32_C(0x00BA): + return "MASCULINE ORDINAL INDICATOR"; + + case UINT32_C(0x00BB): + return "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"; + + case UINT32_C(0x00BC): + return "VULGAR FRACTION ONE QUARTER"; + + case UINT32_C(0x00BD): + return "VULGAR FRACTION ONE HALF"; + + case UINT32_C(0x00BE): + return "VULGAR FRACTION THREE QUARTERS"; + + case UINT32_C(0x00BF): + return "INVERTED QUESTION MARK"; + + case UINT32_C(0x00C0): + return "LATIN CAPITAL LETTER A WITH GRAVE"; + + case UINT32_C(0x00C1): + return "LATIN CAPITAL LETTER A WITH ACUTE"; + + case UINT32_C(0x00C2): + return "LATIN CAPITAL LETTER A WITH CIRCUMFLEX"; + + case UINT32_C(0x00C3): + return "LATIN CAPITAL LETTER A WITH TILDE"; + + case UINT32_C(0x00C4): + return "LATIN CAPITAL LETTER A WITH DIAERESIS"; + + case UINT32_C(0x00C5): + return "LATIN CAPITAL LETTER A WITH RING ABOVE"; + + case UINT32_C(0x00C6): + return 
"LATIN CAPITAL LETTER AE"; + + case UINT32_C(0x00C7): + return "LATIN CAPITAL LETTER C WITH CEDILLA"; + + case UINT32_C(0x00C8): + return "LATIN CAPITAL LETTER E WITH GRAVE"; + + case UINT32_C(0x00C9): + return "LATIN CAPITAL LETTER E WITH ACUTE"; + + case UINT32_C(0x00CA): + return "LATIN CAPITAL LETTER E WITH CIRCUMFLEX"; + + case UINT32_C(0x00CB): + return "LATIN CAPITAL LETTER E WITH DIAERESIS"; + + case UINT32_C(0x00CC): + return "LATIN CAPITAL LETTER I WITH GRAVE"; + + case UINT32_C(0x00CD): + return "LATIN CAPITAL LETTER I WITH ACUTE"; + + case UINT32_C(0x00CE): + return "LATIN CAPITAL LETTER I WITH CIRCUMFLEX"; + + case UINT32_C(0x00CF): + return "LATIN CAPITAL LETTER I WITH DIAERESIS"; + + case UINT32_C(0x00D0): + return "LATIN CAPITAL LETTER ETH"; + + case UINT32_C(0x00D1): + return "LATIN CAPITAL LETTER N WITH TILDE"; + + case UINT32_C(0x00D2): + return "LATIN CAPITAL LETTER O WITH GRAVE"; + + case UINT32_C(0x00D3): + return "LATIN CAPITAL LETTER O WITH ACUTE"; + + case UINT32_C(0x00D4): + return "LATIN CAPITAL LETTER O WITH CIRCUMFLEX"; + + case UINT32_C(0x00D5): + return "LATIN CAPITAL LETTER O WITH TILDE"; + + case UINT32_C(0x00D6): + return "LATIN CAPITAL LETTER O WITH DIAERESIS"; + + case UINT32_C(0x00D7): + return "MULTIPLICATION SIGN"; + + case UINT32_C(0x00D8): + return "LATIN CAPITAL LETTER O WITH STROKE"; + + case UINT32_C(0x00D9): + return "LATIN CAPITAL LETTER U WITH GRAVE"; + + case UINT32_C(0x00DA): + return "LATIN CAPITAL LETTER U WITH ACUTE"; + + case UINT32_C(0x00DB): + return "LATIN CAPITAL LETTER U WITH CIRCUMFLEX"; + + case UINT32_C(0x00DC): + return "LATIN CAPITAL LETTER U WITH DIAERESIS"; + + case UINT32_C(0x00DD): + return "LATIN CAPITAL LETTER Y WITH ACUTE"; + + case UINT32_C(0x00DE): + return "LATIN CAPITAL LETTER THORN"; + + case UINT32_C(0x00DF): + return "LATIN SMALL LETTER SHARP S"; + + case UINT32_C(0x00E0): + return "LATIN SMALL LETTER A WITH GRAVE"; + + case UINT32_C(0x00E1): + return "LATIN SMALL LETTER A WITH ACUTE"; + + 
case UINT32_C(0x00E2): + return "LATIN SMALL LETTER A WITH CIRCUMFLEX"; + + case UINT32_C(0x00E3): + return "LATIN SMALL LETTER A WITH TILDE"; + + case UINT32_C(0x00E4): + return "LATIN SMALL LETTER A WITH DIAERESIS"; + + case UINT32_C(0x00E5): + return "LATIN SMALL LETTER A WITH RING ABOVE"; + + case UINT32_C(0x00E6): + return "LATIN SMALL LETTER AE"; + + case UINT32_C(0x00E7): + return "LATIN SMALL LETTER C WITH CEDILLA"; + + case UINT32_C(0x00E8): + return "LATIN SMALL LETTER E WITH GRAVE"; + + case UINT32_C(0x00E9): + return "LATIN SMALL LETTER E WITH ACUTE"; + + case UINT32_C(0x00EA): + return "LATIN SMALL LETTER E WITH CIRCUMFLEX"; + + case UINT32_C(0x00EB): + return "LATIN SMALL LETTER E WITH DIAERESIS"; + + case UINT32_C(0x00EC): + return "LATIN SMALL LETTER I WITH GRAVE"; + + case UINT32_C(0x00ED): + return "LATIN SMALL LETTER I WITH ACUTE"; + + case UINT32_C(0x00EE): + return "LATIN SMALL LETTER I WITH CIRCUMFLEX"; + + case UINT32_C(0x00EF): + return "LATIN SMALL LETTER I WITH DIAERESIS"; + + case UINT32_C(0x00F0): + return "LATIN SMALL LETTER ETH"; + + case UINT32_C(0x00F1): + return "LATIN SMALL LETTER N WITH TILDE"; + + case UINT32_C(0x00F2): + return "LATIN SMALL LETTER O WITH GRAVE"; + + case UINT32_C(0x00F3): + return "LATIN SMALL LETTER O WITH ACUTE"; + + case UINT32_C(0x00F4): + return "LATIN SMALL LETTER O WITH CIRCUMFLEX"; + + case UINT32_C(0x00F5): + return "LATIN SMALL LETTER O WITH TILDE"; + + case UINT32_C(0x00F6): + return "LATIN SMALL LETTER O WITH DIAERESIS"; + + case UINT32_C(0x00F7): + return "DIVISION SIGN"; + + case UINT32_C(0x00F8): + return "LATIN SMALL LETTER O WITH STROKE"; + + case UINT32_C(0x00F9): + return "LATIN SMALL LETTER U WITH GRAVE"; + + case UINT32_C(0x00FA): + return "LATIN SMALL LETTER U WITH ACUTE"; + + case UINT32_C(0x00FB): + return "LATIN SMALL LETTER U WITH CIRCUMFLEX"; + + case UINT32_C(0x00FC): + return "U WITH TWO DOTS"; + + case UINT32_C(0x00FD): + return "LATIN SMALL LETTER Y WITH ACUTE"; + + case 
UINT32_C(0x00FE): + return "LATIN SMALL LETTER THORN"; + + case UINT32_C(0x00FF): + return "LATIN SMALL LETTER Y WITH DIAERESIS"; + + // LATIN EXTENDED-A: + case UINT32_C(0x0100): + return "LATIN CAPITAL LETTER A WITH MACRON"; + + case UINT32_C(0x0101): + return "LATIN SMALL LETTER A WITH MACRON"; + + case UINT32_C(0x0102): + return "LATIN CAPITAL LETTER A WITH BREVE"; + + case UINT32_C(0x0103): + return "LATIN SMALL LETTER A WITH BREVE"; + + case UINT32_C(0x0104): + return "LATIN CAPITAL LETTER A WITH OGONEK"; + + case UINT32_C(0x0105): + return "LATIN SMALL LETTER A WITH OGONEK"; + + case UINT32_C(0x0106): + return "LATIN CAPITAL LETTER C WITH ACUTE"; + + case UINT32_C(0x0107): + return "LATIN SMALL LETTER C WITH ACUTE"; + + case UINT32_C(0x0108): + return "LATIN CAPITAL LETTER C WITH CIRCUMFLEX"; + + case UINT32_C(0x0109): + return "LATIN SMALL LETTER C WITH CIRCUMFLEX"; + + case UINT32_C(0x010A): + return "LATIN CAPITAL LETTER C WITH DOT ABOVE"; + + case UINT32_C(0x010B): + return "LATIN SMALL LETTER C WITH DOT ABOVE"; + + case UINT32_C(0x010C): + return "LATIN CAPITAL LETTER C WITH CARON"; + + case UINT32_C(0x010D): + return "LATIN SMALL LETTER C WITH CARON"; + + case UINT32_C(0x010E): + return "LATIN CAPITAL LETTER D WITH CARON"; + + case UINT32_C(0x010F): + return "LATIN SMALL LETTER D WITH CARON"; + + case UINT32_C(0x0110): + return "LATIN CAPITAL LETTER D WITH STROKE"; + + case UINT32_C(0x0111): + return "LATIN SMALL LETTER D WITH STROKE"; + + case UINT32_C(0x0112): + return "LATIN CAPITAL LETTER E WITH MACRON"; + + case UINT32_C(0x0113): + return "LATIN SMALL LETTER E WITH MACRON"; + + case UINT32_C(0x0114): + return "LATIN CAPITAL LETTER E WITH BREVE"; + + case UINT32_C(0x0115): + return "LATIN SMALL LETTER E WITH BREVE"; + + case UINT32_C(0x0116): + return "LATIN CAPITAL LETTER E WITH DOT ABOVE"; + + case UINT32_C(0x0117): + return "LATIN SMALL LETTER E WITH DOT ABOVE"; + + case UINT32_C(0x0118): + return "LATIN CAPITAL LETTER E WITH OGONEK"; + + case 
UINT32_C(0x0119): + return "LATIN SMALL LETTER E WITH OGONEK"; + + case UINT32_C(0x011A): + return "LATIN CAPITAL LETTER E WITH CARON"; + + case UINT32_C(0x011B): + return "LATIN SMALL LETTER E WITH CARON"; + + case UINT32_C(0x011C): + return "LATIN CAPITAL LETTER G WITH CIRCUMFLEX"; + + case UINT32_C(0x011D): + return "LATIN SMALL LETTER G WITH CIRCUMFLEX"; + + case UINT32_C(0x011E): + return "LATIN CAPITAL LETTER G WITH BREVE"; + + case UINT32_C(0x011F): + return "LATIN SMALL LETTER G WITH BREVE"; + + case UINT32_C(0x0120): + return "LATIN CAPITAL LETTER G WITH DOT ABOVE"; + + case UINT32_C(0x0121): + return "LATIN SMALL LETTER G WITH DOT ABOVE"; + + case UINT32_C(0x0122): + return "LATIN CAPITAL LETTER G WITH CEDILLA"; + + case UINT32_C(0x0123): + return "LATIN SMALL LETTER G WITH CEDILLA"; + + case UINT32_C(0x0124): + return "LATIN CAPITAL LETTER H WITH CIRCUMFLEX"; + + case UINT32_C(0x0125): + return "LATIN SMALL LETTER H WITH CIRCUMFLEX"; + + case UINT32_C(0x0126): + return "LATIN CAPITAL LETTER H WITH STROKE"; + + case UINT32_C(0x0127): + return "LATIN SMALL LETTER H WITH STROKE"; + + case UINT32_C(0x0128): + return "LATIN CAPITAL LETTER I WITH TILDE"; + + case UINT32_C(0x0129): + return "LATIN SMALL LETTER I WITH TILDE"; + + case UINT32_C(0x012A): + return "LATIN CAPITAL LETTER I WITH MACRON"; + + case UINT32_C(0x012B): + return "LATIN SMALL LETTER I WITH MACRON"; + + case UINT32_C(0x012C): + return "LATIN CAPITAL LETTER I WITH BREVE"; + + case UINT32_C(0x012D): + return "LATIN SMALL LETTER I WITH BREVE"; + + case UINT32_C(0x012E): + return "LATIN CAPITAL LETTER I WITH OGONEK"; + + case UINT32_C(0x012F): + return "LATIN SMALL LETTER I WITH OGONEK"; + + case UINT32_C(0x0130): + return "LATIN CAPITAL LETTER I WITH DOT ABOVE"; + + case UINT32_C(0x0131): + return "LATIN SMALL LETTER DOTLESS I"; + + case UINT32_C(0x0132): + return "LATIN CAPITAL LIGATURE IJ"; + + case UINT32_C(0x0133): + return "LATIN SMALL LIGATURE IJ"; + + case UINT32_C(0x0134): + return 
"LATIN CAPITAL LETTER J WITH CIRCUMFLEX"; + + case UINT32_C(0x0135): + return "LATIN SMALL LETTER J WITH CIRCUMFLEX"; + + case UINT32_C(0x0136): + return "LATIN CAPITAL LETTER K WITH CEDILLA"; + + case UINT32_C(0x0137): + return "LATIN SMALL LETTER K WITH CEDILLA"; + + case UINT32_C(0x0138): + return "LATIN SMALL LETTER KRA"; + + case UINT32_C(0x0139): + return "LATIN CAPITAL LETTER L WITH ACUTE"; + + case UINT32_C(0x013A): + return "LATIN SMALL LETTER L WITH ACUTE"; + + case UINT32_C(0x013B): + return "LATIN CAPITAL LETTER L WITH CEDILLA"; + + case UINT32_C(0x013C): + return "LATIN SMALL LETTER L WITH CEDILLA"; + + case UINT32_C(0x013D): + return "LATIN CAPITAL LETTER L WITH CARON"; + + case UINT32_C(0x013E): + return "LATIN SMALL LETTER L WITH CARON"; + + case UINT32_C(0x013F): + return "LATIN CAPITAL LETTER L WITH MDDLE DOT"; + + case UINT32_C(0x0140): + return "LATIN SMALL LETTER L WITH MIDDLE DOT"; + + case UINT32_C(0x0150): + return "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE"; + + case UINT32_C(0x0160): + return "LATIN CAPITAL LETTER S WITH CARON"; + + case UINT32_C(0x0170): + return "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE"; + + // LATIN EXTENDED-B: + case UINT32_C(0x0180): + return "LATIN SMALL LETTER B WITH STROKE"; + + case UINT32_C(0x0190): + return "LATIN CAPITAL LETTER OPEN E"; + + case UINT32_C(0x01A0): + return "LATIN CAPITAL LETTER O WITH HORN"; + + case UINT32_C(0x01B0): + return "LATIN SMALL LETTER U WITH HORN"; + + case UINT32_C(0x01C0): + return "LATIN LETTER DENTAL CLICK"; + + case UINT32_C(0x01D0): + return "LATIN SMALL LETTER I WITH CARON"; + + case UINT32_C(0x01E0): + return "LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON"; + + case UINT32_C(0x01F0): + return "LATIN SMALL LETTER J WITH CARON"; + + case UINT32_C(0x0200): + return "LATIN CAPITAL LETTER A WITH DOUBLE GRAVE"; + + case UINT32_C(0x0210): + return "LATIN CAPITAL LETTER R WITH DOUBLE GRAVE"; + + case UINT32_C(0x0220): + return "LATIN CAPITAL LETTER N WITH LONG RIGHT LEG"; + + case 
UINT32_C(0x0230): + return "LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON"; + + case UINT32_C(0x0240): + return "LATIN SMALL LETTER Z WITH SWASH TAIL"; + + // IPA EXTENSIONS: + case UINT32_C(0x0250): + return "LATIN SMALL LETTER TURNED A"; + + case UINT32_C(0x0251): + return "LATIN SMALL LETTER ALPHA"; + + case UINT32_C(0x0252): + return "LATIN SMALL LETTER TURNED ALPHA"; + + case UINT32_C(0x0253): + return "LATIN SMALL LETTER B WITH HOOK"; + + case UINT32_C(0x0254): + return "LATIN SMALL LETTER OPEN O"; + + case UINT32_C(0x0255): + return "LATIN SMALL LETTER C WITH CURL"; + + case UINT32_C(0x0256): + return "LATIN SMALL LETTER D WITH TAIL"; + + case UINT32_C(0x0257): + return "LATIN SMALL LETTER D WITH HOOK"; + + case UINT32_C(0x0258): + return "LATIN SMALL LETTER REVERSED E"; + + case UINT32_C(0x0259): + return "LATIN SMALL LETTER SCHWA"; + + case UINT32_C(0x025A): + return "LATIN SMALL LETTER SCHWA WITH HOOK"; + + case UINT32_C(0x025B): + return "LATIN SMALL LETTER OPEN E"; + + case UINT32_C(0x025C): + return "LATIN SMALL LETTER REVERSED OPEN E"; + + case UINT32_C(0x025D): + return "LATIN SMALL LETTER REVERSED OPEN E WITH HOOK"; + + case UINT32_C(0x025E): + return "LATIN SMALL LETTER CLOSED REVERSED OPEN E"; + + case UINT32_C(0x025F): + return "LATIN SMALL LETTER DOTLESS J WITH STROKE"; + + case UINT32_C(0x0260): + return "LATIN SMALL LETTER G WITH HOOK"; + + case UINT32_C(0x0261): + return "LATIN SMALL LETTER SCRIPT G"; + + case UINT32_C(0x0262): + return "LATIN LETTER SMALL CAPITAL G"; + + case UINT32_C(0x0263): + return "LATIN SMALL LETTER GAMMA"; + + case UINT32_C(0x0264): + return "LATIN SMALL LETTER RAMS HORN"; + + case UINT32_C(0x0265): + return "LATIN SMALL LETTER TURNED H"; + + case UINT32_C(0x0266): + return "LATIN SMALL LETTER H WITH HOOK"; + + case UINT32_C(0x0267): + return "LATIN SMALL LETTER HENG WITH HOOK"; + + case UINT32_C(0x0268): + return "LATIN SMALL LETTER I WITH STROKE"; + + case UINT32_C(0x0269): + return "LATIN SMALL LETTER IOTA"; + + 
case UINT32_C(0x026A): + return "LATIN LETTER SMALL CAPITAL I"; + + case UINT32_C(0x026B): + return "LATIN SMALL LETTER L WITH MIDDLE TILDE"; + + case UINT32_C(0x026C): + return "LATIN SMALL LETTER L WITH BELT"; + + case UINT32_C(0x026D): + return "LATIN SMALL LETTER L WITH RETROFLEX HOOK"; + + case UINT32_C(0x026E): + return "LATIN SMALL LETTER LEZH"; + + case UINT32_C(0x026F): + return "LATIN SMALL LETTER TURNED M"; + + case UINT32_C(0x0270): + return "LATIN SMALL LETTER TURNED M WITH LONG LEG"; + + case UINT32_C(0x0271): + return "LATIN SMALL LETTER M WITH HOOK"; + + case UINT32_C(0x0272): + return "LATIN SMALL LETTER N WITH LEFT HOOK"; + + case UINT32_C(0x0273): + return "LATIN SMALL LETTER N WITH RETROFLEX HOOK"; + + case UINT32_C(0x0274): + return "LATIN LETTER SMALL CAPITAL N"; + + case UINT32_C(0x0275): + return "LATIN SMALL LETTER BARRED O"; + + case UINT32_C(0x0276): + return "LATIN LETTER SMALL CAPITAL OE"; + + case UINT32_C(0x0277): + return "LATIN SMALL LETTER CLOSED OMEGA"; + + case UINT32_C(0x0278): + return "LATIN SMALL LETTER PHI"; + + case UINT32_C(0x0279): + return "LATIN SMALL LETTER TURNED R"; + + case UINT32_C(0x027A): + return "LATIN SMALL LETTER TURNED R WITH LONG LEG"; + + case UINT32_C(0x027B): + return "LATIN SMALL LETTER TURNED R WITH HOOK"; + + case UINT32_C(0x027C): + return "LATIN SMALL LETTER R WITH LONG LEG"; + + case UINT32_C(0x027D): + return "LATIN SMALL LETTER R WITH TAIL"; + + case UINT32_C(0x027E): + return "LATIN SMALL LETTER R WITH FISHHOOK"; + + case UINT32_C(0x027F): + return "LATIN SMALL LETTER REVERSED R WITH FISHHOOK"; + + case UINT32_C(0x0280): + return "LATIN LETTER SMALL CAPITAL R"; + + case UINT32_C(0x0281): + return "LATIN LETTER SMALL CAPITAL INVERTED R"; + + case UINT32_C(0x0282): + return "LATIN SMALL LETTER S WITH HOOK"; + + case UINT32_C(0x0283): + return "LATIN SMALL LETTER ESH"; + + case UINT32_C(0x0284): + return "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK"; + + case UINT32_C(0x0285): + return "LATIN 
SMALL LETTER SQUAT REVERSED ESH"; + + case UINT32_C(0x0286): + return "LATIN SMALL LETTER SH WITH CURL"; + + case UINT32_C(0x0287): + return "LATIN SMALL LETTER TURNED T"; + + case UINT32_C(0x0288): + return "LATIN SMALL LETTER T WITH RETROFLEX HOOK"; + + case UINT32_C(0x0289): + return "LATIN SMALL LETTER U BAR"; + + case UINT32_C(0x028A): + return "LATIN SMALL LETTER UPSILON"; + + case UINT32_C(0x028B): + return "LATIN SMALL LETTER V WTIH HOOK"; + + case UINT32_C(0x028C): + return "LATIN SMALL LETTER TURNED V"; + + case UINT32_C(0x028D): + return "LATIN SMALL LETTER TURNED W"; + + case UINT32_C(0x028E): + return "LATIN SMALL LETTER TURNED Y"; + + case UINT32_C(0x028F): + return "LATIN LETTER SMALL CAPITAL Y"; + + case UINT32_C(0x0290): + return "LATIN SMALL LETTER Z WITH RETROFLEX HOOK"; + + case UINT32_C(0x0291): + return "LATIN SMALL LETTER Z WITH RETROFLEX"; + + case UINT32_C(0x0292): + return "LATIN SMALL LETTER EZH"; + + case UINT32_C(0x0293): + return "LATIN SMALL LETTER EZH WITH CURL"; + + case UINT32_C(0x0294): + return "LATIN LETTER GLOTTAL STOP"; + + case UINT32_C(0x0295): + return "LATIN LETTER PHARYNGEAL VOICED FRICATIVE"; + + case UINT32_C(0x0296): + return "LATIN LETTER INVERTED GLOTTAL STOP"; + + case UINT32_C(0x0297): + return "LATIN LETTER STRETCHED C"; + + case UINT32_C(0x0298): + return "LATIN LETTER BILABIAL CLICK"; + + case UINT32_C(0x0299): + return "LATIN LETTER SMALL CAPITAL B"; + + case UINT32_C(0x029A): + return "LATIN SMALL LETTER CLOSED OPEN E"; + + case UINT32_C(0x029B): + return "LATIN LETTER SMALL CAPITAL G WITH HOOK"; + + case UINT32_C(0x029C): + return "LATIN LETTER SMALL CAPITAL H"; + + case UINT32_C(0x029D): + return "LATIN SMALL LETTER J WITH CROSSED-TAIL"; + + case UINT32_C(0x029E): + return "LATIN SMALL LETTER TURNED K"; + + case UINT32_C(0x029F): + return "LATIN LETTER SMALL CAPITAL L"; + + case UINT32_C(0x02A0): + return "LATIN SMALL LETTER Q WITH HOOK"; + + case UINT32_C(0x02A1): + return "LATIN LETTER GLOTTAL STOP WITH 
STROKE"; + + case UINT32_C(0x02A2): + return "LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE"; + + case UINT32_C(0x02A3): + return "LATIN SMALL LETTER DZ DIGRAPH"; + + case UINT32_C(0x02A4): + return "LATIN SMALL LETTER DEZH DIGRAPH"; + + case UINT32_C(0x02A5): + return "LATIN SMALL LETTER DZ DIGRAPH WITH CURL"; + + case UINT32_C(0x02A6): + return "LATIN SMALL LETTER TS DIGRAPH"; + + case UINT32_C(0x02A7): + return "LATIN SMALL LETTER TESH DIGRAPH"; + + case UINT32_C(0x02A8): + return "LATIN SMALL LETTER TC DIGRAPH WITH CURL"; + + case UINT32_C(0x02A9): + return "LATIN SMALL LETTER FENG DIGRAPH"; + + case UINT32_C(0x02AA): + return "LATIN SMALL LETTER LS DIGRAPH"; + + case UINT32_C(0x02AB): + return "LATIN SMALL LETTER LZ DIGRAPH"; + + case UINT32_C(0x02AC): + return "LATIN LETTER BILABIAL PERCUSSIVE"; + + case UINT32_C(0x02AD): + return "LATIN LETTER BIDENTAL PERCUSSIVE"; + + case UINT32_C(0x02AE): + return "LATIN SMALL LETTER TURNED H WITH FISHHOOK"; + + case UINT32_C(0x02AF): + return "LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL"; + + // SPACING MODIFIER LETTERS: + case UINT32_C(0x02B0): + return "MODIFIER LETTER SMALL H"; + + case UINT32_C(0x02B1): + return "MODIFIER LETTER SMALL H WITH HOOK"; + + case UINT32_C(0x02B2): + return "MODIFIER LETTER SMALL J"; + + case UINT32_C(0x02B3): + return "MODIFIER LETTER SMALL R"; + + case UINT32_C(0x02B4): + return "MODIFIER LETTER SMALL TURNED R"; + + case UINT32_C(0x02B5): + return "MODIFIER LETTER SMALL TURNED R WITH HOOK"; + + case UINT32_C(0x02B6): + return "MODIFIER LETTER SMALL CAPITAL INVERTED R"; + + case UINT32_C(0x02B7): + return "MODIFIER LETTER SMALL W"; + + case UINT32_C(0x02B8): + return "MODIFIER LETTER SMALL Y"; + + case UINT32_C(0x02B9): + return "MODIFIER LETTER PRIME"; + + case UINT32_C(0x02BA): + return "MODIFIER LETTER DOUBLE PRIME"; + + case UINT32_C(0x02BB): + return "MODIFIER LETTER TURNED COMMA"; + + case UINT32_C(0x02BC): + return "MODIFIER LETTER APOSTROPHE"; + + case UINT32_C(0x02BD): + 
return "MODIFIER LETTER REVERSED COMMA"; + + case UINT32_C(0x02BE): + return "MODIFIER LETTER RIGHT HALF RING"; + + case UINT32_C(0x02BF): + return "MODIFIER LETTER LEFT HALF RING"; + + case UINT32_C(0x02C0): + return "MODIFIER LETTER GLOTTAL STOP"; + + case UINT32_C(0x02C1): + return "MODIFIER LETTER REVERSED GLOTTAL STOP"; + + case UINT32_C(0x02C2): + return "MODIFIER LETTER LEFT ARROWHEAD"; + + case UINT32_C(0x02C3): + return "MODIFIER LETTER RIGHT ARROWHEAD"; + + case UINT32_C(0x02C4): + return "MODIFIER LETTER UP ARROWHEAD"; + + case UINT32_C(0x02C5): + return "MODIFIER LETTER DOWN ARROWHEAD"; + + case UINT32_C(0x02C6): + return "MODIFIER LETTER CIRCUMFLEX"; + + case UINT32_C(0x02C7): + return "CARON"; + + case UINT32_C(0x02C8): + return "MODIFIER LETTER VERTICAL LINE"; + + case UINT32_C(0x02C9): + return "MODIFIER LETTER MACRON"; + + case UINT32_C(0x02CA): + return "MODIFIER LETTER ACUTE ACCENT"; + + case UINT32_C(0x02CB): + return "MODIFIER LETTER GRAVE ACCENT"; + + case UINT32_C(0x02CC): + return "MODIFIER LETTER LOW VERTICAL LINE"; + + case UINT32_C(0x02CD): + return "MODIFIER LETTER LOW MACRON"; + + case UINT32_C(0x02CE): + return "MODIFIER LETTER LOW GRAVE ACCENT"; + + case UINT32_C(0x02CF): + return "MODIFIER LETTER LOW ACUTE ACCENT"; + + case UINT32_C(0x02D0): + return "MODIFIER LETTER TRIANGULAR COLON"; + + case UINT32_C(0x02D1): + return "MODIFIER LETTER HALF TRIANGULAR COLON"; + + case UINT32_C(0x02D2): + return "MODIFIER LETTER CENTRED RIGHT HALF RING"; + + case UINT32_C(0x02D3): + return "MODIFIER LETTER CENTRED LEFT HALF RING"; + + case UINT32_C(0x02D4): + return "MODIFIER LETTER UP TACK"; + + case UINT32_C(0x02D5): + return "MODIFIER LETTER DOWN TACK"; + + case UINT32_C(0x02D6): + return "MODIFIER LETTER PLUS SIGN"; + + case UINT32_C(0x02D7): + return "MODIFIER LETTER MINUS SIGN"; + + case UINT32_C(0x02D8): + return "BREVE"; + + case UINT32_C(0x02D9): + return "DOT ABOVE"; + + case UINT32_C(0x02DA): + return "RING ABOVE"; + + case 
UINT32_C(0x02DB): + return "OGONEK"; + + case UINT32_C(0x02DC): + return "SMALL TILDE"; + + case UINT32_C(0x02DD): + return "DOUBLE ACUTE ACCENT"; + + case UINT32_C(0x02DE): + return "MODIFIER LETTER RHOTIC HOOK"; + + case UINT32_C(0x02DF): + return "MODIFIER LETTER CROSS ACCENT"; + + case UINT32_C(0x02E0): + return "MODIFIER LETTER SMALL GAMMA"; + + case UINT32_C(0x02E1): + return "MODIFIER LETTER SMALL L"; + + case UINT32_C(0x02E2): + return "MODIFIER LETTER SMALL S"; + + case UINT32_C(0x02E3): + return "MODIFIER LETTER SMALL X"; + + case UINT32_C(0x02E4): + return "MODIFIER LETTER SMALL REVERSED GLOTTAL STOP"; + + case UINT32_C(0x02E5): + return "MODIFIER LETTER EXTRA-HIGH TONE BAR"; + + case UINT32_C(0x02E6): + return "MODIFIER LETTER HIGH TONE BAR"; + + case UINT32_C(0x02E7): + return "MODIFIER LETTER MID TONE BAR"; + + case UINT32_C(0x02E8): + return "MODIFIER LETTER LOW TONE BAR"; + + case UINT32_C(0x02E9): + return "MODIFIER LETTER EXTRA-LOW TONE BAR"; + + case UINT32_C(0x02EA): + return "MODIFIER LETTER YIN DEPARTING TONE MARK"; + + case UINT32_C(0x02EB): + return "MODIFIER LETTER YANG DEPARTING TONE MARK"; + + case UINT32_C(0x02EC): + return "MODIFIER LETTER VOICING"; + + case UINT32_C(0x02ED): + return "MODIFIER LETTER UNASPIRATED"; + + case UINT32_C(0x02EE): + return "MODIFIER LETTER DOUBLE APOSTROPHE"; + + case UINT32_C(0x02EF): + return "MODIFIER LETTER LOW DOWN ARROWHEAD"; + + case UINT32_C(0x02F0): + return "MODIFIER LETTER LOW UP ARROWHEAD"; + + case UINT32_C(0x02F1): + return "MODIFIER LETTER LOW LEFT ARROWHEAD"; + + case UINT32_C(0x02F2): + return "MODIFIER LETTER LOW RIGHT ARROWHEAD"; + + case UINT32_C(0x02F3): + return "MODIFIER LETTER LOW RING"; + + case UINT32_C(0x02F4): + return "MODIFIER LETTER MIDDLE GRAVE ACCENT"; + + case UINT32_C(0x02F5): + return "MODIFIER LETTER MIDDLE DOUBLE GRAVE ACCENT"; + + case UINT32_C(0x02F6): + return "MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT"; + + case UINT32_C(0x02F7): + return "MODIFIER LETTER LOW TILDE"; 
+ + case UINT32_C(0x02F8): + return "MODIFIER LETTER RAISED COLON"; + + case UINT32_C(0x02F9): + return "MODIFIER LETTER BEGIN HIGH TONE"; + + case UINT32_C(0x02FA): + return "MODIFIER LETTER END HIGH TONE"; + + case UINT32_C(0x02FB): + return "MODIFIER LETTER BEGIN LOW TONE"; + + case UINT32_C(0x02FC): + return "MODIFIER LETTER END LOW TONE"; + + case UINT32_C(0x02FD): + return "MODIFIER LETTER SHELF"; + + case UINT32_C(0x02FE): + return "MODIFIER LETTER OPEN SHELF"; + + case UINT32_C(0x02FF): + return "MODIFIER LETTER LOW LEFT ARROWHEAD"; + + // COMBINING DIACRITICAL MARKS: + case UINT32_C(0x0300): + return "COMBINING GRAVE ACCENT"; + + case UINT32_C(0x0301): + return "COMBINING ACUTE ACCENT"; + + case UINT32_C(0x0302): + return "COMBINING CIRCUMFLEX ACCENT"; + + case UINT32_C(0x0303): + return "COMBINING TILDE"; + + case UINT32_C(0x0304): + return "COMBINING MACRON"; + + case UINT32_C(0x0305): + return "COMBINING OVERLINE"; + + case UINT32_C(0x0306): + return "COMBINING BREVE"; + + case UINT32_C(0x0307): + return "COMBINING DOT ABOVE"; + + case UINT32_C(0x0308): + return "COMBINING DIAERESIS"; + + case UINT32_C(0x0309): + return "COMBINING HOOK ABOVE"; + + case UINT32_C(0x030A): + return "COMBINING RING ABOVE"; + + case UINT32_C(0x030B): + return "COMBINING DOUBLE ACUTE ACCENT"; + + case UINT32_C(0x030C): + return "COMBINING CARON"; + + case UINT32_C(0x030D): + return "COMBINING VERTICAL LINE ABOVE"; + + case UINT32_C(0x030E): + return "COMBINING DOUBLE VERTICAL LINE ABOVE"; + + case UINT32_C(0x030F): + return "COMBINING DOUBLE GRAVE ACCENT"; + + case UINT32_C(0x0310): + return "COMBINING CANDRABIND"; + + case UINT32_C(0x0311): + return "COMBINING INVERTED BREVE"; + + case UINT32_C(0x0312): + return "COMBINING TURNED COMMA ABOVE"; + + case UINT32_C(0x0313): + return "COMBINING COMMA ABOVE"; + + case UINT32_C(0x0314): + return "COMBINING REVERSED COMMA ABOVE"; + + case UINT32_C(0x0315): + return "COMBINING COMMA ABOVE RIGHT"; + + case UINT32_C(0x0316): + return 
"COMBINING GRAVE ACCENT BELOW"; + + case UINT32_C(0x0317): + return "COMBINING ACUTE ACCENT BELOW"; + + case UINT32_C(0x0318): + return "COMBINING LEFT TACK BELOW"; + + case UINT32_C(0x0319): + return "COMBINING RIGHT TACK BELOW"; + + case UINT32_C(0x031A): + return "COMBINING LEFT ANGLE ABOVE"; + + case UINT32_C(0x031B): + return "COMBINING HORN"; + + case UINT32_C(0x031C): + return "COMBINING LEFT HALF RING BELOW"; + + case UINT32_C(0x031D): + return "COMBINING UP TACK BELOW"; + + case UINT32_C(0x031E): + return "COMBINING DOWN TACK BELOW"; + + case UINT32_C(0x031F): + return "COMBINING PLUS SIGN BELOW"; + + case UINT32_C(0x0320): + return "COMBINING MINUS SIGN BELOW"; + + case UINT32_C(0x0321): + return "COMBINING PALATALIZED HOOK BELOW"; + + case UINT32_C(0x0322): + return "COMBINING RETROFLEX HOOK BELOW"; + + case UINT32_C(0x0323): + return "COMBINING DOT BELOW"; + + case UINT32_C(0x0324): + return "COMBINING DIAERESIS BELOW"; + + case UINT32_C(0x0325): + return "COMBINING RING BELOW"; + + case UINT32_C(0x0326): + return "COMBINING COMMA BELOW"; + + case UINT32_C(0x0327): + return "COMBINING CEDILLA"; + + case UINT32_C(0x0328): + return "COMBINING OGONEK"; + + case UINT32_C(0x0329): + return "COMBINING VERTICAL LINE BELOW"; + + case UINT32_C(0x032A): + return "COMBINING BRDIGE BELOW"; + + case UINT32_C(0x032B): + return "COMBINING INVERTED DOUBLE ARCH BELOW"; + + case UINT32_C(0x032C): + return "COMBINING CARON BELOW"; + + case UINT32_C(0x032D): + return "COMBINING CIRCUMFLEX ACCENT BELOW"; + + case UINT32_C(0x032E): + return "COMBINING BREVE BELOW"; + + case UINT32_C(0x032F): + return "COMBINING INVERTED BREVE BELOW"; + + case UINT32_C(0x0330): + return "COMBINING TILDE BELOW"; + + case UINT32_C(0x0331): + return "COMBINING MACRON BELOW"; + + case UINT32_C(0x0332): + return "COMBINING LOW LINE"; + + case UINT32_C(0x0333): + return "COMBINING DOUBLE LOW LINE"; + + case UINT32_C(0x0334): + return "COMBINING TILDE OVERLAY"; + + case UINT32_C(0x0335): + return 
"COMBINING SHORT STROKE OVERLAY"; + + case UINT32_C(0x0336): + return "COMBINING LONG STROKE OVERLAY"; + + case UINT32_C(0x0337): + return "COMBINING SHORT SOLIDUS OVERLAY"; + + case UINT32_C(0x0338): + return "COMBINING LONG SOLIDUS OVERLAY"; + + case UINT32_C(0x0339): + return "COMBINING RIGHT HALF RING BELOW"; + + case UINT32_C(0x033A): + return "COMBINING INVERTED BRIDGE BELOW"; + + case UINT32_C(0x033B): + return "COMBINING SQUARE BELOW"; + + case UINT32_C(0x033C): + return "COMBINING SEAGULL BELOW"; + + case UINT32_C(0x033D): + return "COMBINING X ABOVE"; + + case UINT32_C(0x033E): + return "COMBINING VERTICAL TILDE"; + + case UINT32_C(0x033F): + return "COMBINING DOUBLE OVERLINE"; + + case UINT32_C(0x0340): + return "COMBINING GRAVE TONE MARK"; + + case UINT32_C(0x0341): + return "COMBINING ACUTE TONE MARK"; + + case UINT32_C(0x0342): + return "COMBINING GREEK PERISPOMENI"; + + case UINT32_C(0x0343): + return "COMBINING GREEK KORONIS"; + + case UINT32_C(0x0344): + return "COMBINING GREEK DIALYTIKA TONOS"; + + case UINT32_C(0x0345): + return "COMBINING GREEK YPOGEGRAMMENI"; + + case UINT32_C(0x0346): + return "COMBINING BRIDGE ABOVE"; + + case UINT32_C(0x0347): + return "COMBINING EQUALS SIGN BELOW"; + + case UINT32_C(0x0348): + return "COMBINING DOUBLE VERTICAL LINE BELOW"; + + case UINT32_C(0x0349): + return "COMBINING LEFT ANGLE BELOW"; + + case UINT32_C(0x034A): + return "COMBINING NOT TILDE ABOVE"; + + case UINT32_C(0x034B): + return "COMBINING HOMOTHETIC ABOVE"; + + case UINT32_C(0x034C): + return "COMBINING ALMOST EQUAL TO ABOVE"; + + case UINT32_C(0x034D): + return "COMBINING LEFT RIGHT ARROW BELOW"; + + case UINT32_C(0x034E): + return "COMBINING UPWARDS ARROW BELOW"; + + case UINT32_C(0x034F): + return "COMBINING GRAPHEME JOINER"; + + case UINT32_C(0x0350): + return "COMBINING RIGHT ARROWHEAD ABOVE"; + + case UINT32_C(0x0351): + return "COMBINING LEFT HALF RING ABOVE"; + + case UINT32_C(0x0352): + return "COMBINING FERMATA"; + + case 
UINT32_C(0x0353): + return "COMBINING X BELOW"; + + case UINT32_C(0x0354): + return "COMBINING LEFT ARROWHEAD BELOW"; + + case UINT32_C(0x0355): + return "COMBINING RIGHT ARROWHEAD BELOW"; + + case UINT32_C(0x0356): + return "COMBINING RIGHT ARROWHEAD AND UP ARROWHEAD BELOW"; + + case UINT32_C(0x0357): + return "COMBINING RIGHT HALF RING ABOVE"; + + case UINT32_C(0x0358): + return "COMBINING DOT ABOVE RIGHT"; + + case UINT32_C(0x0359): + return "COMBINING ASTERISK BELOW"; + + case UINT32_C(0x035A): + return "COMBINING DOUBLE RING BELOW"; + + case UINT32_C(0x035B): + return "COMBINING ZIGZAG ABOVE"; + + case UINT32_C(0x035C): + return "COMBINING DOUBLE BREVE BELOW"; + + case UINT32_C(0x035D): + return "COMBINING DOUBLE BREVE"; + + case UINT32_C(0x035E): + return "COMBINING DOUBLE MACRON"; + + case UINT32_C(0x035F): + return "COMBINING DOUBLE MACRON BELOW"; + + case UINT32_C(0x0360): + return "COMBINING DOUBLE TILDE"; + + case UINT32_C(0x0361): + return "COMBINING DOUBLE INVERTED BREVE"; + + case UINT32_C(0x0362): + return "COMBINING DOUBLE RIGHTWARDS ARROW BELOW"; + + case UINT32_C(0x0363): + return "COMBINING LATIN SMALL LETTER A"; + + case UINT32_C(0x0364): + return "COMBINING LATIN SMALL LETTER E"; + + case UINT32_C(0x0365): + return "COMBINING LATIN SMALL LETTER I"; + + case UINT32_C(0x0366): + return "COMBINING LATIN SMALL LETTER O"; + + case UINT32_C(0x0367): + return "COMBINING LATIN SMALL LETTER "; + + case UINT32_C(0x0368): + return "COMBINING LATIN SMALL LETTER C"; + + case UINT32_C(0x0369): + return "COMBINING LATIN SMALL LETTER D"; + + case UINT32_C(0x036A): + return "COMBINING LATIN SMALL LETTER H"; + + case UINT32_C(0x036B): + return "COMBINING LATIN SMALL LETTER M"; + + case UINT32_C(0x036C): + return "COMBINING LATIN SMALL LETTER R"; + + case UINT32_C(0x036D): + return "COMBINING LATIN SMALL LETTER T"; + + case UINT32_C(0x036E): + return "COMBINING LATIN SMALL LETTER V"; + + case UINT32_C(0x036F): + return "COMBINING LATIN SMALL LETTER X"; + + // 
GREEK AND COPTIC: + case UINT32_C(0x0370): + return "GREEK CAPITAL LETTER HETA"; + + case UINT32_C(0x0371): + return "GREEK SMALL LETTER HETA"; + + case UINT32_C(0x0372): + return "GREEK CAPITAL LETTER ARCHAIC SAMPI"; + + case UINT32_C(0x0373): + return "GREEK SMALL LETTER ARCHAIC SAMPI"; + + case UINT32_C(0x0374): + return "GREEK NUMERAL SIGN"; + + case UINT32_C(0x0375): + return "GREEK LOWER NUMERAL SIGN"; + + case UINT32_C(0x0376): + return "GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA"; + + case UINT32_C(0x0377): + return "GREEK SMALL LETTER PAMPHYLIAN DIGAMMA"; + + case UINT32_C(0x037A): + return "GREEK YPOGEGRAMMENI"; + + case UINT32_C(0x037B): + return "GREEK SMALL REVERSED LUNATE SIGMA SYMBOL"; + + case UINT32_C(0x037C): + return "GREEK SMALL DOTTED LUNATE SIGMA SYMBOL"; + + case UINT32_C(0x037D): + return "GREEK SMALL REVERSED DOTTED LUNATE SIGMAL SYMBOL"; + + case UINT32_C(0x037E): + return "GREEK QUESTION MARK"; + + case UINT32_C(0x037F): + return "GREEK CAPITAL LETTER YOT"; + + case UINT32_C(0x0384): + return "GREEK TONOS"; + + case UINT32_C(0x0385): + return "GREEK DIALYTIKA TONOS"; + + case UINT32_C(0x0386): + return "GREEK CAPITAL LETTER ALPHA WITH TONOS"; + + case UINT32_C(0x0387): + return "GREEK ANO TELEIA"; + + case UINT32_C(0x0388): + return "GREEK CAPITAL LETTER EPSILON WITH TONOS"; + + case UINT32_C(0x0389): + return "GREEK CAPITAL LETTER ETA WITH TONOS"; + + case UINT32_C(0x038A): + return "GREEK CAPITAL LETTER IOTA WITH TONOS"; + + case UINT32_C(0x038C): + return "GREEK CAPITAL LETTER OMICRON WITH TONOS"; + + case UINT32_C(0x038E): + return "GREEK CAPITAL LETTER USPILON WITH TONOS"; + + case UINT32_C(0x038F): + return "GREEK CAPITAL LETTER OMEGA WITH TONOS"; + + case UINT32_C(0x0390): + return "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"; + + case UINT32_C(0x0391): + return "GREEK CAPITAL LETTER ALPHA"; + + case UINT32_C(0x0392): + return "GREEK CAPITAL LETTER BETA"; + + case UINT32_C(0x0393): + return "GREEK CAPITAL LETTER GAMMA"; + + case 
UINT32_C(0x0394): + return "GREEK CAPITAL LETTER DELTA"; + + case UINT32_C(0x0395): + return "GREEK CAPITAL LETTER EPSILON"; + + case UINT32_C(0x0396): + return "GREEK CAPITAL LETTER ZETA"; + + case UINT32_C(0x0397): + return "GREEK CAPITAL LETTER ETA"; + + case UINT32_C(0x0398): + return "GREEK CAPITAL LETTER THETA"; + + case UINT32_C(0x0399): + return "GREEK CAPITAL LETTER IOTA"; + + case UINT32_C(0x039A): + return "GREEK CAPITAL LETTER KAPPA"; + + case UINT32_C(0x039B): + return "GREEK CAPITAL LETTER LAMBDA"; + + case UINT32_C(0x039C): + return "GREEK CAPITAL LETTER M"; + + case UINT32_C(0x039D): + return "GREEK CAPITAL LETTER N"; + + case UINT32_C(0x039E): + return "GREEK CAPITAL LETTER XI"; + + case UINT32_C(0x039F): + return "GREEK CAPITAL LETTER OMICRON"; + + case UINT32_C(0x03A0): + return "GREEK CAPITAL LETTER PI"; + + case UINT32_C(0x03A1): + return "GREEK CAPITAL LETTER RHO"; + + case UINT32_C(0x03A3): + return "GREEK CAPITAL LETTER SIGMA"; + + case UINT32_C(0x03A4): + return "GREEK CAPITAL LETTER TA"; + + case UINT32_C(0x03A5): + return "GREEK CAPITAL LETTER UPSILON"; + + case UINT32_C(0x03A6): + return "GREEK CAPITAL LETTER PHI"; + + case UINT32_C(0x03A7): + return "GREEK CAPITAL LETTER CHI"; + + case UINT32_C(0x03A8): + return "GREEK CAPITAL LETTER PSI"; + + case UINT32_C(0x03A9): + return "GREEK CAPITAL LETTER OMEGA"; + + case UINT32_C(0x03AA): + return "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"; + + case UINT32_C(0x03AB): + return "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"; + + case UINT32_C(0x03AC): + return "GREEK SMALL LETTER ALPHA WITH TONOS"; + + case UINT32_C(0x03AD): + return "GREEK SMALL LETTER EPSILON WITH TONOS"; + + case UINT32_C(0x03AE): + return "GREEK SMALL LETTER ETA WITH TONOS"; + + case UINT32_C(0x03AF): + return "GREEK SMALL LETTER IOTA WITH TONOS"; + + case UINT32_C(0x03B0): + return "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"; + + case UINT32_C(0x03B1): + return "GREEK SMALL LETTER ALPHA"; + + case UINT32_C(0x03B2): 
+ return "GREEK SMALL LETTER BETA"; + + case UINT32_C(0x03B3): + return "GREEK SMALL LETTER GAMMA"; + + case UINT32_C(0x03B4): + return "GREEK SMALL LETTER DELTA"; + + case UINT32_C(0x03B5): + return "GREEK SMALL LETTER EPSILON"; + + case UINT32_C(0x03B6): + return "GREEK SMALL LETTER ZETA"; + + case UINT32_C(0x03B7): + return "GREEK SMALL LETTER ETA"; + + case UINT32_C(0x03B8): + return "GREEK SMALL LETTER THETA"; + + case UINT32_C(0x03B9): + return "GREEK SMALL LETTER IOTA"; + + case UINT32_C(0x03BA): + return "GREEK SMALL LETTER KAPPA"; + + case UINT32_C(0x03BB): + return "GREEK SMALL LETTER LAMBDA"; + + case UINT32_C(0x03BC): + return "GREEK SMALL LETTER M"; + + case UINT32_C(0x03BD): + return "GREEK SMALL LETTER N"; + + case UINT32_C(0x03BE): + return "GREEK SMALL LETTER XI"; + + case UINT32_C(0x03BF): + return "GREEK SMALL LETTER OMICRON"; + + case UINT32_C(0x03C0): + return "GREEK SMALL LETTER PI"; + + case UINT32_C(0x03C1): + return "GREEK SMALL LETTER RHO"; + + case UINT32_C(0x03C2): + return "GREEK SMALL LETTER FINAL SIGMA"; + + case UINT32_C(0x03C3): + return "GREEK SMALL LETTER SIGMA"; + + case UINT32_C(0x03C4): + return "GREEK SMALL LETTER TA"; + + case UINT32_C(0x03C5): + return "GREEK SMALL LETTER UPSILON"; + + case UINT32_C(0x03C6): + return "GREEK SMALL LETTER PHI"; + + case UINT32_C(0x03C7): + return "GREEK SMALL LETTER CHI"; + + case UINT32_C(0x03C8): + return "GREEK SMALL LETTER PSI"; + + case UINT32_C(0x03C9): + return "GREEK SMALL LETTER OMEGA"; + + case UINT32_C(0x03CA): + return "GREEK SMALL LETTER IOTA WITH DIALYTIKA"; + + case UINT32_C(0x03CB): + return "GREEK SMALL LETTER UPSILON WITH DIALYTIKA"; + + case UINT32_C(0x03CC): + return "GREEK SMALL LETTER OMICRON WITH TONOS"; + + case UINT32_C(0x03CD): + return "GREEK SMALL LETTER UPSILON WITH TONOS"; + + case UINT32_C(0x03CE): + return "GREEK SMALL LETTER OMEGA WITH TONOS"; + + case UINT32_C(0x03CF): + return "GREEK CAPITAL KAI SYMBOL"; + + case UINT32_C(0x03D0): + return "GREEK BETA 
SYMBOL"; + + case UINT32_C(0x03D1): + return "GREEK THETA SYMBOL"; + + case UINT32_C(0x03D2): + return "GREEK UPSILON WITH HOOK SYMBOL"; + + case UINT32_C(0x03D3): + return "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL"; + + case UINT32_C(0x03D4): + return "GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL"; + + case UINT32_C(0x03D5): + return "GREEK PHI SYMBOL"; + + case UINT32_C(0x03D6): + return "GREEK PI SYMBOL"; + + case UINT32_C(0x03D7): + return "GREEK KAI SYMBOL"; + + case UINT32_C(0x03D8): + return "GREEK LETTER ARCHAIC KOPPA"; + + case UINT32_C(0x03D9): + return "GREEK SMALL LETTER ARCHAIC KOPPA"; + + case UINT32_C(0x03DA): + return "GREEK LETTER STIGMA"; + + case UINT32_C(0x03DB): + return "GREEK SMALL LETTER STIGMA"; + + case UINT32_C(0x03DC): + return "GREEK LETTER DIGAMMA"; + + case UINT32_C(0x03DD): + return "GREEK SMALL LETTER DIGAMMA"; + + case UINT32_C(0x03DE): + return "GREEK LETTER KOPPA"; + + case UINT32_C(0x03DF): + return "GREEK SMALL LETTER KOPPA"; + + case UINT32_C(0x03E0): + return "GREEK LETTER SAMPI"; + + case UINT32_C(0x03F0): + return "GREEK KAPPA SYMBOL"; + + // HEBREW: + case UINT32_C(0x05D0): + return "HEBREW LETTER ALEF"; + + case UINT32_C(0x05D1): + return "HEBREW LETTER BET"; + + case UINT32_C(0x05D2): + return "HEBREW LETTER GIMEL"; + + case UINT32_C(0x05D3): + return "HEBREW LETTER DALET"; + + case UINT32_C(0x05D4): + return "HEBREW LETTER HE"; + + case UINT32_C(0x05D5): + return "HEBREW LETTER VAV"; + + case UINT32_C(0x05D6): + return "HEBREW LETTER ZAYIN"; + + case UINT32_C(0x05D7): + return "HEBREW LETTER HET"; + + case UINT32_C(0x05D8): + return "HEBREW LETTER TET"; + + case UINT32_C(0x05D9): + return "HEBREW LETTER YOD"; + + case UINT32_C(0x05DA): + return "HEBREW LETTER FINAL KAF"; + + case UINT32_C(0x05DB): + return "HEBREW LETTER KAF"; + + case UINT32_C(0x05DC): + return "HEBREW LETTER LAMED"; + + case UINT32_C(0x05DD): + return "HEBREW LETTER FINAL MEM"; + + case UINT32_C(0x05DE): + return "HEBREW LETTER MEM"; + + case 
UINT32_C(0x05DF): + return "HEBREW LETTER FINAL NUN"; + + case UINT32_C(0x05E0): + return "HEBREW LETTER NUN"; + + case UINT32_C(0x05E1): + return "HEBREW LETTER SAMEKH"; + + case UINT32_C(0x05E2): + return "HEBREW LETTER AYIN"; + + case UINT32_C(0x05E3): + return "HEBREW LETTER FINAL PE"; + + case UINT32_C(0x05E4): + return "HEBREW LETTER PE"; + + case UINT32_C(0x05E5): + return "HEBREW LETTER FINAL TSADI"; + + case UINT32_C(0x05E6): + return "HEBREW LETTER TSADI"; + + case UINT32_C(0x05E7): + return "HEBREW LETTER QOF"; + + case UINT32_C(0x05E8): + return "HEBREW LETTER RESH"; + + case UINT32_C(0x05E9): + return "HEBREW LETTER SHIN"; + + case UINT32_C(0x05EA): + return "HEBREW LETTER TAV"; + + case UINT32_C(0x05EF): + return "HEBREW YOD TRIANGLE"; + + // CYRILLIC: + case UINT32_C(0x0400): + return "CYRILLIC CAPITAL LETTER LE WITH GRAVE"; + + case UINT32_C(0x0401): + return "CYRILLIC CAPITAL LETTER LO"; + + case UINT32_C(0x0402): + return "CYRILLIC CAPITAL LETTER DJE"; + + case UINT32_C(0x0403): + return "CYRILLIC CAPITAL LETTER GJE"; + + case UINT32_C(0x0404): + return "CYRILLIC CAPITAL LETTER UKRAINIAN LE"; + + case UINT32_C(0x0405): + return "CYRILLIC CAPITAL LETTER DZE"; + + case UINT32_C(0x0406): + return "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I"; + + case UINT32_C(0x0407): + return "CYRILLIC CAPITAL LETTER YI"; + + case UINT32_C(0x0408): + return "CYRILLIC CAPITAL LETTER JE"; + + case UINT32_C(0x0409): + return "CYRILLIC CAPITAL LETTER LJE"; + + case UINT32_C(0x040A): + return "CYRILLIC CAPITAL LETTER NJE"; + + case UINT32_C(0x040B): + return "CYRILLIC CAPITAL LETTER TSHE"; + + case UINT32_C(0x040C): + return "CYRILLIC CAPITAL LETTER KJE"; + + case UINT32_C(0x040D): + return "CYRILLIC CAPITAL LETTER I WITH GRAVE"; + + case UINT32_C(0x040E): + return "CYRILLIC CAPITAL LETTER SHORT "; + + case UINT32_C(0x040F): + return "CYRILLIC CAPITAL LETTER DZHE"; + + case UINT32_C(0x0410): + return "CYRILLIC CAPITAL LETTER A"; + + case UINT32_C(0x0420): + return 
"CYRILLIC CAPITAL LETTER ER"; + + case UINT32_C(0x0430): + return "CYRILLIC SMALL LETTER A"; + + case UINT32_C(0x0440): + return "CYRILLIC SMALL LETTER ER"; + + case UINT32_C(0x0450): + return "CYRILLIC SMALL LETTER LE WITH GRAVE"; + + case UINT32_C(0x0460): + return "CYRILLIC CAPITAL LETTER OMEGA"; + + case UINT32_C(0x0470): + return "CYRILLIC CAPITAL LETTER PSI"; + + case UINT32_C(0x0480): + return "CYRILLIC CAPITAL LETTER KOPPA"; + + case UINT32_C(0x0490): + return "CYRILLIC CAPITAL LETTER GHE WITH UPTURN"; + + case UINT32_C(0x04A0): + return "CYRILLIC CAPITAL LETTER BASHKIR KA"; + + case UINT32_C(0x04B0): + return "CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE"; + + case UINT32_C(0x04C0): + return "CYRILLIC LETTER PALOCHKA"; + + case UINT32_C(0x04D0): + return "CYRILLIC CAPITAL LETTER A WITH BREVE"; + + case UINT32_C(0x04E0): + return "CYRILLIC CAPITAL LETTER ABKHASIAN DZE"; + + case UINT32_C(0x04F0): + return "CYRILLIC CAPITAL LETTER U WITH DIAERESIS"; + + // SYRIAC SUPPLEMENT: + case UINT32_C(0x0860): + return "SYRIAC LETTER MALAYALAM NGA"; + + case UINT32_C(0x0861): + return "SYRIAC LETTER MALAYALAM JA"; + + case UINT32_C(0x0862): + return "SYRIAC LETTER MALAYALAM NYA"; + + case UINT32_C(0x0863): + return "SYRIAC LETTER MALAYALAM TTA"; + + case UINT32_C(0x0864): + return "SYRIAC LETTER MALAYALAM NNA"; + + case UINT32_C(0x0865): + return "SYRIAC LETTER MALAYALAM NNNA"; + + case UINT32_C(0x0866): + return "SYRIAC LETTER MALAYALAM BHA"; + + case UINT32_C(0x0867): + return "SYRIAC LETTER MALAYALAM RA"; + + case UINT32_C(0x0868): + return "SYRIAC LETTER MALAYALAM LLA"; + + case UINT32_C(0x0869): + return "SYRIAC LETTER MALAYALAM LLLA"; + + case UINT32_C(0x086A): + return "SYRIAC LETTER MALAYALAM SSA"; + + // RUNIC: + case UINT32_C(0x16A0): + return "RUNIC LETTER FEHU FEOH FE F"; + + case UINT32_C(0x16A1): + return "RUNIC LETTER V"; + + case UINT32_C(0x16A2): + return "RUNIC LETTER URUZ UR "; + + case UINT32_C(0x16A3): + return "RUNIC LETTER YR"; + + case 
UINT32_C(0x16A4): + return "RUNIC LETTER Y"; + + case UINT32_C(0x16A5): + return "RUNIC LETTER W"; + + case UINT32_C(0x16A6): + return "RUNIC LETTER THURISAZ THURS THORN"; + + case UINT32_C(0x16A7): + return "RUNIC LETTER ETH"; + + case UINT32_C(0x16A8): + return "RUNIC LETTER ANSUZ A"; + + case UINT32_C(0x16A9): + return "RUNIC LETTER OS O"; + + case UINT32_C(0x16AA): + return "RUNIC LETTER AC A"; + + case UINT32_C(0x16AB): + return "RUNIC LETTER AESC"; + + case UINT32_C(0x16AC): + return "RUNIC LETTER LONG-BRANCHED-OSS O"; + + case UINT32_C(0x16AD): + return "RUNIC LETTER SHORT-TWIG-OSS O"; + + case UINT32_C(0x16AE): + return "RUNIC LETTER O"; + + case UINT32_C(0x16AF): + return "RUNIC LETTER OE"; + + case UINT32_C(0x16B0): + return "RUNIC LETTER ON"; + + case UINT32_C(0x16C0): + return "RUNIC LETTER DOTTED-N"; + + case UINT32_C(0x16D0): + return "RUNIC LETTER SHORT-TWIG-TYR T"; + + case UINT32_C(0x16E0): + return "RUNIC LETTER EAR"; + + case UINT32_C(0x16F0): + return "RUNIC BELGTHOR SYMBOL"; + + // CYRILLIC EXTENDED C: + case UINT32_C(0x1C80): + return "CYRILLIC SMALL LETTER ROUNDED VE"; + + case UINT32_C(0x1C81): + return "CYRILLIC SMALL LETTER LONG-LEGGED DE"; + + case UINT32_C(0x1C82): + return "CYRILLIC SMALL LETTER NARROW O"; + + case UINT32_C(0x1C83): + return "CYRILLIC SMALL LETTER WIDE ES"; + + case UINT32_C(0x1C84): + return "CYRILLIC SMALL LETTER TALL TE"; + + case UINT32_C(0x1C85): + return "CYRILLIC SMALL LETTER THREE-LEGGED TE"; + + case UINT32_C(0x1C86): + return "CYRILLIC SMALL LETTER TALL HARD SIGN"; + + case UINT32_C(0x1C87): + return "CYRILLIC SMALL LETTER TALL YAT"; + + case UINT32_C(0x1C88): + return "CYRILLIC SMALL LETTER UNBLENDED UK"; + + // GENERAL PUNCTUATION: + case UINT32_C(0x2000): + return "EN QUAD"; + + case UINT32_C(0x2001): + return "EM QUAD"; + + case UINT32_C(0x2002): + return "EN SPACE"; + + case UINT32_C(0x2003): + return "EM SPACE"; + + case UINT32_C(0x2004): + return "THREE-PER-EM SPACE"; + + case UINT32_C(0x2005): + return 
"FOUR-PER-EM SPACE"; + + case UINT32_C(0x2006): + return "SIX-PER-EM SPACE"; + + case UINT32_C(0x2007): + return "FIGURE SPACE"; + + case UINT32_C(0x2008): + return "PUNCTUATION SPACE"; + + case UINT32_C(0x2009): + return "THIN SPACE"; + + case UINT32_C(0x200A): + return "HAIR SPACE"; + + case UINT32_C(0x203C): + return "DOUBLE EXCLAMATION MARK"; + + case UINT32_C(0x2047): + return "DOUBLE QUOTATION MARK"; + + case UINT32_C(0x2048): + return "QUESTION EXCLAMATION MARK"; + + case UINT32_C(0x2049): + return "EXCLAMATION QUESTION MARK"; + + // CURRENCY SYMBOLS: + case UINT32_C(0x20A0): + return "EURO-CURRENCY SIGN"; + + case UINT32_C(0x20A1): + return "COLON SIGN"; + + case UINT32_C(0x20A2): + return "CRUZEIRO SIGN"; + + case UINT32_C(0x20A3): + return "FRENCH FRANC SIGN"; + + case UINT32_C(0x20A4): + return "LIRA SIGN"; + + case UINT32_C(0x20A5): + return "MILL SIGN"; + + case UINT32_C(0x20A6): + return "NAIRA SIGN"; + + case UINT32_C(0x20A7): + return "PESETA SIGN"; + + case UINT32_C(0x20A8): + return "RUPEE SIGN"; + + case UINT32_C(0x20A9): + return "WON SIGN"; + + case UINT32_C(0x20AA): + return "NEW SHEQEL SIGN"; + + case UINT32_C(0x20AB): + return "DONG SIGN"; + + case UINT32_C(0x20AC): + return "EURO SIGN"; + + case UINT32_C(0x20AD): + return "KIP SIGN"; + + case UINT32_C(0x20AE): + return "TUGRIK SIGN"; + + case UINT32_C(0x20AF): + return "DRACHMA SIGN"; + + case UINT32_C(0x20B0): + return "GERMAN PENNY SIGN"; + + case UINT32_C(0x20B1): + return "PESO SIGN"; + + case UINT32_C(0x20B2): + return "GUARANI SIGN"; + + case UINT32_C(0x20B3): + return "AUSTRAL SIGN"; + + case UINT32_C(0x20B4): + return "HRYVNIA SIGN"; + + case UINT32_C(0x20B5): + return "CEDI SIGN"; + + case UINT32_C(0x20B6): + return "LIVRE TOURNOIS SIGN"; + + case UINT32_C(0x20B7): + return "SPESMILO SIGN"; + + case UINT32_C(0x20B8): + return "TENGE SIGN"; + + case UINT32_C(0x20BA): + return "TURKISH LIRA SIGN"; + + case UINT32_C(0x20BB): + return "NORDIC MARK SIGN"; + + case UINT32_C(0x20BC): + 
return "MANAT SIGN"; + + case UINT32_C(0x20BD): + return "RUBLE SYMBOL"; + + case UINT32_C(0x20BE): + return "LARI SIGN"; + + case UINT32_C(0x20BF): + return "BITCOIN SIGN"; + + // LETTERLIKE SYMBOLS: + case UINT32_C(0x2100): + return "ACCOUNT OF"; + + case UINT32_C(0x2101): + return "ADRESSED TO THE SUBJECT"; + + case UINT32_C(0x2102): + return "DOUBLE-STRUCK CAPITAL C"; + + case UINT32_C(0x2103): + return "DEGREE CELSIUS"; + + case UINT32_C(0x2104): + return "CENTRE LINE SYMBOL"; + + case UINT32_C(0x2105): + return "CARE OF"; + + case UINT32_C(0x2106): + return "CADA UNA"; + + case UINT32_C(0x2107): + return "EULER CONSTANT"; + + case UINT32_C(0x2108): + return "SCRUPLE"; + + case UINT32_C(0x2109): + return "DEGREE FAHRENHEIT"; + + case UINT32_C(0x210A): + return "SCRIPT SMALL G"; + + case UINT32_C(0x210B): + return "SCRIPT CAPITAL H"; + + case UINT32_C(0x210C): + return "BLACK-LETTER CAPITAL H"; + + case UINT32_C(0x210D): + return "DOUBLE-STRUCK CAPITAL H"; + + case UINT32_C(0x210E): + return "PLANCK CONSTANT"; + + case UINT32_C(0x210F): + return "PLANCK CONSTANT OVER TWO PI"; + + case UINT32_C(0x2110): + return "SCRIPT CAPITAL I"; + + case UINT32_C(0x2111): + return "BLACK-LETTER CAPITAL I"; + + case UINT32_C(0x2112): + return "SCRIPT CAPITAL L"; + + case UINT32_C(0x2113): + return "SCRIPT SMALL L"; + + case UINT32_C(0x2114): + return "L B BAR SYMBOL"; + + case UINT32_C(0x2115): + return "DOUBLE-STRUCK CAPITAL N"; + + case UINT32_C(0x2116): + return "NUMERO SIGN"; + + case UINT32_C(0x2117): + return "SOUND RECORDING COPYRIGHT"; + + case UINT32_C(0x2118): + return "SCRIPT CAPITAL P"; + + case UINT32_C(0x2119): + return "DOUBLE-STRUCK CAPITAL P"; + + case UINT32_C(0x211A): + return "DOUBLE-STRUCK CAPITAL Q"; + + case UINT32_C(0x211B): + return "SCRIPT CAPITAL R"; + + case UINT32_C(0x211C): + return "BLACK-LETTER CAPITAL R"; + + case UINT32_C(0x211D): + return "DOUBLE-STRUCK CAPITAL R"; + + case UINT32_C(0x211E): + return "PRESCRIPTION TAKE"; + + case 
UINT32_C(0x211F): + return "RESPONSE"; + + case UINT32_C(0x2120): + return "SERVICE MARK"; + + case UINT32_C(0x2121): + return "TELEPHONE SIGN"; + + case UINT32_C(0x2122): + return "TRADE MARK SIGN"; + + case UINT32_C(0x2123): + return "VERSICLE"; + + case UINT32_C(0x2124): + return "DOUBLE-STRUCK CAPITAL Z"; + + case UINT32_C(0x2125): + return "OUNCE SIGN"; + + case UINT32_C(0x2126): + return "OHM SIGN"; + + case UINT32_C(0x2127): + return "INVERTED OHM SIGN"; + + case UINT32_C(0x2128): + return "BLACK-LETTER CAPITAL Z"; + + case UINT32_C(0x2129): + return "TURNED GREEK SMALL LETTER IOTA"; + + case UINT32_C(0x212A): + return "KELVIN SIGN"; + + case UINT32_C(0x212B): + return "ANGSTROM SIGN"; + + case UINT32_C(0x212C): + return "SCRIPT CAPITAL B"; + + case UINT32_C(0x212D): + return "BLACK-LETTER CAPITAL C"; + + case UINT32_C(0x212E): + return "ESTIMATED SYMBOL"; + + case UINT32_C(0x212F): + return "SCRIPT SMALL E"; + + case UINT32_C(0x2130): + return "SCRIPT CAPITAL E"; + + case UINT32_C(0x2131): + return "SCRIPT CAPITAL F"; + + case UINT32_C(0x2132): + return "TURNED CAPITAL F"; + + case UINT32_C(0x2133): + return "SCRIPT CAPITAL M"; + + case UINT32_C(0x2134): + return "SCRIPT SMALL O"; + + case UINT32_C(0x2135): + return "ALEF SYMBOL"; + + case UINT32_C(0x2136): + return "BET SYMBOL"; + + case UINT32_C(0x2137): + return "GIMEL SYMBOL"; + + case UINT32_C(0x2138): + return "DALET SYMBOL"; + + case UINT32_C(0x2139): + return "INFORMATION SOURCE"; + + case UINT32_C(0x213A): + return "ROTATED CAPITAL Q"; + + case UINT32_C(0x213B): + return "FACSIMILE SIGN"; + + case UINT32_C(0x213C): + return "DOUBLE-STRUCK SMALL PI"; + + case UINT32_C(0x213D): + return "DOUBLE-STRUCK SMALL GAMMA"; + + case UINT32_C(0x213E): + return "DOUBLE-STRUCK CAPITAL GAMMA"; + + case UINT32_C(0x213F): + return "DOUBLE-STRUCK CAPITAL PI"; + + case UINT32_C(0x2140): + return "DOUBLE-STRUCK N-ARY SUMMATION"; + + case UINT32_C(0x2141): + return "TURNED SANS-SERIF CAPITAL G"; + + case 
UINT32_C(0x2142): + return "TURNED SANS-SERIF CAPITAL L"; + + case UINT32_C(0x2143): + return "REVERSED SANS-SERIF CAPITAL L"; + + case UINT32_C(0x2144): + return "TURNED SANS-SERIF CAPITAL Y"; + + case UINT32_C(0x2145): + return "DOUBLE-STRUCK ITALIC CAPITAL D"; + + case UINT32_C(0x2146): + return "DOUBLE-STRUCK ITALIC SMALL D"; + + case UINT32_C(0x2147): + return "DOUBLE-STRUCK ITALIC SMALL E"; + + case UINT32_C(0x2148): + return "DOUBLE-STRUCK ITALIC SMALL I"; + + case UINT32_C(0x2149): + return "DOUBLE-STRUCK ITALIC SMALL J"; + + case UINT32_C(0x214A): + return "PROPERTY LINE"; + + case UINT32_C(0x214B): + return "TURNED AMPERSAND"; + + case UINT32_C(0x214C): + return "PER SIGN"; + + case UINT32_C(0x214D): + return "AKTIESELSKAB"; + + case UINT32_C(0x214E): + return "TURNED SMALL F"; + + case UINT32_C(0x214F): + return "SYMBOL FOR SAMARITAN SOURCE"; + + // NUMBER FORMS: + case UINT32_C(0x2150): + return "VULGAR FRACTION ONE SEVENTH"; + + case UINT32_C(0x2151): + return "VULGAR FRACTION ONE NINTH"; + + case UINT32_C(0x2152): + return "VULGAR FRACTION ONE TENTH"; + + case UINT32_C(0x2153): + return "VULGAR FRACTION ONE THIRD"; + + case UINT32_C(0x2154): + return "VULGAR FRACTION TWO THIRDS"; + + case UINT32_C(0x2155): + return "VULGAR FRACTION ONE FIFTH"; + + case UINT32_C(0x2156): + return "VULGAR FRACTION TWO FIFTHS"; + + case UINT32_C(0x2157): + return "VULGAR FRACTION THREE FIFTHS"; + + case UINT32_C(0x2158): + return "VULGAR FRACTION FOUR FIFTHS"; + + case UINT32_C(0x2159): + return "VULGAR FRACTION ONE SIXTH"; + + case UINT32_C(0x215A): + return "VULGAR FRACTION FIVE SIXTHS"; + + case UINT32_C(0x215B): + return "VULGAR FRACTION ONE EIGTH"; + + case UINT32_C(0x215C): + return "VULGAR FRACTION THREE EIGTHS"; + + case UINT32_C(0x215D): + return "VULGAR FRACTION FIVE EIGHTS"; + + case UINT32_C(0x215E): + return "VULGAR FRACTION SEVEN EIGTHS"; + + case UINT32_C(0x215F): + return "FRACTION NUMERATOR ONE"; + + case UINT32_C(0x2160): + return "ROMAN NUMERAL ONE"; + 
+ case UINT32_C(0x2161): + return "ROMAN NUMERAL TWO"; + + case UINT32_C(0x2162): + return "ROMAN NUMERAL THREE"; + + case UINT32_C(0x2163): + return "ROMAN NUMERAL FOUR"; + + case UINT32_C(0x2164): + return "ROMAN NUMERAL FIVE"; + + case UINT32_C(0x2165): + return "ROMAN NUMERAL SIX"; + + case UINT32_C(0x2166): + return "ROMAN NUMERAL SEVEN"; + + case UINT32_C(0x2167): + return "ROMAN NUMERAL EIGHT"; + + case UINT32_C(0x2168): + return "ROMAN NUMERAL NINE"; + + case UINT32_C(0x2169): + return "ROMAN NUMERAL TEN"; + + case UINT32_C(0x216A): + return "ROMAN NUMERAL ELEVEN"; + + case UINT32_C(0x216B): + return "ROMAN NUMERAL TWELVE"; + + case UINT32_C(0x216C): + return "ROMAN NUMERAL FIFTY"; + + case UINT32_C(0x216D): + return "ROMAN NUMERAL ONE HUNDRED"; + + case UINT32_C(0x216E): + return "ROMAN NUMERAL FIVE HUNDRED"; + + case UINT32_C(0x216F): + return "ROMAN NUMERAL ONE THOUSAND"; + + case UINT32_C(0x2170): + return "SMALL ROMAN NUMERAL ONE"; + + case UINT32_C(0x2171): + return "SMALL ROMAN NUMERAL TWO"; + + case UINT32_C(0x2172): + return "SMALL ROMAN NUMERAL THREE"; + + case UINT32_C(0x2173): + return "SMALL ROMAN NUMERAL FOUR"; + + case UINT32_C(0x2174): + return "SMALL ROMAN NUMERAL FIVE"; + + case UINT32_C(0x2175): + return "SMALL ROMAN NUMERAL SIX"; + + case UINT32_C(0x2176): + return "SMALL ROMAN NUMERAL SEVEN"; + + case UINT32_C(0x2177): + return "SMALL ROMAN NUMERAL EIGHT"; + + case UINT32_C(0x2178): + return "SMALL ROMAN NUMERAL NINE"; + + case UINT32_C(0x2179): + return "SMALL ROMAN NUMERAL TEN"; + + case UINT32_C(0x217A): + return "SMALL ROMAN NUMERAL ELEVEN"; + + case UINT32_C(0x217B): + return "SMALL ROMAN NUMERAL TWELVE"; + + case UINT32_C(0x217C): + return "SMALL ROMAN NUMERAL FIFTY"; + + case UINT32_C(0x217D): + return "SMALL ROMAN NUMERAL ONE HUNDRED"; + + case UINT32_C(0x217E): + return "SMALL ROMAN NUMERAL FIVE HUNDRED"; + + case UINT32_C(0x217F): + return "SMALL ROMAN NUMERAL ONE THOUSAND"; + + case UINT32_C(0x2180): + return "ROMAN NUMERAL 
ONE THOUSAND C D"; + + case UINT32_C(0x2181): + return "ROMAN NUMERAL FIVE THOUSAND"; + + case UINT32_C(0x2182): + return "ROMAN NUMERAL TEN THOUSAND"; + + case UINT32_C(0x2183): + return "ROMAN NUMERAL REVERSED ONE HUNDRED"; + + case UINT32_C(0x2184): + return "LATIN SMALL LETTER REVERSED C"; + + case UINT32_C(0x2185): + return "ROMAN NUMERAL SIX LATE FORM"; + + case UINT32_C(0x2186): + return "ROMAN NUMERAL FIFTY EARLY FORM"; + + case UINT32_C(0x2187): + return "ROMAN NUMERAL FIFTY THOUSAND"; + + case UINT32_C(0x2188): + return "ROMAN NUMERAL ONE HUNDRED THOUSAND"; + + case UINT32_C(0x2189): + return "VULGAR FRACTION ZERO THIRDS"; + + case UINT32_C(0x218A): + return "TURNED DIGIT TWO"; + + case UINT32_C(0x218B): + return "TURNED DIGIT THREE"; + + // MISCELLANEOUS SYMBOLS: + case UINT32_C(0x2630): + return "TRIGRAM FOR HEAVEN"; + + case UINT32_C(0x2631): + return "TRIGRAM FOR LAKE"; + + case UINT32_C(0x2632): + return "TRIGRAM FOR FIRE"; + + case UINT32_C(0x2633): + return "TRIGRAM FOR THUNDER"; + + case UINT32_C(0x2634): + return "TRIGRAM FOR WIND"; + + case UINT32_C(0x2635): + return "TRIGRAM FOR WATER"; + + case UINT32_C(0x2636): + return "TRIGRAM FOR MOUNTAIN"; + + case UINT32_C(0x2637): + return "TRIGRAM FOR EARTH"; + + case UINT32_C(0x2638): + return "WHEEL OF DHARMA"; + + case UINT32_C(0x2639): + return "WHITE FROWNING FACE"; + + case UINT32_C(0x263A): + return "WHITE SMILING FACE"; + + case UINT32_C(0x263B): + return "BLACK SMILING FACE"; + + case UINT32_C(0x263C): + return "WHITE SUN WITH RAYS"; + + case UINT32_C(0x263D): + return "FIRST QUARTER MOON"; + + case UINT32_C(0x263E): + return "LAST QUARTER MOON"; + + case UINT32_C(0x263F): + return "MERCURY"; + + case UINT32_C(0x2640): + return "FEMALE SIGN"; + + case UINT32_C(0x2641): + return "EARTH"; + + case UINT32_C(0x2642): + return "MALE SIGN"; + + case UINT32_C(0x2643): + return "JUPITER"; + + case UINT32_C(0x2644): + return "SATURN"; + + case UINT32_C(0x2645): + return "URANUS"; + + case 
UINT32_C(0x2646): + return "NEPTUNE"; + + case UINT32_C(0x2647): + return "PLUTO"; + + case UINT32_C(0x2648): + return "ARIES"; + + case UINT32_C(0x2649): + return "TAURUS"; + + case UINT32_C(0x264A): + return "GEMNINI"; + + case UINT32_C(0x264B): + return "CANCER"; + + case UINT32_C(0x264C): + return "LEO"; + + case UINT32_C(0x264D): + return "VIRGO"; + + case UINT32_C(0x264E): + return "LIBRA"; + + case UINT32_C(0x264F): + return "SCORPIUS"; + + case UINT32_C(0x2650): + return "SAGITTARIUS"; + + case UINT32_C(0x2651): + return "CAPRICORN"; + + case UINT32_C(0x2652): + return "AQUARIUS"; + + case UINT32_C(0x2653): + return "PISCES"; + + case UINT32_C(0x2654): + return "WHITE CHESS KING"; + + case UINT32_C(0x2655): + return "WHITE CHESS QUEEN"; + + case UINT32_C(0x2656): + return "WHITE CHESS ROOK"; + + case UINT32_C(0x2657): + return "WHITE CHESS BISHOP"; + + case UINT32_C(0x2658): + return "WHITE CHESS KNIGHT"; + + case UINT32_C(0x2659): + return "WHITE CHESS PAWN"; + + case UINT32_C(0x265A): + return "BLACK CHESS KING"; + + case UINT32_C(0x265B): + return "BLACK CHESS QUEEN"; + + case UINT32_C(0x265C): + return "BLACK CHESS ROOK"; + + case UINT32_C(0x265D): + return "BLACK CHESS BISHOP"; + + case UINT32_C(0x265E): + return "BLACK CHESS KNIGHT"; + + case UINT32_C(0x265F): + return "BLACK CHESS PAWN"; + + case UINT32_C(0x2660): + return "BLACK SPADE SUIT"; + + case UINT32_C(0x2661): + return "WHITE HEART SUIT"; + + case UINT32_C(0x2662): + return "WHITE DIAMOND SUIT"; + + case UINT32_C(0x2663): + return "BLACK CLUB SUIT"; + + case UINT32_C(0x2664): + return "WHITE SPADE SUIT"; + + case UINT32_C(0x2665): + return "BLACK HEART SUIT"; + + case UINT32_C(0x2666): + return "BLACK DIAMOND SUIT"; + + case UINT32_C(0x2667): + return "WHITE CLUB SUIT"; + + case UINT32_C(0x2668): + return "HOT SPRINGS"; + + case UINT32_C(0x2669): + return "QUARTER NOTE"; + + case UINT32_C(0x266A): + return "EIGHT NOTE"; + + case UINT32_C(0x266B): + return "BEAMED EIGTH NOTES"; + + case 
UINT32_C(0x266C): + return "BEAMED SIXTEENTH NOTES"; + + case UINT32_C(0x266D): + return "MUSIC FLAT SIGN"; + + case UINT32_C(0x266E): + return "MUSIC NEUTRAL SIGN"; + + case UINT32_C(0x266F): + return "MUSIC SHARP SIGN"; + + case UINT32_C(0x2670): + return "WEST SYRIAC CROSS"; + + case UINT32_C(0x2671): + return "EAST SYRIAC CROSS"; + + case UINT32_C(0x2672): + return "UNIVERSAL RECYCLING SYMBOL"; + + case UINT32_C(0x2673): + return "RECYCLING SYMBOL FOR TYPE-1 PLASTICS"; + + case UINT32_C(0x2674): + return "RECYCLING SYMBOL FOR TYPE-2 PLASTICS"; + + case UINT32_C(0x2675): + return "RECYCLING SYMBOL FOR TYPE-3 PLASTICS"; + + case UINT32_C(0x2676): + return "RECYCLING SYMBOL FOR TYPE-4 PLASTICS"; + + case UINT32_C(0x2677): + return "RECYCLING SYMBOL FOR TYPE-5 PLASTICS"; + + case UINT32_C(0x2678): + return "RECYCLING SYMBOL FOR TYPE-6 PLASTICS"; + + case UINT32_C(0x2679): + return "RECYCLING SYMBOL FOR TYPE-7 PLASTICS"; + + case UINT32_C(0x267A): + return "RECYCLING SYMBOL FOR GENERIC MATERIALS"; + + case UINT32_C(0x267B): + return "BLACK UNIVERSAL RECYCLING SYMBOL"; + + case UINT32_C(0x267C): + return "RECYCLED PAPER SYMBOL"; + + case UINT32_C(0x267D): + return "PARTIALLY-RECYCLED PAPER SYMBOL"; + + case UINT32_C(0x267E): + return "PERMANENT PAPER SIGN"; + + case UINT32_C(0x267F): + return "WHEELCHAIR SYMBOL"; + + case UINT32_C(0x26B9): + return "SEXTILE"; + + // DINGBATS: + case UINT32_C(0x271D): + return "LATIN CROSS"; + + case UINT32_C(0x2721): + return "STAR OF DAVID"; + + // SUPPLEMENTAL PUNCTUATION: + case UINT32_C(0x2E3B): + return "THREE-EM DASH"; + + // ARABIC PRESENTATION FORMS-A: + case UINT32_C(0xFDFD): + return "ARABIC LIGATURE BISMILLAH AL-RAHMAN AR-RAHEEM"; + + // ANCIENT SYMBOLS: + case UINT32_C(0x00010190): + return "ROMAN SEXTANS SIGN"; + + case UINT32_C(0x00010191): + return "ROMAN UNCIA SIGN"; + + case UINT32_C(0x00010192): + return "ROMAN SEMUNCIA SIGN"; + + case UINT32_C(0x00010193): + return "ROMAN SEXTULA SIGN"; + + case 
UINT32_C(0x00010194): + return "ROMAN DIMIDIA SEXTULA SIGN"; + + case UINT32_C(0x00010195): + return "ROMAN SILIQUA SIGN"; + + case UINT32_C(0x00010196): + return "ROMAN DENARIUS SIGN"; + + case UINT32_C(0x00010197): + return "ROMAN QUINARIUS SIGN"; + + case UINT32_C(0x00010198): + return "ROMAN SESTERTIUS SIGN"; + + case UINT32_C(0x00010199): + return "ROMAN DUPONDIUS SIGN"; + + case UINT32_C(0x0001019A): + return "ROMAN AS SIGN"; + + case UINT32_C(0x0001019B): + return "ROMAN CENTURIAL SIGN"; + + case UINT32_C(0x0001019C): + return "ASCIA SIGN"; + + // BRAHMI: + case UINT32_C(0x00011066): + return "BRAHMI DIGIT ZERO"; + + case UINT32_C(0x00011067): + return "BRAHMI DIGIT ONE"; + + case UINT32_C(0x00011068): + return "BRAHMI DIGIT TWO"; + + case UINT32_C(0x00011069): + return "BRAHMI DIGIT THREE"; + + case UINT32_C(0x0001106A): + return "BRAHMI DIGIT FOUR"; + + case UINT32_C(0x0001106B): + return "BRAHMI DIGIT FIVE"; + + case UINT32_C(0x0001106C): + return "BRAHMI DIGIT SIX"; + + case UINT32_C(0x0001106D): + return "BRAHMI DIGIT SEVEN"; + + case UINT32_C(0x0001106E): + return "BRAHMI DIGIT EIGHT"; + + case UINT32_C(0x0001106F): + return "BRAHMI DIGIT NINE"; + + // CUNEIFORM: + case UINT32_C(0x00012031): + return "CUNEIFORM SIGN AN PLUS NAGA SQUARED"; + + // CUNEIFORM NUMBERS AND PUNCTUATION: + case UINT32_C(0x0001242B): + return "CUNEIFORM NUMERIC SIGN NINE SHAR2"; + + // EGYPTIAN HIEROGLYPHS: + case UINT32_C(0x000130B8): + return "EGYPTIAN HIEROGLYPH D052"; + + // COUNTING ROD NUMERALS: + case UINT32_C(0x0001D372): + return "IDEOGRAPHIC TALLY MARK ONE"; + + case UINT32_C(0x0001D373): + return "IDEOGRAPHIC TALLY MARK TWO"; + + case UINT32_C(0x0001D374): + return "IDEOGRAPHIC TALLY MARK THREE"; + + case UINT32_C(0x0001D375): + return "IDEOGRAPHIC TALLY MARK FOUR"; + + case UINT32_C(0x0001D376): + return "IDEOGRAPHIC TALLY MARK FIVE"; + + case UINT32_C(0x0001D377): + return "TALLY MARK ONE"; + + case UINT32_C(0x0001D378): + return "TALLY MARK FIVE"; + + // ENCLOSED 
ALPHANUMERIC SUPPLEMENT: + case UINT32_C(0x0001F10D): + return "CIRCLED ZERO WITH SLASH"; + + case UINT32_C(0x0001F10E): + return "CIRCLED ANTICKLOCKWISE ARROW"; + + case UINT32_C(0x0001F10F): + return "CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH"; + + case UINT32_C(0x0001F12F): + return "COPYLEFT SYMBOL"; + + case UINT32_C(0x0001F16D): + return "CIRCLED CC"; + + case UINT32_C(0x0001F16E): + return "CIRCLED C WITH OVERLAID BACKSLASH"; + + case UINT32_C(0x0001F16F): + return "CIRCLED HUMAN FIGURE"; + + // EMOTICONS: + case UINT32_C(0x0001F600): + return "GRINNING FACE"; + + case UINT32_C(0x0001F601): + return "GRINNING FACE WITH SMIRKING EYES"; + + case UINT32_C(0x0001F602): + return "FACE WITH TEARS OF JOY"; + + case UINT32_C(0x0001F603): + return "SMILING FACE WITH OPEN MOUTH"; + + case UINT32_C(0x0001F604): + return "SMILING FACE WITH OPEN MOUTH AND SMILING EYES"; + + case UINT32_C(0x0001F605): + return "SMILING FACE WITH OPEN MOUTH AND COULD SWEAT"; + + case UINT32_C(0x0001F606): + return "SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES"; + + case UINT32_C(0x0001F607): + return "SMILING FACE WITH HALO"; + + case UINT32_C(0x0001F608): + return "SMILING FACE WITH HORNS"; + + case UINT32_C(0x0001F609): + return "WINKING FACE"; + + case UINT32_C(0x0001F60A): + return "SMILING FACE WITH SMILING EYES"; + + case UINT32_C(0x0001F60B): + return "FACE SAVOURING DELICIOUS FOOD"; + + case UINT32_C(0x0001F60C): + return "RELIEVED FACE"; + + case UINT32_C(0x0001F60D): + return "SMILLING FACE HEART-SHAPED EYES"; + + case UINT32_C(0x0001F60E): + return "SMILLING FACE WITH SUNGLASSES"; + + case UINT32_C(0x0001F60F): + return "SMIRKING FACE"; + + case UINT32_C(0x0001F610): + return "NEUTRAL FACE"; + + case UINT32_C(0x0001F611): + return "EXPRESSIONLESS FACE"; + + case UINT32_C(0x0001F612): + return "UNAMUSED FACE"; + + case UINT32_C(0x0001F613): + return "FACE WITH COLD SWEAT"; + + case UINT32_C(0x0001F614): + return "PENSIVE FACE"; + + case UINT32_C(0x0001F615): + return 
"CONFUSED FACE"; + + case UINT32_C(0x0001F616): + return "CONFOUNDED FACE"; + + case UINT32_C(0x0001F617): + return "KISSING FACE"; + + case UINT32_C(0x0001F618): + return "FACE THROWING A KISS"; + + case UINT32_C(0x0001F619): + return "KISSING FACE WITH SMILLING EYES"; + + case UINT32_C(0x0001F61A): + return "KISSING FACE WITH CLOSED EYES"; + + case UINT32_C(0x0001F61B): + return "FACE WITH STUCK-OUT TONGUE"; + + case UINT32_C(0x0001F61C): + return "FACE WITH STUCK-OUT TONGUE AND WINKING EYE"; + + case UINT32_C(0x0001F61D): + return "FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES"; + + case UINT32_C(0x0001F61E): + return "DISSAPOINTED FACE"; + + case UINT32_C(0x0001F61F): + return "WORRIED FACE"; + + case UINT32_C(0x0001F620): + return "ANGRY FACE"; + + case UINT32_C(0x0001F621): + return "POUTING FACE"; + + case UINT32_C(0x0001F622): + return "CRYING FACE"; + + case UINT32_C(0x0001F623): + return "PERSEVERING FACE"; + + case UINT32_C(0x0001F624): + return "FACE WITH LOOK OF TRIUMPH"; + + case UINT32_C(0x0001F625): + return "DISSAPOINTED BUT RELIEVED FACE"; + + case UINT32_C(0x0001F626): + return "FROWNING FACE WITH OPEN MOUTH"; + + case UINT32_C(0x0001F627): + return "ANGUISHED FACE"; + + case UINT32_C(0x0001F628): + return "FEARFUL FACE"; + + case UINT32_C(0x0001F629): + return "WEARY FACE"; + + case UINT32_C(0x0001F62A): + return "SLEEPY FACE"; + + case UINT32_C(0x0001F62B): + return "TIRED FACE"; + + case UINT32_C(0x0001F62C): + return "GRIMACING FACE"; + + case UINT32_C(0x0001F62D): + return "LOUDLY CRYING FACE"; + + case UINT32_C(0x0001F62E): + return "FACE WITH OPEN MOUTH"; + + case UINT32_C(0x0001F62F): + return "HUSHED FACE"; + + case UINT32_C(0x0001F630): + return "FACE WITH OPEN MOUTH AND COLD SWEAT"; + + case UINT32_C(0x0001F631): + return "FACE SCREAMING IN FEAR"; + + case UINT32_C(0x0001F632): + return "ASTONISHED FACE"; + + case UINT32_C(0x0001F633): + return "FLUSHED FACE"; + + case UINT32_C(0x0001F634): + return "SLEEPING FACE"; + + case 
UINT32_C(0x0001F635): + return "DIZZY FACE"; + + case UINT32_C(0x0001F636): + return "FACE WITHOUT MOUTH"; + + case UINT32_C(0x0001F637): + return "FACE WITH MEDICAL MASK"; + + case UINT32_C(0x0001F641): + return "SLIGHTLY FROWNING FACE"; + + case UINT32_C(0x0001F642): + return "SLIGHTLY SMILING FACE"; + + case UINT32_C(0x0001F643): + return "UPSIDE-DOWN FACE"; + + case UINT32_C(0x0001F644): + return "FACE WITH ROLLING EYES"; + + // ORNAMENTAL DINGBATS: + case UINT32_C(0x0001F670): + return "SCRIPT LIGATURE ET ORNAMENT"; + + case UINT32_C(0x0001F671): + return "HEAVY SCRIPT LIGATURE ET ORNAMENT"; + + case UINT32_C(0x0001F672): + return "LIGATURE OPEN ET ORNAMENT"; + + case UINT32_C(0x0001F673): + return "HEAVY LIGATURE OPEN ET ORNAMENT"; + + case UINT32_C(0x0001F674): + return "HEAVY AMPERSAND ORNAMENT"; + + case UINT32_C(0x0001F675): + return "SWASH AMPERSAND ORNAMENT"; + + case UINT32_C(0x0001F676): + return "SANS-SERIF HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT"; + + case UINT32_C(0x0001F677): + return "SANS-SERIF HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT"; + + case UINT32_C(0x0001F678): + return "SANS-SERIF HEAVY LOW DOUBLE QUOTATION MARK ORNAMENT"; + + case UINT32_C(0x0001F679): + return "HEAVY INTERROBANG ORNAMENT"; + + case UINT32_C(0x0001F67A): + return "SANS-SERIF INTERROBANG ORNAMENT"; + + case UINT32_C(0x0001F67B): + return "HEAVY SANS-SERIF INTERROBANG ORNAMENT"; + + case UINT32_C(0x0001F67C): + return "VERY HEAVY SOLIDUS"; + + case UINT32_C(0x0001F67D): + return "VERY HEAVY REVERSE SOLIDUS"; + + case UINT32_C(0x0001F67E): + return "CHECKER BOARD"; + + case UINT32_C(0x0001F67F): + return "REVERSE CHECKER BOARD"; + + // CJK UNIFIED IDEOGRAPHS EXTENSION G: + case UINT32_C(0x0003106C): + return "CJK UNIFIED IDEOGRAPH-3106C"; + + // TAGS: + case UINT32_C(0x000E0001): + return "LANGUAGE TAG"; + + case UINT32_C(0x000E0020): + return "TAG SPACE"; + + case UINT32_C(0x000E0021): + return "TAG EXCLAMATION MARK"; + + case UINT32_C(0x000E0022): + return 
"TAG QUOTATION MARK"; + + case UINT32_C(0x000E0023): + return "TAG NUMBER SIGN"; + + case UINT32_C(0x000E0024): + return "TAG DOLLAR SIGN"; + + case UINT32_C(0x000E0025): + return "TAG PERCENT SIGN"; + + case UINT32_C(0x000E0026): + return "TAG AMPERSAND"; + + case UINT32_C(0x000E0027): + return "TAG APOSTROPHE"; + + case UINT32_C(0x000E0028): + return "TAG LEFT PARANTHESIS"; + + case UINT32_C(0x000E0029): + return "TAG RIGHT PARANTHESIS"; + + case UINT32_C(0x000E002A): + return "TAG ASTERISK"; + + case UINT32_C(0x000E002B): + return "TAG PLUS SIGN"; + + case UINT32_C(0x000E002C): + return "TAG COMMA"; + + case UINT32_C(0x000E002D): + return "TAG HYPHEN-MINUS"; + + case UINT32_C(0x000E002E): + return "TAG FULL STOP"; + + case UINT32_C(0x000E002F): + return "TAG SOLIDUS"; + + case UINT32_C(0x000E0030): + return "TAG DIGIT ZERO"; + + case UINT32_C(0x000E0031): + return "TAG DIGIT ONE"; + + case UINT32_C(0x000E0032): + return "TAG DIGIT TWO"; + + case UINT32_C(0x000E0033): + return "TAG DIGIT THREE"; + + case UINT32_C(0x000E0034): + return "TAG DIGIT FOUR"; + + case UINT32_C(0x000E0035): + return "TAG DIGIT FIVE"; + + case UINT32_C(0x000E0036): + return "TAG DIGIT SIX"; + + case UINT32_C(0x000E0037): + return "TAG DIGIT SEVEN"; + + case UINT32_C(0x000E0038): + return "TAG DIGIT EIGHT"; + + case UINT32_C(0x000E0039): + return "TAG DIGIT NINE"; + + case UINT32_C(0x000E003A): + return "TAG COLON"; + + case UINT32_C(0x000E003B): + return "TAG SEMICOLON"; + + case UINT32_C(0x000E003C): + return "TAG LESS-THAN SIGN"; + + case UINT32_C(0x000E003D): + return "TAG EQUALS SIGN"; + + case UINT32_C(0x000E003E): + return "TAG GREATER-THAN SIGN"; + + case UINT32_C(0x000E003F): + return "TAG QUESTION MARK"; + + case UINT32_C(0x000E0040): + return "TAG COMMERCIAL AT"; + + case UINT32_C(0x000E0041): + return "TAG LATIN CAPITAL LETTER A"; + + case UINT32_C(0x000E0042): + return "TAG LATIN CAPITAL LETTER B"; + + case UINT32_C(0x000E0043): + return "TAG LATIN CAPITAL LETTER C"; + + case 
UINT32_C(0x000E0044): + return "TAG LATIN CAPITAL LETTER D"; + + case UINT32_C(0x000E0045): + return "TAG LATIN CAPITAL LETTER E"; + + case UINT32_C(0x000E0046): + return "TAG LATIN CAPITAL LETTER F"; + + case UINT32_C(0x000E0047): + return "TAG LATIN CAPITAL LETTER G"; + + case UINT32_C(0x000E0048): + return "TAG LATIN CAPITAL LETTER H"; + + case UINT32_C(0x000E0049): + return "TAG LATIN CAPITAL LETTER I"; + + case UINT32_C(0x000E004A): + return "TAG LATIN CAPITAL LETTER J"; + + case UINT32_C(0x000E004B): + return "TAG LATIN CAPITAL LETTER K"; + + case UINT32_C(0x000E004C): + return "TAG LATIN CAPITAL LETTER L"; + + case UINT32_C(0x000E004D): + return "TAG LATIN CAPITAL LETTER M"; + + case UINT32_C(0x000E004E): + return "TAG LATIN CAPITAL LETTER N"; + + case UINT32_C(0x000E004F): + return "TAG LATIN CAPITAL LETTER O"; + + case UINT32_C(0x000E0050): + return "TAG LATIN CAPITAL LETTER P"; + + case UINT32_C(0x000E0051): + return "TAG LATIN CAPITAL LETTER Q"; + + case UINT32_C(0x000E0052): + return "TAG LATIN CAPITAL LETTER R"; + + case UINT32_C(0x000E0053): + return "TAG LATIN CAPITAL LETTER S"; + + case UINT32_C(0x000E0054): + return "TAG LATIN CAPITAL LETTER T"; + + case UINT32_C(0x000E0055): + return "TAG LATIN CAPITAL LETTER "; + + case UINT32_C(0x000E0056): + return "TAG LATIN CAPITAL LETTER V"; + + case UINT32_C(0x000E0057): + return "TAG LATIN CAPITAL LETTER W"; + + case UINT32_C(0x000E0058): + return "TAG LATIN CAPITAL LETTER X"; + + case UINT32_C(0x000E0059): + return "TAG LATIN CAPITAL LETTER Y"; + + case UINT32_C(0x000E005A): + return "TAG LATIN CAPITAL LETTER Z"; + + case UINT32_C(0x000E005B): + return "TAG LEFT SQUARE BRACKET"; + + case UINT32_C(0x000E005C): + return "TAG REVERSE SOLIDUS"; + + case UINT32_C(0x000E005D): + return "TAG RIGHT SQUARE BRACKET"; + + case UINT32_C(0x000E005E): + return "TAG CIRCUMFLEX ACCENT"; + + case UINT32_C(0x000E005F): + return "TAG LOW LINE"; + + case UINT32_C(0x000E0060): + return "TAG GRAVE ACCENT"; + + case 
UINT32_C(0x000E0061): + return "TAG LATIN SMALL LETTER A"; + + case UINT32_C(0x000E0062): + return "TAG LATIN SMALL LETTER B"; + + case UINT32_C(0x000E0063): + return "TAG LATIN SMALL LETTER C"; + + case UINT32_C(0x000E0064): + return "TAG LATIN SMALL LETTER D"; + + case UINT32_C(0x000E0065): + return "TAG LATIN SMALL LETTER E"; + + case UINT32_C(0x000E0066): + return "TAG LATIN SMALL LETTER F"; + + case UINT32_C(0x000E0067): + return "TAG LATIN SMALL LETTER G"; + + case UINT32_C(0x000E0068): + return "TAG LATIN SMALL LETTER H"; + + case UINT32_C(0x000E0069): + return "TAG LATIN SMALL LETTER I"; + + case UINT32_C(0x000E006A): + return "TAG LATIN SMALL LETTER J"; + + case UINT32_C(0x000E006B): + return "TAG LATIN SMALL LETTER K"; + + case UINT32_C(0x000E006C): + return "TAG LATIN SMALL LETTER L"; + + case UINT32_C(0x000E006D): + return "TAG LATIN SMALL LETTER M"; + + case UINT32_C(0x000E006E): + return "TAG LATIN SMALL LETTER N"; + + case UINT32_C(0x000E006F): + return "TAG LATIN SMALL LETTER O"; + + case UINT32_C(0x000E0070): + return "TAG LATIN SMALL LETTER P"; + + case UINT32_C(0x000E0071): + return "TAG LATIN SMALL LETTER Q"; + + case UINT32_C(0x000E0072): + return "TAG LATIN SMALL LETTER R"; + + case UINT32_C(0x000E0073): + return "TAG LATIN SMALL LETTER S"; + + case UINT32_C(0x000E0074): + return "TAG LATIN SMALL LETTER T"; + + case UINT32_C(0x000E0075): + return "TAG LATIN SMALL LETTER "; + + case UINT32_C(0x000E0076): + return "TAG LATIN SMALL LETTER V"; + + case UINT32_C(0x000E0077): + return "TAG LATIN SMALL LETTER W"; + + case UINT32_C(0x000E0078): + return "TAG LATIN SMALL LETTER X"; + + case UINT32_C(0x000E0079): + return "TAG LATIN SMALL LETTER Y"; + + case UINT32_C(0x000E007A): + return "TAG LATIN SMALL LETTER Z"; + + case UINT32_C(0x000E007B): + return "TAG LEFT CURLY BRACKET"; + + case UINT32_C(0x000E007C): + return "TAG VERTICAL LINE"; + + case UINT32_C(0x000E007D): + return "TAG RIGHT CURLY BRACKET"; + + case UINT32_C(0x000E007E): + return "TAG 
TILDE"; + + case UINT32_C(0x000E007F): + return "CANCEL TAG"; + } +} diff --git a/u8c/source/format/decode_utf16.c b/u8c/source/format/decode_utf16.c new file mode 100644 index 0000000..b163819 --- /dev/null +++ b/u8c/source/format/decode_utf16.c @@ -0,0 +1,70 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. + + u8c is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU + Lesser General Public License along with u8c. If + not, see <https://www.gnu.org/licenses/>. +*/ + +#include <u8c/character.h> +#include <u8c/format.h> + +#include <limits.h> +#include <stddef.h> +#include <stdint.h> + +size_t u8c_decode_utf16(uint_least32_t* const restrict buffer, uint_least16_t const* const restrict source, size_t const count) { + ptrdiff_t index_out = 0x0; + + for (ptrdiff_t index_in = 0x0; index_in < (ptrdiff_t)count; ++index_out) { + size_t const remaining = count - 0x1u - index_in; + + uint_least32_t const hextet = (uint_least32_t)source[index_in]; + + uint_least32_t code_point = UINT32_C(0x0); + + if (hextet >= UINT32_C(0xD800) && hextet <= UINT32_C(0xDBFF)) { + if (remaining < 0x1u) { + code_point = UINT32_C(0xFFFD); + } else { + uint_least32_t const hextet1 = (uint_least32_t)source[index_in + 0x1]; + + code_point |= (hextet ^ UINT32_C(0xD800)) << UINT32_C(0xA); + code_point |= hextet1 ^ UINT32_C(0xDC00); + + code_point += UINT32_C(0x10000); + } + + index_in += 0x2; + } else { + if (hextet >= UINT32_C(0xDC00) && hextet <= UINT32_C(0xDFFF)) { + code_point = 
UINT32_C(0xFFFD); + } else { + code_point |= hextet; + } + + ++index_in; + } + + if (u8c_is_surrogate(code_point)) { code_point = UINT32_C(0xFFFD); } + + buffer[index_out] = code_point; + } + + return (size_t)index_out; +} diff --git a/u8c/source/format/decode_utf16_length.c b/u8c/source/format/decode_utf16_length.c new file mode 100644 index 0000000..bb65ce3 --- /dev/null +++ b/u8c/source/format/decode_utf16_length.c @@ -0,0 +1,43 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. + + u8c is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU + Lesser General Public License along with u8c. If + not, see <https://www.gnu.org/licenses/>. +*/ + +#include <u8c/format.h> + +#include <stddef.h> +#include <stdint.h> + +size_t u8c_decode_utf16_length(uint_least16_t const* const restrict source, size_t const count) { + size_t length = 0x0u; + + for (ptrdiff_t index = 0x0; index < (ptrdiff_t)count; ++length) { + uint_least16_t const hextet = source[index]; + + if (hextet >= UINT16_C(0xD800) && hextet < UINT16_C(0xDC00)) { + index += 0x2u; + } else { + ++index; + } + } + + return length; +} diff --git a/u8c/source/format/decode_utf8.c b/u8c/source/format/decode_utf8.c new file mode 100644 index 0000000..999095f --- /dev/null +++ b/u8c/source/format/decode_utf8.c @@ -0,0 +1,118 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. 
+ + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. + + u8c is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU + Lesser General Public License along with u8c. If + not, see <https://www.gnu.org/licenses/>. +*/ + +#include <u8c/character.h> +#include <u8c/format.h> + +#include <stddef.h> +#include <stdint.h> + +size_t u8c_decode_utf8(uint_least32_t* const restrict buffer, char const* const restrict _source, size_t const count) { + char unsigned const* const restrict source = (char unsigned const*)_source; + + ptrdiff_t index_out = 0x0; + + for (ptrdiff_t index_in = 0x0; index_in < (ptrdiff_t)count; ++index_out) { + size_t const remaining = count - 0x1u - index_in; + + uint_least32_t const octet = (uint_least32_t)source[index_in]; + + uint_least32_t code_point = UINT32_C(0x0); + + if ((octet & UINT32_C(0xF8)) == UINT32_C(0xF0)) { + if (remaining < 0x3u) { + code_point = UINT32_C(0xFFFD); + } else { + uint_least32_t const octet1 = (uint_least32_t)source[index_in + 0x1]; + uint_least32_t const octet2 = (uint_least32_t)source[index_in + 0x2]; + uint_least32_t const octet3 = (uint_least32_t)source[index_in + 0x3]; + + if ( + (octet1 & 0xC0) != 0x80 + || (octet2 & 0xC0) != 0x80 + || (octet3 & 0xC0) != 0x80 + ) { + code_point = UINT32_C(0xFFFD); + } else { + code_point |= (octet ^ UINT32_C(0xF0)) << UINT32_C(0x12); + code_point |= (octet1 ^ UINT32_C(0x80)) << UINT32_C(0xC); + code_point |= (octet2 ^ UINT32_C(0x80)) << UINT32_C(0x6); + code_point |= octet3 ^ UINT32_C(0x80); + } + } + + index_in += 0x4; + } else if ((octet & UINT32_C(0xF0)) == 
UINT32_C(0xE0)) { + if (remaining < 0x2u) { + code_point = UINT32_C(0xFFFD); + } else { + uint_least32_t const octet1 = (uint_least32_t)source[index_in + 0x1]; + uint_least32_t const octet2 = (uint_least32_t)source[index_in + 0x2]; + + if ( + (octet1 & 0xC0) != 0x80 + || (octet2 & 0xC0) != 0x80 + ) { + code_point = UINT32_C(0xFFFD); + } else { + code_point |= (octet ^ UINT32_C(0xE0)) << UINT32_C(0xC); + code_point |= (octet1 ^ UINT32_C(0x80)) << UINT32_C(0x6); + code_point |= octet2 ^ UINT32_C(0x80); + } + } + + index_in += 0x3; + } else if ((octet & UINT32_C(0xE0)) == 0xC0) { + if (remaining < 0x1u) { + code_point = UINT32_C(0xFFFD); + } else { + uint_least32_t const octet1 = (uint_least32_t)source[index_in + 0x1]; + + if ((octet1 & 0xC0) != 0x80) { + code_point = UINT32_C(0xFFFD); + } else { + code_point |= (octet ^ UINT32_C(0xC0)) << UINT32_C(0x6); + code_point |= octet1 ^ UINT32_C(0x80); + } + } + + index_in += 0x2; + } else if ((octet & UINT32_C(0x80)) == UINT32_C(0x0)) { + code_point |= octet; + + ++index_in; + } else { + code_point = UINT32_C(0xFFFD); + + ++index_in; + } + + if (code_point > u8c_MAXIMUM_CODE_POINT) { code_point = UINT32_C(0xFFFD); } + + if (u8c_is_surrogate(code_point)) { code_point = UINT32_C(0xFFFD); } + + buffer[index_out] = code_point; + } + + return (size_t)index_out; +} diff --git a/u8c/source/format/decode_utf8_length.c b/u8c/source/format/decode_utf8_length.c new file mode 100644 index 0000000..09d27a4 --- /dev/null +++ b/u8c/source/format/decode_utf8_length.c @@ -0,0 +1,49 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. 
/*
	Counts the code points that decoding `count` octets of UTF-8 from
	`_source` yields, without decoding them.

	The input is stepped through by lead octet:

	- 0xF0-0xF7: four octets;
	- 0xE0-0xEF: three octets;
	- 0xC0-0xDF: two octets;
	- anything else: one octet.

	Octets in 0xF8-0xFF can never start a sequence and count as a single
	(invalid) unit. u8c_decode_utf8 consumes exactly one octet for them, so
	treating them as four-octet leads would under-report the length.

	Returns the number of code points.
*/
size_t u8c_decode_utf8_length(char const* const restrict _source, size_t const count) {
	char unsigned const* const restrict source = (char unsigned const*)_source;

	size_t length = 0x0u;

	for (ptrdiff_t index = 0x0; index < (ptrdiff_t)count; ++length) {
		char unsigned const octet = source[index];

		if (octet >= 0xF8u) {
			/* Invalid lead octet: a unit of its own. */
			++index;
		} else if (octet >= 0xF0u) {
			index += 0x4;
		} else if (octet >= 0xE0u) {
			index += 0x3;
		} else if (octet >= 0xC0u) {
			index += 0x2;
		} else {
			/* ASCII or a stray continuation octet. */
			++index;
		}
	}

	return length;
}
+*/ + +#include <u8c/character.h> +#include <u8c/format.h> + +#include <stddef.h> +#include <stdint.h> + +size_t u8c_encode_utf16(uint_least16_t* const restrict buffer, uint_least32_t const* const restrict source, size_t const count) { + ptrdiff_t index_out = 0x0; + + for (ptrdiff_t index_in = 0x0; index_in < (ptrdiff_t)count; ++index_in) { + uint_least32_t code_point = source[index_in]; + + if (code_point > u8c_MAXIMUM_CODE_POINT) { code_point = UINT32_C(0xFFFD); } + + if (u8c_is_surrogate(code_point)) { code_point = UINT32_C(0xFFFD); } + + if (code_point >= UINT32_C(0x10000)) { + /* Two hextets. */ + + code_point -= UINT32_C(0x10000); + + buffer[index_out + 0x0] = (uint_least16_t)((code_point >> UINT32_C(0xA)) | UINT32_C(0xD800)); + buffer[index_out + 0x1] = (uint_least16_t)((code_point & UINT32_C(0x3FF)) | UINT32_C(0xDC00)); + + index_out += 0x2u; + } else { + /* One hextet. */ + + buffer[index_out] = code_point; + + ++index_out; + } + } + + return (size_t)index_out; +} diff --git a/u8c/source/format/encode_utf16_length.c b/u8c/source/format/encode_utf16_length.c new file mode 100644 index 0000000..1dc6d3d --- /dev/null +++ b/u8c/source/format/encode_utf16_length.c @@ -0,0 +1,48 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. + + u8c is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU + Lesser General Public License along with u8c. If + not, see <https://www.gnu.org/licenses/>. 
+*/ + +#include <u8c/character.h> +#include <u8c/format.h> + +#include <stddef.h> +#include <stdint.h> + +size_t u8c_encode_utf16_length(uint_least32_t const* const restrict source, size_t const count) { + size_t length = 0x0u; + + for (ptrdiff_t index = 0x0; index < (ptrdiff_t)count; ++index) { + uint_least32_t code_point = source[index]; + + if (code_point > u8c_MAXIMUM_CODE_POINT) { code_point = UINT32_C(0xFFFD); } + + if (u8c_is_surrogate(code_point)) { code_point = UINT32_C(0xFFFD); } + + if (code_point >= UINT32_C(0x10000)) { + length += 0x2u; + } else { + ++length; + } + } + + return length; +} diff --git a/u8c/source/format/encode_utf8.c b/u8c/source/format/encode_utf8.c new file mode 100644 index 0000000..9bdfb69 --- /dev/null +++ b/u8c/source/format/encode_utf8.c @@ -0,0 +1,76 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. + + u8c is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU + Lesser General Public License along with u8c. If + not, see <https://www.gnu.org/licenses/>. 
+*/ + +#include <u8c/character.h> +#include <u8c/format.h> + +#include <stddef.h> +#include <stdint.h> + +size_t u8c_encode_utf8(char* const restrict _buffer, uint_least32_t const* const restrict source, size_t const count) { + char unsigned* const restrict buffer = (char unsigned*)_buffer; + + ptrdiff_t index_out = 0x0; + + for (ptrdiff_t index_in = 0x0; index_in < (ptrdiff_t)count; ++index_in) { + uint_least32_t code_point = source[index_in]; + + if (code_point > u8c_MAXIMUM_CODE_POINT) { code_point = UINT32_C(0xFFFD); } + + if (u8c_is_surrogate(code_point)) { code_point = UINT32_C(0xFFFD); } + + if (code_point >= 0x10000u) { + /* Four octets. */ + + buffer[index_out + 0x0] = (char unsigned)((code_point >> UINT32_C(0x12)) | UINT32_C(0xF0)); + buffer[index_out + 0x1] = (char unsigned)((code_point >> UINT32_C(0xC) & UINT32_C(0x3F)) | UINT32_C(0x80)); + buffer[index_out + 0x2] = (char unsigned)((code_point >> UINT32_C(0x6) & UINT32_C(0x3F)) | UINT32_C(0x80)); + buffer[index_out + 0x3] = (char unsigned)((code_point & UINT32_C(0x3F)) | UINT32_C(0x80)); + + index_out += 0x4u; + } else if (code_point >= 0x800u) { + /* Three octets. */ + + buffer[index_out + 0x0] = (char unsigned)((code_point >> UINT32_C(0xC)) | UINT32_C(0xE0)); + buffer[index_out + 0x1] = (char unsigned)((code_point >> UINT32_C(0x6) & UINT32_C(0x3F)) | UINT32_C(0x80)); + buffer[index_out + 0x2] = (char unsigned)((code_point & UINT32_C(0x3F)) | UINT32_C(0x80)); + + index_out += 0x3u; + } else if (code_point >= 0x80u) { + /* Two octets. */ + + buffer[index_out + 0x0] = (char unsigned)((code_point >> UINT32_C(0x6)) | UINT32_C(0xC0)); + buffer[index_out + 0x1] = (char unsigned)((code_point & UINT32_C(0x3F)) | UINT32_C(0x80)); + + index_out += 0x2u; + } else { + /* One octet. 
*/ + + buffer[index_out] = (char unsigned)code_point; + + ++index_out; + } + } + + return (size_t)index_out; +} diff --git a/u8c/source/format/encode_utf8_length.c b/u8c/source/format/encode_utf8_length.c new file mode 100644 index 0000000..c83f6e1 --- /dev/null +++ b/u8c/source/format/encode_utf8_length.c @@ -0,0 +1,56 @@ +/* + Copyright 2021, 2023 Gabriel Bjørnager Jensen. + + This file is part of u8c. + + u8c is free software: you can redistribute it + and/or modify it under the terms of the GNU + Lesser General Public License as published by + the Free Software Foundation, either version 3 of + the License, or (at your option) any later + version. + + u8c is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU + Lesser General Public License along with u8c. If + not, see <https://www.gnu.org/licenses/>. +*/ + +#include <u8c/character.h> +#include <u8c/format.h> + +#include <stddef.h> +#include <stdint.h> + +size_t u8c_encode_utf8_length(uint_least32_t const* const restrict source, size_t const count) { + size_t length = 0x0u; + + for (ptrdiff_t index = 0x0; index < (ptrdiff_t)count; ++index) { + uint_least32_t code_point = source[index]; + + // We need to check if the input is valid, as the + // its invalidity is replaced with the replacement + // character, which may have a different length. 
+ + if (code_point > u8c_MAXIMUM_CODE_POINT) { code_point = UINT32_C(0xFFFD); } + + if (u8c_is_surrogate(code_point)) { code_point = UINT32_C(0xFFFD); } + + if (code_point >= UINT32_C(0x10000)) { + length += 0x4u; + } else if (code_point >= UINT32_C(0x800)) { + length += 0x3u; + } else if (code_point >= UINT32_C(0x80)) { + length += 0x2u; + } else { + ++length; + } + } + + return length; +} diff --git a/u8c/src/operator.cc b/u8c/src/operator.cc deleted file mode 100644 index 3ea8eae..0000000 --- a/u8c/src/operator.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#include <algorithm> /* std::copy */ -#include <cstdint> /* u8c_ubytec */ -#include <ostream> /* std::ostream */ -#include <u8c/str> - -auto u8c::operator << (std::ostream & _strm,u8c::str const & _str) -> std::ostream & { - auto const u8 = _str.u8().app(u8'\u0000'); - return _strm << reinterpret_cast<unsigned char *>(u8.begin()); -} diff --git a/u8c/src/u8c/fmt.cc b/u8c/src/u8c/fmt.cc deleted file mode 100644 index 654fb98..0000000 --- a/u8c/src/u8c/fmt.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. 
- - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#include <cstddef> /* std::nullptr_t */ -#include <string> /* std::u32string, std::u8string */ -#include <u8c/u8c> -#include <u8c/str> - -template<> auto u8c::fmt(char32_t const _chr) -> u8c::str { - return _chr; -} -template<> auto u8c::fmt(std::nullptr_t) -> u8c::str { - return U"nullptr"; -} -template<> auto u8c::fmt(u8c::str const _str) -> u8c::str { - return _str; -} -template<> auto u8c::fmt(void * _ptr) -> u8c::str { - if(_ptr == nullptr) [[unlikely]] { - return U"nullptr"; - } - return U"PTR"; -} diff --git a/u8c/src/u8c/print.cc b/u8c/src/u8c/print.cc deleted file mode 100644 index bb08cb4..0000000 --- a/u8c/src/u8c/print.cc +++ /dev/null @@ -1,25 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. 
If not, see <https://www.gnu.org/licenses/>. -*/ - -#include <ostream> /* std::ostream */ - -#if 0x0 -auto u8c::print([[maybe_unused]] std::ostream & _strm,[[maybe_unused]] u8c::str _msg) -> void { -} -#endif diff --git a/u8c/src/u8c/println.cc b/u8c/src/u8c/println.cc deleted file mode 100644 index f607ad7..0000000 --- a/u8c/src/u8c/println.cc +++ /dev/null @@ -1,25 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the - terms of the GNU Affero General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your - option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public - License for more details. - - You should have received a copy of the GNU Affero General Public License - along with u8c. If not, see <https://www.gnu.org/licenses/>. -*/ - -#include <ostream> /* std::ostream */ - -#if 0x0 -auto u8c::println([[maybe_unused]] std::ostream & _strm,[[maybe_unused]] u8c::str _msg) -> void { -} -#endif
\ No newline at end of file |