diff options
Diffstat (limited to 'u8c')
45 files changed, 6699 insertions, 0 deletions
diff --git a/u8c/include/u8c/arr b/u8c/include/u8c/arr new file mode 100644 index 0000000..81372f6 --- /dev/null +++ b/u8c/include/u8c/arr @@ -0,0 +1,62 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_RMLtbYyYWBICBIbq)
+#define u8c_key_RMLtbYyYWBICBIbq
+
+#include <u8c/misc>
+
+namespace u8c {
+	/*
+		Dynamic array of T.
+		The array either owns storage obtained with new[] or, when "static", merely refers to a built-in array
+		handed to it; the member definitions live in <u8c/arr.d/arr>.
+	*/
+	template<typename T> class arr {
+	public:
+		/* Discards the current contents and allocates storage for num elements. */
+		constexpr auto alloc( u8c::size num) -> void;
+		/* Appends the elements of oth; returns *this. */
+		constexpr auto app( u8c::arr<T> const & oth) -> u8c::arr<T> const &;
+		/* Creates an empty array (null pointer, size zero). */
+		[[nodiscard]] constexpr arr() noexcept = default;
+		/* Constructs from a pointer range; forwards to set(begin,end). */
+		[[nodiscard]] constexpr arr( T const * begin, T const * end);
+		/* Constructs with storage for num elements; forwards to alloc(num). */
+		[[nodiscard]] constexpr arr( u8c::size num);
+		/* Copy constructor; forwards to set(oth). */
+		[[nodiscard]] constexpr arr( u8c::arr<T> const & oth);
+		/* Refers to the given built-in array without copying it (the array becomes "static"). */
+		template<u8c::size N> [[nodiscard]] constexpr arr( T const (& arr)[N]) noexcept;
+		/* Constructs a one-element array holding val. */
+		template<typename T0> requires std::convertible_to<T0,T> [[nodiscard]] constexpr arr( T0 val);
+		/* Constructs with storage for num elements and fills it with val. */
+		template<typename T0> requires std::convertible_to<T0,T> [[nodiscard]] constexpr arr( u8c::size num, T0 val);
+		/* Pointer to the first element (null for a default-constructed array). */
+		[[nodiscard]] constexpr auto begin() const noexcept -> T *;
+		/* begin() advanced by the element count. */
+		[[nodiscard]] constexpr auto end() const noexcept -> T *;
+		/* Fills the whole array with val. */
+		template<typename T0> requires std::convertible_to<T0,T> constexpr auto fill( T0 val) -> void;
+		/* Fills with val; throws std::out_of_range when the range lies outside the array. */
+		template<typename T0> requires std::convertible_to<T0,T> constexpr auto fill( T * begin, T * end,T0 val) -> void;
+		/* Whether the array refers to a built-in array instead of owning its storage. */
+		[[nodiscard]] constexpr auto isstatic() const noexcept -> bool;
+		/* Copy assignment; forwards to set(oth) and returns *this. */
+		constexpr auto operator = ( u8c::arr<T> const & oth) -> u8c::arr<T> const &;
+		/* Element access; when u8c::dbg is set, an out-of-range position prints a message and aborts. */
+		[[nodiscard]] constexpr auto operator [] (u8c::size pos) const noexcept -> T &;
+		/* Changes the element count to num (uses alloc when empty, u8c::renew otherwise). */
+		constexpr auto realloc( u8c::size num) -> void;
+		/* Replaces the contents with a copy of the pointer range. */
+		constexpr auto set( T const * begin, T const * end) -> void;
+		/* Replaces the contents with a copy of oth. */
+		constexpr auto set( u8c::arr<T> const & oth) -> void;
+		/* Makes the array refer to the given built-in array (no copy is made). */
+		template<u8c::size N> constexpr auto set( T const (& arr)[N]) noexcept -> void;
+		/* Replaces the contents with the single element val. */
+		template<typename T0> requires std::convertible_to<T0,T> constexpr auto set( T0 val) -> void;
+		/* Returns an array built from the pointer range; throws std::out_of_range when the range lies outside this array. */
+		[[nodiscard]] constexpr auto sub( T const * begin, T const * end) const -> u8c::arr<T>;
+		/* Number of elements. */
+		[[nodiscard]] constexpr auto sz() const noexcept -> u8c::size;
+		/* Releases the storage unless the array is static. */
+		constexpr ~arr() noexcept;
+		/* Negated unsigned one, i.e. the largest value of the unsigned size type. */
+		constexpr static auto npos = -0x1uz;
+	private:
+		bool _isstatic = false; /* true: _ptr refers to a borrowed built-in array and must not be deleted */
+		T * _ptr = nullptr; /* first element */
+		u8c::size _sz = 0x0uz; /* element count */
+	};
+}
+
+#include <u8c/arr.d/arr>
+
+#endif
diff --git a/u8c/include/u8c/arr.d/arr b/u8c/include/u8c/arr.d/arr new file mode 100644 index 0000000..a0af8b7 --- /dev/null +++ b/u8c/include/u8c/arr.d/arr @@ -0,0 +1,159 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/ + +#if !defined(u8c_key_zQ92KNf0pxkz48g2) +#define u8c_key_zQ92KNf0pxkz48g2 + +#include <algorithm> /* std::copy, std::fill */ +#include <cstdlib> /* std::abort */ +#include <iostream> /* std::cerr, std::endl */ +#include <stdexcept> /* std::invalid_argument, std::out_of_range */ +#include <type_traits> /* std::is_constant_evaluated */ + +template<typename T> constexpr auto u8c::arr<T>::alloc(u8c::size const _num) -> void { + if (this->isstatic()) [[unlikely]] { + this->_isstatic = false; + } + else { + ::delete[] this->_ptr; + } + this->_ptr = ::new T[_num]; + this->_sz = _num; +} +template<typename T> constexpr auto u8c::arr<T>::app(u8c::arr<T> const & _oth) -> u8c::arr<T> const & { + this->realloc(this->sz() + _oth.sz()); + std::copy(_oth.begin(),_oth.end(),this->begin() + this->sz() - _oth.sz()); + return *this; +} +template<typename T> constexpr u8c::arr<T>::arr(T const * const _begin,T const * const _end) { + this->set(_begin,_end); +} +template<typename T> constexpr u8c::arr<T>::arr(u8c::size const _num) { + this->alloc(_num); +} +template<typename T> constexpr u8c::arr<T>::arr(u8c::arr<T> const & _oth) { + this->set(_oth); +} +template<typename T> template<u8c::size N> constexpr u8c::arr<T>::arr(T const (&_arr)[N]) noexcept { + this->set(_arr); +} +template<typename T> template<typename T0> requires std::convertible_to<T0,T> constexpr u8c::arr<T>::arr(T0 const _val) { + this->set(_val); +} +template<typename T> template<typename T0> requires std::convertible_to<T0,T> constexpr u8c::arr<T>::arr(u8c::size const _num,T0 const _val) { + this->alloc(_num); + this->fill(this->begin(),this->end(),_val); +} +template<typename T> constexpr auto u8c::arr<T>::begin() const noexcept -> T * { + return this->_ptr; +} +template<typename T> constexpr auto u8c::arr<T>::end() const noexcept -> T * { + return this->begin() + this->_sz; +} +template<typename T> template<typename T0> requires std::convertible_to<T0,T> constexpr auto u8c::arr<T>::fill(T0 const _val) -> void 
{ + this->fill(this->begin(),this->end(),_val); +} +template<typename T> template<typename T0> requires std::convertible_to<T0,T> constexpr auto u8c::arr<T>::fill(T * const u8c_restr _begin,T * const u8c_restr _end,T0 const _val) -> void { + if (this->sz() == 0x0uz) [[unlikely]] { + return; /* slime incident */ + } + if (_begin < this->begin() || _end > this->end()) [[unlikely]] { + throw std::out_of_range("Beginning or end are out of this array's range."); + } + if (this->isstatic()) [[unlikely]] { + this->alloc(static_cast<u8c::size>(_end - _begin + 0x1uz)); + } + std::fill(this->begin(),this->end(),static_cast<T>(_val)); +} +template<typename T> constexpr auto u8c::arr<T>::isstatic() const noexcept -> bool { + return this->_isstatic; +} +template<typename T> constexpr auto u8c::arr<T>::operator = (u8c::arr<T> const & _oth) -> u8c::arr<T> const & { + this->set(_oth); + return *this; +} +template<typename T> constexpr auto u8c::arr<T>::operator [] (u8c::size const _pos) const noexcept -> T & { + if constexpr (u8c::dbg) { + if (_pos > this->sz()) [[unlikely]] { + //std::cerr << "u8c :: " << std::source_location::current().function_name() << " :: Input parameter is out of range." << std::endl; + std::cerr << "u8c :: " << __func__ << " :: Input parameter is out of range." 
<< std::endl; + std::abort(); + } + } + return this->begin()[_pos]; +} +template<typename T> constexpr auto u8c::arr<T>::realloc(u8c::size const _num) -> void { + if (this->sz() == 0x0uz) [[unlikely]] { + return this->alloc(_num); + } + if (this->isstatic()) [[unlikely]] { + this->_isstatic = false; + } + this->_ptr = u8c::renew(this->begin(),this->sz(),_num); + this->_sz = _num; +} +template<typename T> constexpr auto u8c::arr<T>::set(T const * const u8c_restr _begin,T const * const u8c_restr _end) -> void { + if constexpr (u8c::dbg) { + if (_begin == nullptr || _end == nullptr) [[unlikely]] { + throw std::invalid_argument("Provided parameter has value of nullptr."); + } + } + this->alloc(static_cast<u8c::size>(_end - _begin + 0x1uz)); + std::copy(_begin,_end,this->begin()); +} +template<typename T> constexpr auto u8c::arr<T>::set(u8c::arr<T> const & _oth) -> void { + this->set(_oth.begin(),_oth.end()); +} +template<typename T> template<u8c::size N> constexpr auto u8c::arr<T>::set(T const (&_arr)[N]) noexcept -> void { + this->~arr(); + this->_isstatic = true; + this->_ptr = _arr; + this->_sz = N; +} +template<typename T> template<typename T0> requires std::convertible_to<T0,T> constexpr auto u8c::arr<T>::set(T0 const _val) -> void { + this->alloc(0x1uz); + *this->begin() = _val; +} +template<typename T> constexpr auto u8c::arr<T>::sub(T const * const u8c_restr _begin,T const * const u8c_restr _end) const -> u8c::arr<T> { + if (_begin < this->begin() || _end > this->end()) [[unlikely]] { + throw std::out_of_range("Beginning or end are out of this array's range."); + } + u8c::size const sz = static_cast<u8c::size>(_end - _begin) + 0x1uz; + u8c::arr<T> arr; + if (this->isstatic()) [[unlikely]] { + arr._sz = sz; + arr._ptr = _begin; + } + else { + arr.alloc(sz); + std::copy(_begin,_end,arr.begin()); + } + return arr; +} +template<typename T> constexpr auto u8c::arr<T>::sz() const noexcept -> u8c::size { + return this->_sz; +} +template<typename T> constexpr 
u8c::arr<T>::~arr<T>() noexcept { + if (this->isstatic()) { + return; + } + ::delete[] this->_ptr; +} + +#endif diff --git a/u8c/include/u8c/cstr b/u8c/include/u8c/cstr new file mode 100644 index 0000000..f8f5184 --- /dev/null +++ b/u8c/include/u8c/cstr @@ -0,0 +1,37 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_gM1GPEGwZN8BgcwU) +#define u8c_key_gM1GPEGwZN8BgcwU + +#include <u8c/misc> + +namespace u8c { + constexpr auto cstrcmp(char const * lstr,char const * rstr) noexcept -> u8c::byte; + constexpr auto cstrcpy(char * dest,char const * src) noexcept -> char *; + constexpr auto cstrdup(char const * str) -> char *; + constexpr auto cstrlen(char const * str) noexcept -> u8c::size; +} + +#include <u8c/cstr.d/cstrcmp> +#include <u8c/cstr.d/cstrcpy> +#include <u8c/cstr.d/cstrdup> +#include <u8c/cstr.d/cstrlen> + +#endif diff --git a/u8c/include/u8c/cstr.d/cstrcmp b/u8c/include/u8c/cstr.d/cstrcmp new file mode 100644 index 0000000..9615ad2 --- /dev/null +++ b/u8c/include/u8c/cstr.d/cstrcmp @@ -0,0 +1,43 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. 
+ + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_hQ3g8CRKOedpjvM7) +#define u8c_key_hQ3g8CRKOedpjvM7 + + +#include <algorithm> /* std::min */ + +constexpr auto u8c::cstrcmp(char const * const u8c_restr _lstr,char const * const u8c_restr _rstr) noexcept -> u8c::byte { + auto const maxn = std::min(u8c::cstrlen(_lstr),u8c::cstrlen(_rstr)); + for (auto n = 0x0uz;n <= maxn;n += 0x1uz) { + auto const lchr = _lstr[n]; + auto const rchr = _rstr[n]; + if (lchr != rchr) [[unlikely]] { + if (lchr > rchr) { + return u8c_bytec(-0x1); + } + if (lchr < rchr) { + return u8c_bytec(0x1); + } + } + } + return u8c_bytec(0x0); +} + +#endif diff --git a/u8c/include/u8c/cstr.d/cstrcpy b/u8c/include/u8c/cstr.d/cstrcpy new file mode 100644 index 0000000..37cad96 --- /dev/null +++ b/u8c/include/u8c/cstr.d/cstrcpy @@ -0,0 +1,34 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_MvAfxuZelp52mHk5) +#define u8c_key_MvAfxuZelp52mHk5 + +#include <algorithm> /* std::copy */ + +constexpr auto u8c::cstrcpy(char * const u8c_restr _dest,char const * const u8c_restr _src) noexcept -> char * { + auto const sz = u8c::cstrlen(_src); + if (sz == 0x0uz) [[unlikely]] { + return _dest; + } + std::copy(_src,_src + sz - 0x1uz,_dest); + return _dest; +} + +#endif diff --git a/u8c/include/u8c/cstr.d/cstrdup b/u8c/include/u8c/cstr.d/cstrdup new file mode 100644 index 0000000..d37f03b --- /dev/null +++ b/u8c/include/u8c/cstr.d/cstrdup @@ -0,0 +1,29 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_vf9vDNmIndanmgPg) +#define u8c_key_vf9vDNmIndanmgPg + +constexpr auto u8c::cstrdup(char const * const u8c_restr _str1) -> char * { + auto const sz = u8c::cstrlen(_str1); + auto * const u8c_restr str = ::new char[sz]; + return u8c::cstrcpy(str,_str1);; +} + +#endif diff --git a/u8c/include/u8c/cstr.d/cstrlen b/u8c/include/u8c/cstr.d/cstrlen new file mode 100644 index 0000000..2f7cb5a --- /dev/null +++ b/u8c/include/u8c/cstr.d/cstrlen @@ -0,0 +1,34 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. 
+ + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_2yXSG12yvwzu2XCt) +#define u8c_key_2yXSG12yvwzu2XCt + +constexpr auto u8c::cstrlen(char const * const u8c_restr _str) noexcept -> u8c::size { + auto sz = 0x0uz; + for (u8c::size n = 0x0uz;;n += 0x1uz) { + if (_str[n] == '\u0000') [[unlikely]] { + break; + } + sz += 0x1uz; + } + return sz; +} + +#endif diff --git a/u8c/include/u8c/impl b/u8c/include/u8c/impl new file mode 100644 index 0000000..af6a2c4 --- /dev/null +++ b/u8c/include/u8c/impl @@ -0,0 +1,33 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/
+
+#if !defined(u8c_key_9y1ZpGLV5Chmuy9U)
+#define u8c_key_9y1ZpGLV5Chmuy9U
+
+#include <u8c/cstr>
+
+namespace u8c {
+	/* Compile-time query: whether the named architecture (e.g. "x86") is the one being compiled for. */
+	[[nodiscard]] consteval auto isarch(char const * arch) noexcept -> bool;
+	/* Compile-time query: whether the named operating system (e.g. "linux") is the one being compiled for. */
+	[[nodiscard]] consteval auto isos( char const * os) noexcept -> bool;
+}
+
+#include <u8c/impl.d/isarch>
+#include <u8c/impl.d/isos>
+
+#endif
\ No newline at end of file diff --git a/u8c/include/u8c/impl.d/isarch b/u8c/include/u8c/impl.d/isarch new file mode 100644 index 0000000..33b0faa --- /dev/null +++ b/u8c/include/u8c/impl.d/isarch @@ -0,0 +1,90 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_HeaDXGqHoIcCdWWR) +#define u8c_key_HeaDXGqHoIcCdWWR + +consteval auto u8c::isarch([[maybe_unused]] char const * const u8c_restr _arch) noexcept -> bool { + if (!u8c::cstrcmp(_arch,"alpha")) { +#if defined(__alpha) || defined(__alpha__) || defined(_M_ALPHA) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_arch,"arm")) { +#if defined(__TARGET_ARCH_ARM) || defined(__TARGET_ARCH_THUMB) || defined(__arm) || defined(__arm__) || defined(__thumb__) || defined(_M_ARM) || defined(_M_ARMT) + return true; +#else + return false; + } +#endif + if (!u8c::cstrcmp(_arch,"itanium")) { +#if defined(__IA64__) || defined(__ia64__) || defined(__itanium__) || defined(_IA64) || defined(_M_IA64) + return true; +#else + return false; + } +#endif + if (!u8c::cstrcmp(_arch,"m68k")) { +#if defined(__MC68K__) || defined(__m68k__) || defined(M68000) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_arch,"mips")) { +#if defined(__MIPS__) || defined(__mips) || defined(__mips__) || defined(mips) + return true; +#else + return false; 
+#endif + } + if (!u8c::cstrcmp(_arch,"pa-risc")) { +#if defined(__HPPA__) || defined(__hppa) || defined(__hppa__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_arch,"ppc")) { +#if defined(__POWERPC__) || defined(__ppc) || defined(__PPC__) || defined(__powerpc) || defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(_M_PPC) || defined(_XENON) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_arch,"sparc")) { +#if defined(__sparc) || defined(__sparc__) + return true; +#else + return false; +#endif + } + if (!u8c::cstrcmp(_arch,"x86")) { +#if defined(__386) || defined(__I86__) || defined(__IA32__) || defined(__INTEL__) || defined(__THW_INTEL__) || defined(__X86__) || defined(__i386) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_I86) || defined(_M_IX86) || defined(_X86_) ||defined(i386) + return true; +#else + return false; +#endif + } + return false; +} + +#endif diff --git a/u8c/include/u8c/impl.d/isos b/u8c/include/u8c/impl.d/isos new file mode 100644 index 0000000..06a6934 --- /dev/null +++ b/u8c/include/u8c/impl.d/isos @@ -0,0 +1,258 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/
+
+#if !defined(u8c_key_ACd4FIGZ23h2QNrU)
+#define u8c_key_ACd4FIGZ23h2QNrU
+
+#if __has_include(<sys/param.h>)
+#include <sys/param.h> /* BSD */
+#endif
+#if __has_include(<unistd.h>)
+#include <unistd.h> /* _POSIX_VERSION */
+#endif
+
+/*
+	Reports at compile time whether the translation unit is being compiled for the operating system named by _os.
+	Each recognised name is answered from predefined macros (plus BSD and _POSIX_VERSION from the headers above);
+	any other name yields false.
+	NOTE(review): the u8c_os_* macros tested by "linux" and "unix" are not defined in this file - presumably they
+	come from elsewhere in the library; confirm.
+*/
+consteval auto u8c::isos(char const * const u8c_restr _os) noexcept -> bool {
+	if (!u8c::cstrcmp(_os,"aix")) {
+#if defined(__TOS_AIX__) || defined(_AIX)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"amigaos")) {
+#if defined(__amigaos__) || defined(AMIGA)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"android")) {
+#if defined(__ANDROID__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"beos")) {
+#if defined(__BEOS__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"bluegene")) {
+#if defined(__THW_BLUEGENE__) || defined(__TOS_BGQ__) || defined(__bg__) || defined(__bgq__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"bsd")) {
+#if defined(_SYSTYPE_BSD) || defined(BSD)
+		return true;
+#else
+		/* No generic BSD macro: fall back to asking for each specific BSD flavour. */
+		return u8c::isos("bsdos") || u8c::isos("dragonflybsd") || u8c::isos("freebsd") || u8c::isos("netbsd") || u8c::isos("openbsd");
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"bsdos")) {
+#if defined(__bsdi__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"dragonflybsd")) {
+#if defined(__DragonFly__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"freebsd")) {
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"hpux")) {
+#if defined(__hpux) || defined(_hpux) || defined(hpux)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"hurd")) {
+#if defined(__GNU__) || defined(__gnu_hurd__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"ibmi")) {
+#if defined(__OS400__) || defined(__OS400_TGTVRM__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"integrity")) {
+#if defined(__INTEGRITY)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"interix")) {
+#if defined(__INTERIX)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"irix")) {
+#if defined(__sgi) || defined(sgi)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"linux")) {
+#if defined(__linux) || defined(__linux__) || defined(linux) || defined(u8c_os_android)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"mac")) {
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(Macintosh) || defined(macintosh)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"minix")) {
+#if defined(__minix)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"msdos")) {
+#if defined(__DOS__) || defined(__MSDOS__) || defined(_MSDOS) || defined(MSDOS)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"netbsd")) {
+#if defined(__NetBSD__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"nonstop")) {
+#if defined(__TANDEM)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"openbsd")) {
+#if defined(__OpenBSD__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"os2")) {
+#if defined(__OS2__) || defined(__TOS_OS2__) || defined(_OS2) || defined(OS2)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"palmos")) {
+#if defined(__palmos__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"qnx")) {
+#if defined(__QNX__) || defined(__QNXNTO__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"solaris")) {
+#if (defined(__SVR4) || defined(__svr4__) || defined(__sysv__) || defined(_SYSTYPE_SVR4)) && (defined(__sun) || defined(sun))
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"sun")) {
+#if defined(__sun) || defined(sun)
+		/* The sun macros are also part of the "solaris" test; "sun" is only reported when that test fails. */
+		return !u8c::isos("solaris");
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"unicos")) {
+#if defined(_UNICOS)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"unix")) {
+#if defined(__unix) || defined(__unix__) || defined(_POSIX_VERSION) || defined(u8c_os_aix) || defined(u8c_os_android)|| defined(u8c_os_bsd) || defined(u8c_os_hpux) || defined(u8c_os_hurd) || defined(u8c_os_linux) || defined(u8c_os_mac) || defined(u8c_os_minix) || defined(u8c_os_solaris) || defined(u8c_os_sun) || defined(u8c_os_unicos) || defined(u8c_os_unixware) || defined(unix)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"unixware")) {
+#if defined(sco)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"win")) {
+#if defined(__TOS_WIN__) || defined(__WIN32__) || defined(__WINDOWS__) || defined(_WIN16) || defined(_WIN32) || defined(_WIN32_CE) || defined(_WIN64)
+		return true;
+#else
+		return false;
+#endif
+	}
+	if (!u8c::cstrcmp(_os,"zos")) {
+#if defined(__HOS_MVS__) || defined(__MVS__) || defined(__TOS_MVS__)
+		return true;
+#else
+		return false;
+#endif
+	}
+	return false;
+}
+
+#endif
diff --git a/u8c/include/u8c/math b/u8c/include/u8c/math new file mode 100644 index 0000000..2b12906 --- /dev/null +++ b/u8c/include/u8c/math @@ -0,0 +1,80 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c.
If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_vm8mXaTP2bcUVL49)
+#define u8c_key_vm8mXaTP2bcUVL49
+
+#include <compare> /* std::partial_ordering */
+#include <concepts> /* std::convertible_to, std::floating_point, std::integral, std::signed_integral */
+#include <type_traits> /* std::is_arithmetic_v, std::is_same_v */
+#include <u8c/impl>
+
+namespace u8c {
+	/*
+		Value type made of an "upper" and a "lower" signed integer of type T plus a flags byte.
+		The member definitions live in <u8c/math.d/quota>.
+		NOTE(review): the names suggest a fraction (numerator/denominator) with inf/nan states kept in the
+		flags - confirm against the definitions.
+	*/
+	template<std::signed_integral T = int> class quota {
+	public:
+		[[nodiscard]] constexpr static auto inf() noexcept -> u8c::quota<T>;
+		[[nodiscard]] constexpr auto isinf() const noexcept -> bool;
+		[[nodiscard]] constexpr auto isnan() const noexcept -> bool;
+		/* Accessor for the "lower" part. */
+		[[nodiscard]] constexpr auto lower() const noexcept -> T;
+		[[nodiscard]] constexpr static auto nan() noexcept -> u8c::quota<T>;
+		/* Accessor for the "upper" part. */
+		[[nodiscard]] constexpr auto upper() const noexcept -> T;
+		template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator + (u8c::quota<T0> const & oth) const noexcept -> u8c::quota<T>;
+		template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator - (u8c::quota<T0> const & oth) const noexcept -> u8c::quota<T>;
+		/* The ordering is partial, so some pairs of values may compare as unordered. */
+		template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator <=> (u8c::quota<T0> const & oth) const noexcept -> std::partial_ordering;
+		/* NOTE(review): declared const and returning bool, which is unusual for an assignment operator - confirm against the definition. */
+		template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator = ( u8c::quota<T0> const & oth) const noexcept -> bool;
+		template<std::convertible_to<T> T0> [[nodiscard]] constexpr auto operator == ( u8c::quota<T0> const & oth) const noexcept -> bool;
+		/* Implicit conversion to any integral type. */
+		template<std::integral T0> [[nodiscard]] constexpr operator T0 () const noexcept;
+		[[nodiscard]] constexpr quota() noexcept = default;
+		template<std::convertible_to<T> T0> [[nodiscard]] constexpr quota( T0 val) noexcept;
+		template<std::convertible_to<T> T0> [[nodiscard]] constexpr quota( u8c::quota<T0> const & oth) noexcept;
+		template<std::signed_integral T0> [[nodiscard]] constexpr quota( T0 upper,T0 lower) noexcept;
+		constexpr ~quota() noexcept = default;
+	private:
+		u8c::ubyte _flags = false; /* initialised from false, i.e. zero */
+		T _lower = T{0x0};
+		T _upper = T{0x0};
+
+	};
+	/* Satisfied by the built-in arithmetic types and by exactly u8c::quota<T0>. */
+	template<typename T,typename T0 = void> concept arith = std::is_arithmetic_v<T> || std::is_same_v<T,u8c::quota<T0>>;
+	/* Free math functions; each is declared once for u8c::quota and once for any u8c::arith type. */
+	template<typename T> [[u8c_attr_const]] constexpr auto abs( u8c::quota<T> val) noexcept -> u8c::quota<T>;
+	template<u8c::arith T> [[u8c_attr_const]] constexpr auto abs( T val) noexcept -> T;
+	template<typename T> [[u8c_attr_const]] constexpr auto fma( u8c::quota<T> x, u8c::quota<T> y, u8c::quota<T> z) noexcept -> u8c::quota<T>;
+	template<u8c::arith T> [[u8c_attr_const]] constexpr auto fma( T x, T y, T z) noexcept -> T;
+	template<u8c::arith T> [[u8c_attr_const]] constexpr auto isinf( T val) noexcept -> bool;
+	template<u8c::arith T> [[u8c_attr_const]] constexpr auto isnan( T val) noexcept -> bool;
+	template<typename T> [[u8c_attr_const]] constexpr auto isprime(u8c::quota<T> val) noexcept -> bool;
+	template<u8c::arith T> [[u8c_attr_const]] constexpr auto isprime(T val) noexcept -> bool;
+	template<typename T> [[u8c_attr_const]] constexpr auto pow( u8c::quota<T> base,u8c::quota<T> exp) noexcept -> u8c::quota<T>;
+	template<u8c::arith T> [[u8c_attr_const]] constexpr auto pow( T base,T exp) noexcept -> T;
+	template<typename T> [[u8c_attr_const]] constexpr auto sqrt( u8c::quota<T> val) noexcept -> u8c::quota<T>;
+	template<u8c::arith T> [[u8c_attr_const]] constexpr auto sqrt( T val) noexcept -> T;
+	/* The unnamed second parameter only selects the result type T0. */
+	template<std::integral T,std::integral T0> [[u8c_attr_const]] constexpr auto trunc( T val, T0) noexcept -> T0;
+}
+
+#include <u8c/math.d/abs>
+#include <u8c/math.d/fma>
+#include <u8c/math.d/isinf>
+#include <u8c/math.d/isnan>
+#include <u8c/math.d/isprime>
+#include <u8c/math.d/pow>
+#include <u8c/math.d/quota>
+#include <u8c/math.d/sqrt>
+#include <u8c/math.d/trunc>
+
+#endif
diff --git a/u8c/include/u8c/math.d/abs b/u8c/include/u8c/math.d/abs new file mode 100644 index 0000000..53eaba3 --- /dev/null +++ b/u8c/include/u8c/math.d/abs @@ -0,0 +1,37 @@
+/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_DHlzI0Min44ZJ3qF) +#define u8c_key_DHlzI0Min44ZJ3qF + +#include <type_traits> /* std::is_unsigned_v */ + +template<u8c::arith T> constexpr auto u8c::abs(T const _val) noexcept -> T { + if constexpr (std::is_unsigned_v<T>) { + return _val; + } + else { + if (_val < T{0x0}) { + return -_val; + } + return _val; + } +} + +#endif diff --git a/u8c/include/u8c/math.d/fma b/u8c/include/u8c/math.d/fma new file mode 100644 index 0000000..e2b756d --- /dev/null +++ b/u8c/include/u8c/math.d/fma @@ -0,0 +1,27 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_3Xt4uPu34bdh53dB) +#define u8c_key_3Xt4uPu34bdh53dB + +template<u8c::arith T> constexpr auto u8c::fma(T const _x,T const _y,T const _z) noexcept -> T { + return _x * _y + _z; +} + +#endif diff --git a/u8c/include/u8c/math.d/isinf b/u8c/include/u8c/math.d/isinf new file mode 100644 index 0000000..e77793a --- /dev/null +++ b/u8c/include/u8c/math.d/isinf @@ -0,0 +1,32 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_aSzgnLkMAeJF6xFF) +#define u8c_key_aSzgnLkMAeJF6xFF + +#include <limits> /* std::numeric_limits */ + +template<u8c::arith T> constexpr auto u8c::isinf(T const _val) noexcept -> bool { + if (std::numeric_limits<T>::has_infinity) { + return _val == std::numeric_limits<T>::infinity(); + } + return false; +} + +#endif diff --git a/u8c/include/u8c/math.d/isnan b/u8c/include/u8c/math.d/isnan new file mode 100644 index 0000000..9e90d12 --- /dev/null +++ b/u8c/include/u8c/math.d/isnan @@ -0,0 +1,62 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. 
+ + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_0RXxS4xdwMkbkEi6) +#define u8c_key_0RXxS4xdwMkbkEi6 + +#include <limits> /* std::numeric_limits */ + +template<u8c::arith T> constexpr auto u8c::isnan(T const _val) noexcept -> bool { + if constexpr (std::numeric_limits<T>::has_quiet_NaN) { + if constexpr (std::numeric_limits<T>::is_iec559) { + return _val != _val; + } + else { +#if defined(__cpp_if_consteval) + if consteval { + return _val != _val; + } + else { + u8c::ubyte * const u8c_restr nanval = nullptr; + u8c::ubyte * const u8c_restr valval = nullptr; + { + auto const tmp = std::numeric_limits<T>::quiet_NaN(); + nanval = reinterpret_cast<unsigned char *>(&tmp); + } + { + auto const tmp = _val; + valval = reinterpret_cast<unsigned char *>(&tmp); + } + for (std::size_t n = 0x0uz;n < sizeof(T);n += 0x1uz) { + if (valval[n] != nanval[n]) { + return false; + } + } + } +#else + return _val != _val; +#endif + } + } + else { + return false; + } +} + +#endif diff --git a/u8c/include/u8c/math.d/isprime b/u8c/include/u8c/math.d/isprime new file mode 100644 index 0000000..5074d4b --- /dev/null +++ b/u8c/include/u8c/math.d/isprime @@ -0,0 +1,35 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. 
+ + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_rrAoPS9LdRWHCbsB) +#define u8c_key_rrAoPS9LdRWHCbsB + +template<u8c::arith T> constexpr auto u8c::isprime(T const _val) noexcept -> bool { + if (_val <= T{0x1}) [[unlikely]] { + return false; + } + for (T iter = T{0x2};iter < _val / T{0x2} + T{0x1};iter += T{0x1}) { + if (_val % iter == T{0x0}) [[unlikely]] { + return false; + } + } + return true; +} + +#endif diff --git a/u8c/include/u8c/math.d/pow b/u8c/include/u8c/math.d/pow new file mode 100644 index 0000000..53410b8 --- /dev/null +++ b/u8c/include/u8c/math.d/pow @@ -0,0 +1,40 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_JD8l4B23bwAqQ2HP) +#define u8c_key_JD8l4B23bwAqQ2HP + +#include <type_traits> /* std::is_floating_point_v */ + +template<u8c::arith T> constexpr auto u8c::pow(T const _base,T const _exp) noexcept -> T { + //if constexpr (std::is_floating_point_v<T>) { + //} + //else { + if (u8c::abs(_base) <= T{0x1}) [[unlikely]] { + return _base; + } + T res = _base; + for (T iter = T{0x1};iter < _exp;iter += T{0x1}) { + res *= _base; + } + return res; + //} +} + +#endif diff --git a/u8c/include/u8c/math.d/quota b/u8c/include/u8c/math.d/quota new file mode 100644 index 0000000..3ea9350 --- /dev/null +++ b/u8c/include/u8c/math.d/quota @@ -0,0 +1,94 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_k92caE4RpzrErnKD) +#define u8c_key_k92caE4RpzrErnKD + +#include <compare> /* std::partial_ordering */ +#include <concepts> /* std:convertible_to, std::integral, std::signed_integral */ + +template<std::signed_integral T> constexpr auto u8c::quota<T>::inf() noexcept -> u8c::quota<T> { + auto tmp = u8c::quota<T>(); + tmp._flags = u8c_ubytec(0b10); + return tmp; +} +template<std::signed_integral T> constexpr auto u8c::quota<T>::isinf() const noexcept -> bool { + return (this->_flags & u8c_ubytec(0b10)) == u8c_ubytec(0b10); +} +template<std::signed_integral T> constexpr auto u8c::quota<T>::isnan() const noexcept -> bool { + return (this->_flags & u8c_ubytec(0b1)) == u8c_ubytec(0b1); +} +template<std::signed_integral T> constexpr auto u8c::quota<T>::lower() const noexcept -> T { + return this->_lower; +} +template<std::signed_integral T> constexpr auto u8c::quota<T>::nan() noexcept -> u8c::quota<T> { + auto tmp = u8c::quota<T>(); + tmp._flags = u8c_ubytec(0b1); + return tmp; +} +template<std::signed_integral T> constexpr auto u8c::quota<T>::upper() const noexcept -> T { + return this->_upper; +} +template<std::signed_integral T> template<std::convertible_to<T> T0> constexpr auto u8c::quota<T>::operator <=> (u8c::quota<T0> const & _oth) const noexcept -> std::partial_ordering { + if (this->isinf()) { + return std::partial_ordering::greater; + } + if (_oth.isinf()) { + return std::partial_ordering::less; + } + if (this->isnan() || _oth.isnan()) [[unlikely]] { + return std::partial_ordering::unordered; + } + auto const tmp0 = this->upper() * _oth.lower(); + auto const tmp1 = _oth.upper() * this->lower(); + return tmp0 <=> tmp1; +} +template<std::signed_integral T> template<std::convertible_to<T> T0> constexpr auto u8c::quota<T>::operator == (u8c::quota<T0> const & _oth) const noexcept -> bool { + if (this->isinf() && _oth.isinf()) [[unlikely]] { + return true; + } + if (this->isnan() || _oth.isnan()) [[unlikely]] { + return false; + } + if 
(this->_flags != _oth._flags) [[unlikely]] { + return false; + } + return this->upper() * _oth.lower() == _oth.upper() * this->lower(); +} +template<std::signed_integral T> template<std::integral T0> constexpr u8c::quota<T>::operator T0 () const noexcept { + return u8c::trunc<T0>(this->_upper / this->_lower); +} +template<std::signed_integral T> template<std::convertible_to<T> T0> constexpr u8c::quota<T>::quota(T0 const _val) noexcept { + if (u8c::isnan(_val)) [[unlikely]] { + this->_flags |= u8c_ubytec(0b1); + } + else if (u8c::isinf(_val)) [[unlikely]] { + this->_flags |= u8c_ubytec(0b10); + } + else { + this->_upper = T{_val}; + this->_lower = T{0x1}; + } +} +template<std::signed_integral T> template<std::signed_integral T0> constexpr u8c::quota<T>::quota(T0 const _upper,T0 const _lower) noexcept { + this->_upper = _upper; + this->_lower = _lower; +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/math.d/sqrt b/u8c/include/u8c/math.d/sqrt new file mode 100644 index 0000000..d1eb5e1 --- /dev/null +++ b/u8c/include/u8c/math.d/sqrt @@ -0,0 +1,42 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_GPVreQYtljZ3JsPu) +#define u8c_key_GPVreQYtljZ3JsPu + +#include <limits> /* std::numeric_limits */ +#include <type_traits> /* std::is_integral_v */ + +template<u8c::arith T> constexpr auto u8c::sqrt(T const _val) noexcept -> T { + if (_val < T{0x0}) [[unlikely]] { + return std::numeric_limits<T>::quiet_NaN(); + } + if (_val == T{0x0}) [[unlikely]] { + return _val; + } + /*constexpr auto err = []() { + if constexpr(std::is_integral_v<T>) { + return T{0x1}; + } + return std::numeric_limits<T>::epsilon(); + }();*/ + return _val; +} + +#endif diff --git a/u8c/include/u8c/math.d/trunc b/u8c/include/u8c/math.d/trunc new file mode 100644 index 0000000..d4ff731 --- /dev/null +++ b/u8c/include/u8c/math.d/trunc @@ -0,0 +1,36 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. 
+ + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_cQ6WEZj1q8fhHahe) +#define u8c_key_cQ6WEZj1q8fhHahe + +#include <concepts> /* std::integral */ +#include <limits> /* std::numeric_limits */ +#include <type_traits> /* std::make_unsigned_t */ + +template<std::integral T,std::integral T0> constexpr auto u8c::trunc(T const _val,T0) noexcept -> T0 { + if constexpr (std::numeric_limits<T0>::is_modulo) { + return static_cast<T0>(_val); + } + else { + return static_cast<T0>(static_cast<std::make_unsigned_t<T0>>(_val)); + } +} + +#endif diff --git a/u8c/include/u8c/misc b/u8c/include/u8c/misc new file mode 100644 index 0000000..f7deca1 --- /dev/null +++ b/u8c/include/u8c/misc @@ -0,0 +1,132 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_4grZQR1RdsRJL14e) +#define u8c_key_4grZQR1RdsRJL14e + +#include <climits> /* CHAR_BIT */ +#include <cstdint> /* std::int_least16_t, std::int_least32_t, std::int_least64_t, std::int_least8_t, std::intmax_t, std::uint_least16_t, std::uint_least32_t, std::uint_least64_t, std::uint_least8_t, std::uintmax_t */ +#include <cstdlib> /* std::abort */ +#include <iostream> /* std::cerr, std::endl */ + +#define u8c_assert(expr) \ + if constexpr (!u8c::dbg) { \ + /*if consteval { \ + static_assert(expr); \ + } \ + else*/ if (!(expr)) [[unlikely]] { \ + std::cerr << "u8c :: \"" << __FILE__ "\" @ " << __LINE__ << ": Assertion failed: expression \"" << #expr << "\" evaluates to false." << std::endl; \ + std::abort(); \ + } \ + } \ + +#if defined(__GNUC__) || defined(__clang__) +#define u8c_attr_abitag(...) gnu::abi_tag(__VA_ARGS__) +#define u8c_attr_allocsz(...) gnu::alloc_size(__VA_ARGS__) +#define u8c_attr_artif gnu::artificial +#define u8c_attr_cold gnu::cold +#define u8c_attr_const gnu::const +#define u8c_attr_fmt gnu::format +#define u8c_attr_malloc gnu::malloc +#define u8c_attr_nonnull(...) gnu::nonnull(__VA_ARGS__) +#define u8c_attr_hot gnu::hot +#define u8c_attr_inline gnu::always_inline +#define u8c_attr_pure gnu::pure +#define u8c_attr_retnonnull gnu::returns_nonnull +#define u8c_attr_sect gnu::section +#define u8c_attr_used gnu::used +#else +#define u8c_attr_abitag(...) +#define u8c_attr_allocsz(...) +#define u8c_attr_artif +#define u8c_attr_cold +#define u8c_attr_const +#define u8c_attr_fmt +#define u8c_attr_hot +#define u8c_attr_inline +#define u8c_attr_malloc +#define u8c_attr_nonnull(...) +#define u8c_attr_pure +#define u8c_attr_retnonnull +#define u8c_attr_sect +#define u8c_attr_used +#endif +#if defined(__clang__) +#define u8c_attr_noderef clang::noderef +#define u8c_attr_nodup clang::noduplicate +#define u8c_attr_noesc(...) 
clang::noescape(__VA_ARGS__) +#else +#define u8c_attr_noderef +#define u8c_attr_nodup +#define u8c_attr_noesc(...) +#endif + +#define u8c_bytec(expr) (static_cast<u8c::byte>(INT8_C(expr))) +#define u8c_int16c(expr) (static_cast<u8c::int16>(INT16_C(expr))) +#define u8c_int32c(expr) (static_cast<u8c::int32>(INT32_C(expr))) +#define u8c_int64c(expr) (static_cast<u8c::int64>(INT64_C(expr))) +#define u8c_intmaxc(expr) (static_cast<u8c::intmax>(INTMAX_C(expr))) +#define u8c_ubytec(expr) (static_cast<u8c::ubyte>(UINT8_C(expr))) +#define u8c_uint16c(expr) (static_cast<u8c::uint16>(UINT16_C(expr))) +#define u8c_uint32c(expr) (static_cast<u8c::uint32>(UINT32_C(expr))) +#define u8c_uint64c(expr) (static_cast<u8c::uint64>(UINT64_C(expr))) +#define u8c_uintmaxc(expr) (static_cast<u8c::uintmax>(UINTMAX_C(expr))) + +#if defined(__GNUC__) || defined(__clang__) +#define u8c_restr __restrict__ +#elif defined(__INTEL_COMPILER) || defined(_MSC_VER) +#define u8c_restr __restrict +#else +#define u8c_restr +#endif + +namespace u8c { + using byte = signed char; + using int16 = std::int_least16_t; + using int32 = std::int_least32_t; + using int64 = std::int_least64_t; + using intmax = std::intmax_t; + using size = decltype(0x0uz); + using ssize = decltype(0x0z); + using ubyte = unsigned char; + using uint16 = std::uint_least16_t; + using uint32 = std::uint_least32_t; + using uint64 = std::uint_least64_t; + using uintmax = std::uintmax_t; + template<typename T> concept utf = std::is_same_v<T,char16_t> || std::is_same_v<T,char32_t> || std::is_same_v<T,char8_t>; + enum class endi : bool { + big = true, + little = false, + }; + template<typename T> [[nodiscard,u8c_attr_allocsz(0x3),u8c_attr_malloc,u8c_attr_nonnull(0x1)]] constexpr auto renew(T * ptr,u8c::size sz,u8c::size newsz) -> T *; + template<typename T> constexpr auto renew(std::nullptr_t, u8c::size sz,u8c::size newsz) -> T * = delete; + constexpr auto bytesz = static_cast<u8c::ubyte>(CHAR_BIT); + constexpr auto dbg = +#if 
defined(NDEBUG) || !defined(_DEBUG) + false; +#else + true; +#endif + constexpr auto unimax = U'\U00010FFF'; + constexpr auto ver = u8c_uint64c(0x1B); +} + +#include <u8c/misc.d/renew> + +#endif diff --git a/u8c/include/u8c/misc.d/renew b/u8c/include/u8c/misc.d/renew new file mode 100644 index 0000000..8d67b78 --- /dev/null +++ b/u8c/include/u8c/misc.d/renew @@ -0,0 +1,32 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_PDOxHgtcAGQDjPjZ) +#define u8c_key_PDOxHgtcAGQDjPjZ + +#include <algorithm> /* std::copy_n, std::min */ + +template<typename T> auto constexpr u8c::renew(T * const u8c_restr _ptr,u8c::size const _sz,u8c::size const _newsz) -> T * { + T * const u8c_restr ptr = ::new T[_newsz]; + std::copy_n(_ptr,std::min(_sz,_newsz),ptr); + ::delete[] _ptr; + return ptr; +} + +#endif diff --git a/u8c/include/u8c/str b/u8c/include/u8c/str new file mode 100644 index 0000000..d8f5753 --- /dev/null +++ b/u8c/include/u8c/str @@ -0,0 +1,55 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. 
+ + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_ywSpAOVLrorTYrkK) +#define u8c_key_ywSpAOVLrorTYrkK + +#include <u8c/arr> +#include <u8c/utf> + +namespace u8c { + class str { + public: + constexpr auto app( u8c::str const & oth) -> u8c::str const &; + constexpr auto begin() const noexcept -> char32_t *; + constexpr auto end() const noexcept -> char32_t *; + constexpr auto operator = ( u8c::str const & oth) -> u8c::str const &; + constexpr auto operator [] (u8c::str const & oth) const noexcept -> u8c::str const &; + [[nodiscard]] constexpr str() noexcept; + [[nodiscard]] constexpr str( u8c::str const & oth); + template<u8c::utf T> [[nodiscard]] constexpr str( T chr); + template<u8c::utf T,u8c::size N> [[nodiscard]] constexpr str( T const (& strlit)[N]) noexcept; + [[nodiscard]] constexpr auto u8() const -> u8c::arr<char8_t>; + private: + u8c::arr<char32_t> _arr; + }; + template<typename T> [[nodiscard,u8c_attr_hot]] constexpr auto fmt( T fmt) -> u8c::str; + auto operator << (std::ostream & strm, u8c::str const & str) -> std::ostream &; + /*class { + public: + bool ascii = false; + u8c::ubyte base = u8c_ubytec(0xB); + u8c::endi endi = u8c::endi::little; + } inline fmtsets;*/ +} + +#include <u8c/str.d/dbgprint> +#include <u8c/str.d/str> + +#endif diff --git a/u8c/include/u8c/str.d/dbgprint b/u8c/include/u8c/str.d/dbgprint new file mode 100644 index 0000000..4ecb6b9 --- /dev/null +++ b/u8c/include/u8c/str.d/dbgprint @@ -0,0 +1,32 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. 
#if !defined(u8c_key_WQGQeKhIxLI16CtT)
#define u8c_key_WQGQeKhIxLI16CtT

/* Disabled: everything inside this conditional is excluded from compilation. */
#if 0x0

/*
	Prints _msg as a line on stderr, but only in builds where u8c::dbg is
	true; otherwise does nothing.
*/
auto u8c::dbgprint(u8c::str const _msg) -> void {
	if constexpr (u8c::dbg) {
		u8c::println(stderr,_msg);
	}
}

#endif

#endif
#if !defined(u8c_key_6cyujV0FoSmPeQWl)
#define u8c_key_6cyujV0FoSmPeQWl

/* Disabled: everything inside this conditional is excluded from compilation. */
#if 0x0
/*
	Prints _msg as a line on stderr in debug builds only. Uses the library-wide
	u8c::dbg switch, the same one str.d/dbgprint uses, instead of a local flag.
*/
auto u8c::dbgprint(u8c::str const _msg) -> void {
	if constexpr (u8c::dbg) {
		return u8c::println(stderr,_msg);
	}
}
/* No arguments to insert: returns a copy of _str. */
auto u8c::fmt(u8c::str const _str) -> u8c::str {
	return u8c::str(_str);
}
/*
	Replaces the first U+FFFC placeholder in _str with the formatted _fmt and
	keeps the text that follows it. Without a placeholder, _str is returned
	unchanged.
*/
template<typename T> auto u8c::fmt(u8c::str const _str,T const _fmt) -> u8c::str {
	u8c::str head;
	u8c::str tail;
	bool     found = false;
	for(auto chr : _str) {
		if(found) {
			tail += chr;
		}
		else if(chr == U'\uFFFC') [[unlikely]] {
			found = true;
		}
		else {
			head += chr;
		}
	}
	if(!found) {
		return head;
	}
	return head + u8c::fmter::fmt(_fmt) + tail;
}
/*
	Replaces the first U+FFFC placeholder in _str with the formatted _fmt,
	then formats the text after it with the remaining arguments. Without a
	placeholder, _str is returned unchanged and the arguments are unused.
*/
template<typename T,typename... TArgs> auto u8c::fmt(u8c::str const _str,T const _fmt,TArgs... _args) -> u8c::str {
	u8c::str head;
	u8c::str tail;
	bool     found = false;
	for(auto chr : _str) {
		if(found) {
			tail += chr;
		}
		else if(chr == U'\uFFFC') [[unlikely]] {
			found = true;
		}
		else {
			head += chr;
		}
	}
	if(!found) {
		return head;
	}
	/* Recurse on the remainder so that each argument is consumed exactly once. */
	return head + u8c::fmter::fmt(_fmt) + u8c::fmt(tail,_args...);
}
#endif

#endif
+*/ + +#if !defined(u8c_key_3zvMYqWFDYnlTEyW) +#define u8c_key_3zvMYqWFDYnlTEyW + +#include <type_traits> /* std::is_same_v */ + +constexpr auto u8c::str::begin() const noexcept -> char32_t * { + return this->_arr.begin(); +} +constexpr auto u8c::str::end() const noexcept -> char32_t * { + return this->_arr.end(); +} +constexpr auto u8c::str::operator = (u8c::str const & _oth) -> u8c::str const & { + this->_arr = _oth._arr; + return *this; +} +constexpr u8c::str::str(u8c::str const & _oth) { + *this = _oth; +} +template<u8c::utf T> constexpr u8c::str::str(T const _chr) { + if constexpr (std::is_same_v<T,char32_t>) { + this->_arr.set(_chr); + } +} +template<u8c::utf T,u8c::size N> constexpr u8c::str::str(T const (& _strlit)[N]) noexcept { + this->_arr.set(_strlit,_strlit + N); +} +constexpr auto u8c::str::u8() const -> u8c::arr<char8_t> { + return u8c::cnv<char8_t>(this->begin(),this->end()); +} + +#endif diff --git a/u8c/include/u8c/u8c b/u8c/include/u8c/u8c new file mode 100644 index 0000000..f9591c5 --- /dev/null +++ b/u8c/include/u8c/u8c @@ -0,0 +1,50 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +/* + Greater Header Dependencies: + + misc →┬─────────────────────────→┬→ u8c + ├→ arr ─→┬────────────────→┤ + │ └→ utf ─→┬───────→┤ + │ └→ str ─→┤ + └→ cstr →┬────────────────→┤ + └→ impl →┬───────→┤ + └→ math →┘ +*/ + +#if !defined(u8c_key_piDyeERQmK9By1n3) +#define u8c_key_piDyeERQmK9By1n3 + +#include <concepts> /* std::convertible_to */ +#include <ostream> /* std::ostream */ +#include <type_traits> /* std::is_same_v */ + +#include <u8c/math> +#include <u8c/str> + +namespace u8c { + [[nodiscard]] constexpr auto uniblk(char32_t chr) -> u8c::str; + [[nodiscard]] constexpr auto uninm( char32_t chr) -> u8c::str; +} + +#include <u8c/u8c.d/uniblk> +#include <u8c/u8c.d/uninm> + +#endif diff --git a/u8c/include/u8c/u8c.d/uniblk b/u8c/include/u8c/u8c.d/uniblk new file mode 100644 index 0000000..e216833 --- /dev/null +++ b/u8c/include/u8c/u8c.d/uniblk @@ -0,0 +1,497 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. 
+*/ + +#if !defined(u8c_key_Z0dBX6z5KYfITIHo) +#define u8c_key_Z0dBX6z5KYfITIHo + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::uniblk(char32_t const _chr) -> u8c::str { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + if(_chr <= U'\u007F') { + return U"BASIC LATIN"; + } + if(_chr >= U'\u0080' && _chr <= U'\u00FF') { + return U"LATIN-1 SUPPLEMENT"; + } + if(_chr >= U'\u0100' && _chr <= U'\u017F') { + return U"LATIN EXTENDED-A"; + } + if(_chr >= U'\u0180' && _chr <= U'\u024F') { + return U"LATIN EXTENDED-B"; + } + if(_chr >= U'\u0250' && _chr <= U'\u02AF') { + return U"IPA EXTENSIONS"; + } + if(_chr >= U'\u02B0' && _chr <= U'\u02FF') { + return U"SPACING MODIFIER LETTERS"; + } + if(_chr >= U'\u0300' && _chr <= U'\u036F') { + return U"COMBINING DIRACITICAL MARKS"; + } + if(_chr >= U'\u0370' && _chr <= U'\u03FF') { + return U"GREEK AND COPTIC"; + } + if(_chr >= U'\u0400' && _chr <= U'\u04FF') { + return U"CYRILLIC"; + } + if(_chr >= U'\u0500' && _chr <= U'\u052F') { + return U"CYRILLIC SUPPLEMENT"; + } + if(_chr >= U'\u0530' && _chr <= U'\u058F') { + return U"ARMENIAN"; + } + if(_chr >= U'\u0590' && _chr <= U'\u05FF') { + return U"HEBREW"; + } + if(_chr >= U'\u0600' && _chr <= U'\u06FF') { + return U"ARABIC"; + } + if(_chr >= U'\u0700' && _chr <= U'\u074F') { + return U"SYRIAC"; + } + if(_chr >= U'\u0750' && _chr <= U'\u077F') { + return U"ARABIC SUPPLEMENT"; + } + if(_chr >= U'\u0780' && _chr <= U'\u07BF') { + return U"THAANA"; + } + if(_chr >= U'\u07C0' && _chr <= U'\u07FF') { + return U"NKO"; + } + if(_chr >= U'\u0800' && _chr <= U'\u083F') { + return U"SAMARITAN"; + } + if(_chr >= U'\u0840' && _chr <= U'\u085F') { + return U"MANDAIC"; + } + if(_chr >= U'\u0860' && _chr <= U'\u086F') { + return U"SYRIAC SUPPLEMENT"; + } + if(_chr >= U'\u08A0' && _chr <= U'\u08FF') { + return U"ARABIC EXTENDED-A"; + } + if(_chr >= U'\u0900' && _chr <= U'\u097F') { + return U"DEVANAGARI"; + } + if(_chr >= 
U'\u0980' && _chr <= U'\u09FF') { + return U"BENGALI"; + } + if(_chr >= U'\u0A00' && _chr <= U'\u0A7F') { + return U"GURMUKHI"; + } + if(_chr >= U'\u0A80' && _chr <= U'\u0AFF') { + return U"GUJARATI"; + } + if(_chr >= U'\u0B00' && _chr <= U'\u0B7F') { + return U"ORIYAS"; + } + if(_chr >= U'\u0B80' && _chr <= U'\u0BFF') { + return U"TAMIL"; + } + if(_chr >= U'\u0C00' && _chr <= U'\u0C7F') { + return U"TELUGU"; + } + if(_chr >= U'\u0C80' && _chr <= U'\u0CFF') { + return U"KANNADA"; + } + if(_chr >= U'\u0D00' && _chr <= U'\u0D7F') { + return U"MALAYALAM"; + } + if(_chr >= U'\u0D80' && _chr <= U'\u0DFF') { + return U"SINHALA"; + } + if(_chr >= U'\u0E00' && _chr <= U'\u0E7F') { + return U"THAI"; + } + if(_chr >= U'\u0E80' && _chr <= U'\u0EFF') { + return U"LAO"; + } + if(_chr >= U'\u0F00' && _chr <= U'\u0FFF') { + return U"TIBETAN"; + } + if(_chr >= U'\u1000' && _chr <= U'\u109F') { + return U"MYANMAR"; + } + if(_chr >= U'\u10A0' && _chr <= U'\u10FF') { + return U"GEORGIAN"; + } + if(_chr >= U'\u1100' && _chr <= U'\u11FF') { + return U"HANGUL JAMO"; + } + if(_chr >= U'\u1200' && _chr <= U'\u137F') { + return U"ETHIOPIC"; + } + if(_chr >= U'\u1380' && _chr <= U'\u139F') { + return U"ETHIOPIC SUPPLEMENT"; + } + if(_chr >= U'\u13A0' && _chr <= U'\u13FF') { + return U"CHEROKEE"; + } + if(_chr >= U'\u1400' && _chr <= U'\u167F') { + return U"UNIFIED CANADIAN ABORIGINAL SYLLABICS"; + } + if(_chr >= U'\u1680' && _chr <= U'\u169F') { + return U"OGHAM"; + } + if(_chr >= U'\u16A0' && _chr <= U'\u16FF') { + return U"RUNIC"; + } + if(_chr >= U'\u1700' && _chr <= U'\u171F') { + return U"TAGALOG"; + } + if(_chr >= U'\u1720' && _chr <= U'\u173F') { + return U"HANUNOO"; + } + if(_chr >= U'\u1740' && _chr <= U'\u175F') { + return U"BUHID"; + } + if(_chr >= U'\u1760' && _chr <= U'\u177F') { + return U"TAGBANWA"; + } + if(_chr >= U'\u1700' && _chr <= U'\u17FF') { + return U"TAGALOG"; + } + if(_chr >= U'\u1780' && _chr <= U'\u171F') { + return U"KHMER"; + } + if(_chr >= U'\u1800' && _chr <= 
U'\u18AF') { + return U"MONGOLIAN"; + } + if(_chr >= U'\u18B0' && _chr <= U'\u18FF') { + return U"UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED"; + } + if(_chr >= U'\u1900' && _chr <= U'\u194F') { + return U"LIMBU"; + } + if(_chr >= U'\u1950' && _chr <= U'\u197F') { + return U"TAI LE"; + } + if(_chr >= U'\u1980' && _chr <= U'\u19DF') { + return U"NEW TAI LUE"; + } + if(_chr >= U'\u19E0' && _chr <= U'\u19FF') { + return U"KHMER SYMBOLS"; + } + if(_chr >= U'\u1A00' && _chr <= U'\u1A1F') { + return U"BUGINESE"; + } + if(_chr >= U'\u1A20' && _chr <= U'\u1AAF') { + return U"TAI THAM"; + } + if(_chr >= U'\u1AB0' && _chr <= U'\u1AFF') { + return U"COMBINING DIACRITICAL MARKS EXTENDED"; + } + if(_chr >= U'\u1B00' && _chr <= U'\u1B7F') { + return U"BALINESE"; + } + if(_chr >= U'\u1B80' && _chr <= U'\u1BBF') { + return U"SUNDANESE"; + } + if(_chr >= U'\u1BC0' && _chr <= U'\u1BFF') { + return U"BATAK"; + } + if(_chr >= U'\u1C00' && _chr <= U'\u1C4F') { + return U"LEPCHA"; + } + if(_chr >= U'\u1C50' && _chr <= U'\u1C7F') { + return U"OL CHIKI"; + } + if(_chr >= U'\u1C80' && _chr <= U'\u1C8F') { + return U"CYRILLIC EXTENDED C"; + } + if(_chr >= U'\u1C90' && _chr <= U'\u1CBF') { + return U"GEORGIAN EXTENDED"; + } + if(_chr >= U'\u1CC0' && _chr <= U'\u1CCF') { + return U"SUNDANESE SUPPLEMENT"; + } + if(_chr >= U'\u1CD0' && _chr <= U'\u1CFF') { + return U"VEDIC EXTENSIONS"; + } + if(_chr >= U'\u1D00' && _chr <= U'\u1D7F') { + return U"PHONETIC EXTENSIONS"; + } + if(_chr >= U'\u1D80' && _chr <= U'\u1DBF') { + return U"PHONETIC EXTENSIONS SUPPLEMENT"; + } + if(_chr >= U'\u1DC0' && _chr <= U'\u1DFF') { + return U"COMBINING DIACRITICAL MARKS SUPPLEMENT"; + } + if(_chr >= U'\u1E00' && _chr <= U'\u1EFF') { + return U"LATIN EXTENDED ADDITIONAL"; + } + if(_chr >= U'\u1F00' && _chr <= U'\u1FFF') { + return U"GREEK EXTENDED"; + } + if(_chr >= U'\u2000' && _chr <= U'\u206F') { + return U"GENERAL PUNCTUATION"; + } + if(_chr >= U'\u2070' && _chr <= U'\u209F') { + return U"SUPERSCRIPTS AND 
SUBSCRIPTS"; + } + if(_chr >= U'\u20A0' && _chr <= U'\u20CF') { + return U"CURRENCY SYMBOLS"; + } + if(_chr >= U'\u20D0' && _chr <= U'\u20FF') { + return U"COMBINING DIACRITICAL MARKS FOR SYMBOLS"; + } + if(_chr >= U'\u2100' && _chr <= U'\u214F') { + return U"LETTERLIKE SYMBOLS"; + } + if(_chr >= U'\u2150' && _chr <= U'\u218F') { + return U"NUMBER FORMS"; + } + if(_chr >= U'\u2190' && _chr <= U'\u21FF') { + return U"ARROWS"; + } + if(_chr >= U'\U00011A00' && _chr <= U'\U00011A4F') { + return U"ZANABAZAR SQUARE"; + } + if(_chr >= U'\U00011A50' && _chr <= U'\U00011AAF') { + return U"SOYOMBO"; + } + if(_chr >= U'\U00011AC0' && _chr <= U'\U00011AFF') { + return U"PAU CIN HAU"; + } + if(_chr >= U'\U00011C00' && _chr <= U'\U00011C6F') { + return U"BHAIKSUKI"; + } + if(_chr >= U'\U00011C70' && _chr <= U'\U00011CBF') { + return U"MARCHEN"; + } + if(_chr >= U'\U00011D00' && _chr <= U'\U00011D5F') { + return U"MASARAM GONDI"; + } + if(_chr >= U'\U00011D60' && _chr <= U'\U00011DAF') { + return U"GUNJALA GONDI"; + } + if(_chr >= U'\U00011EE0' && _chr <= U'\U00011EFF') { + return U"MAKASAR"; + } + if(_chr >= U'\U00011FB0' && _chr <= U'\U00011FBF') { + return U"LISU SUPPLEMENT"; + } + if(_chr >= U'\U00011FC0' && _chr <= U'\U00011FFF') { + return U"TAMIL SUPPLEMENT"; + } + if(_chr >= U'\U00012000' && _chr <= U'\U000123FF') { + return U"CUNEIFORM"; + } + if(_chr >= U'\U00012400' && _chr <= U'\U0001247F') { + return U"CUNEIFORM NUMBERS AND PUNCTUATION"; + } + if(_chr >= U'\U00012480' && _chr <= U'\U0001254F') { + return U"EARLY DYNASTIC CUNEIFORM"; + } + if(_chr >= U'\U00013000' && _chr <= U'\U0001342F') { + return U"EGYPTIAN HIEROGLYPHS"; + } + if(_chr >= U'\U00013430' && _chr <= U'\U0001343F') { + return U"EGYPTIAN HIEROGLYPH FORMAT CONTROLS"; + } + if(_chr >= U'\U00014400' && _chr <= U'\U0001467F') { + return U"ANATOLIAN HIEROGLYPHS"; + } + if(_chr >= U'\U00016800' && _chr <= U'\U00016A3F') { + return U"BAMUM SUPPLEMENT"; + } + if(_chr >= U'\U00016A40' && _chr <= U'\U00016A6F') 
{ + return U"MRO"; + } + if(_chr >= U'\U00016AD0' && _chr <= U'\U00016AFF') { + return U"BASSA VAH"; + } + if(_chr >= U'\U00016B00' && _chr <= U'\U00016B8F') { + return U"PAHAWH HMONG"; + } + if(_chr >= U'\U00016E40' && _chr <= U'\U00016E9F') { + return U"MEDEFAIDRIN"; + } + if(_chr >= U'\U00016F00' && _chr <= U'\U00016F9F') { + return U"MIAO"; + } + if(_chr >= U'\U00016FE0' && _chr <= U'\U00016FFF') { + return U"IDEOGRAPHIC SYMBOLS AND PUNCTUATION"; + } + if(_chr >= U'\U00017000' && _chr <= U'\U000187FF') { + return U"TANGUT"; + } + if(_chr >= U'\U00018800' && _chr <= U'\U00018AFF') { + return U"TANGUT COMPONENTS"; + } + if(_chr >= U'\U00018B00' && _chr <= U'\U00018CFF') { + return U"KHITAN SMALL SCRIPT"; + } + if(_chr >= U'\U00018D00' && _chr <= U'\U00018D8F') { + return U"TANGUT SUPPLEMENT"; + } + if(_chr >= U'\U0001B000' && _chr <= U'\U0001B0FF') { + return U"KANA SUPPLEMENT"; + } + if(_chr >= U'\U0001B100' && _chr <= U'\U0001B12F') { + return U"KANA EXTENDED-A"; + } + if(_chr >= U'\U0001B130' && _chr <= U'\U0001B16F') { + return U"SMALL KANA EXTENSION"; + } + if(_chr >= U'\U0001B170' && _chr <= U'\U0001B2FF') { + return U"NUSHU"; + } + if(_chr >= U'\U0001BC00' && _chr <= U'\U0001BC9F') { + return U"DUPLOYAN"; + } + if(_chr >= U'\U0001BCA0' && _chr <= U'\U0001BCAF') { + return U"SHORTHAND FORMAT CONTROLS"; + } + if(_chr >= U'\U0001D000' && _chr <= U'\U0001D0FF') { + return U"BYZANTINE MUSICAL SYMBOLS"; + } + if(_chr >= U'\U0001D100' && _chr <= U'\U0001D1FF') { + return U"MUSICAL SYMBOLS"; + } + if(_chr >= U'\U0001D200' && _chr <= U'\U0001D24F') { + return U"ANCIENT GREEK MUSICAL NOTATION"; + } + if(_chr >= U'\U0001D2E0' && _chr <= U'\U0001D2FF') { + return U"MAYAN NUMERALS"; + } + if(_chr >= U'\U0001D300' && _chr <= U'\U0001D35F') { + return U"TAI XUAN JING SYMBOLS"; + } + if(_chr >= U'\U0001D360' && _chr <= U'\U0001D37F') { + return U"COUNTING ROD NUMERALS"; + } + if(_chr >= U'\U0001D400' && _chr <= U'\U0001D7FF') { + return U"MATHEMATICAL ALPHANUMERIC 
SYMBOLS"; + } + if(_chr >= U'\U0001D800' && _chr <= U'\U0001DAAF') { + return U"SUTTON SIGNWRITING"; + } + if(_chr >= U'\U0001E000' && _chr <= U'\U0001E02F') { + return U"GLAGOLITIC SUPPLEMENT"; + } + if(_chr >= U'\U0001E100' && _chr <= U'\U0001E14F') { + return U"NYIAKENG PUACHUE HMONG"; + } + if(_chr >= U'\U0001E2C0' && _chr <= U'\U0001E2FF') { + return U"WANCHO"; + } + if(_chr >= U'\U0001E800' && _chr <= U'\U0001E8DF') { + return U"MENDE KIKAKUI"; + } + if(_chr >= U'\U0001E900' && _chr <= U'\U0001E95F') { + return U"ADLAM"; + } + if(_chr >= U'\U0001EC70' && _chr <= U'\U0001ECBF') { + return U"INDIC SIYAQ NUMBERS"; + } + if(_chr >= U'\U0001ED00' && _chr <= U'\U0001ED4F') { + return U"OTTOMAN SIYAQ NUMBERS"; + } + if(_chr >= U'\U0001EE00' && _chr <= U'\U0001EEFF') { + return U"ARABIC MATHEMATICAL ALPHABETIC SYMBOLS"; + } + if(_chr >= U'\U0001F000' && _chr <= U'\U0001F02F') { + return U"MAHJONG TILES"; + } + if(_chr >= U'\U0001F030' && _chr <= U'\U0001F09F') { + return U"DOMINO TILES"; + } + if(_chr >= U'\U0001F0A0' && _chr <= U'\U0001F0FF') { + return U"PLAYING CARDS"; + } + if(_chr >= U'\U0001F100' && _chr <= U'\U0001F1FF') { + return U"ENCLOSED ALPHANUMERIC SUPPLEMENT"; + } + if(_chr >= U'\U0001F200' && _chr <= U'\U0001F2FF') { + return U"ENCLOSED IDEOGRAPHIC SUPPLEMENT"; + } + if(_chr >= U'\U0001F300' && _chr <= U'\U0001F5FF') { + return U"MISCELLANEOUS SYMBOLS AND PICTOGRAPHS"; + } + if(_chr >= U'\U0001F600' && _chr <= U'\U0001F64F') { + return U"EMOTICONS"; + } + if(_chr >= U'\U0001F650' && _chr <= U'\U0001F67F') { + return U"ORNAMENTAL DINGBATS"; + } + if(_chr >= U'\U0001F680' && _chr <= U'\U0001F6FF') { + return U"TRANSPORT AND MAP SYMBOLS"; + } + if(_chr >= U'\U0001F700' && _chr <= U'\U0001F77F') { + return U"ALCHEMICAL SYMBOLS"; + } + if(_chr >= U'\U0001F780' && _chr <= U'\U0001F7FF') { + return U"GEOMETRIC SHAPES EXTENDED"; + } + if(_chr >= U'\U0001F800' && _chr <= U'\U0001F8FF') { + return U"SUPPLEMENTAL ARROWS-C"; + } + if(_chr >= U'\U0001F900' && _chr 
<= U'\U0001F9FF') { + return U"SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS"; + } + if(_chr >= U'\U0001FA00' && _chr <= U'\U0001FA6F') { + return U"CHESS SYMBOLS"; + } + if(_chr >= U'\U0001FA70' && _chr <= U'\U0001FAFF') { + return U"SYMBOLS AND PICTOGRAPHS EXTENDED-A"; + } + if(_chr >= U'\U0001FB00' && _chr <= U'\U0001FBFF') { + return U"SYMBOLS FOR LEGACY COMPUTING"; + } + if(_chr >= U'\U00020000' && _chr <= U'\U0002A6DF') { + return U"CJK UNIFIED IDEOGRAPHS EXTENSION B"; + } + if(_chr >= U'\U0002A700' && _chr <= U'\U0002B73F') { + return U"CJK UNIFIED IDEOGRAPHS EXTENSION C"; + } + if(_chr >= U'\U0002B740' && _chr <= U'\U0002B81F') { + return U"CJK UNIFIED IDEOGRAPHS EXTENSION D"; + } + if(_chr >= U'\U0002B820' && _chr <= U'\U0002CEAF') { + return U"CJK UNIFIED IDEOGRAPHS EXTENSION E"; + } + if(_chr >= U'\U0002CEB0' && _chr <= U'\U0002EBEF') { + return U"CJK UNIFIED IDEOGRAPHS EXTENSION F"; + } + if(_chr >= U'\U0002F800' && _chr <= U'\U0002FA1F') { + return U"CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT"; + } + if(_chr >= U'\U00030000' && _chr <= U'\U0003134F') { + return U"CJK UNIFIED IDEOGRAPHS EXTENSION G"; + } + if(_chr >= U'\U000E0000' && _chr <= U'\U000E007F') { + return U"TAGS"; + } + if(_chr >= U'\U000E0100' && _chr <= U'\U000E1EFF') { + return U"VARIATION SELECTORS SUPPLEMENT"; + } + if(_chr >= U'\U000F0000' && _chr <= U'\U000FFFFF') { + return U"SUPPLEMENTARY PRIVATE USE AREA-A"; + } + if(_chr >= U'\U00100000' && _chr <= U'\U0010FFFF') { + return U"SUPPLEMENTARY PRIVATE USE AREA-B"; + } + return U"UNDEFINED IN UNICODE"; +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/u8c.d/uninm b/u8c/include/u8c/u8c.d/uninm new file mode 100644 index 0000000..3fc67f3 --- /dev/null +++ b/u8c/include/u8c/u8c.d/uninm @@ -0,0 +1,2697 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_EW3CUEOMiNBCpImA) +#define u8c_key_EW3CUEOMiNBCpImA + +#include <stdexcept> /* std::out_of_range */ + +constexpr auto u8c::uninm(char32_t const _chr) -> u8c::str{ + switch(_chr) { + [[unlikely]] default: + if(_chr > u8c::unimax) [[unlikely]] { + throw std::out_of_range("Character out of range."); + } + return U"UNDEFINED IN UNICODE"; + /* BASIC LATIN: */ + case U'\u0000': + return U"NULL"; + case U'\u0001': + return U"START OF HEADING"; + case U'\u0002': + return U"START OF TEXT"; + case U'\u0003': + return U"END OF TEXT"; + case U'\u0004': + return U"END OF TRANSMISSION"; + case U'\u0005': + return U"ENQUIRY"; + case U'\u0006': + return U"ACKNOWLEDGE"; + case U'\u0007': + return U"BELL"; + case U'\u0008': + return U"BACKSPACE"; + case U'\u0009': + return U"HORIZONTAL TABULATION"; + case U'\u000A': + return U"NEW LINE"; + case U'\u000B': + return U"VERTICAL TABULATION"; + case U'\u000C': + return U"FORM FEED"; + case U'\u000D': + return U"CARRIAGE RETURN"; + case U'\u000E': + return U"SHIFT OUT"; + case U'\u000F': + return U"SHIFT IN"; + case
U'\u0010': + return U"DATA LINK ESCAPE"; + case U'\u0011': + return U"DEVICE CONTROL ONE"; + case U'\u0012': + return U"DEVICE CONTROL TWO"; + case U'\u0013': + return U"DEVICE CONTROL THREE"; + case U'\u0014': + return U"DEVICE CONTROL FOUR"; + case U'\u0015': + return U"NEGATIVE ACKNOWLEDGE"; + case U'\u0016': + return U"SYNCHRONOUS IDLE"; + case U'\u0017': + return U"END OF TRANSMISSION BLOCk"; + case U'\u0018': + return U"CANCEL"; + case U'\u0019': + return U"END OF MEDIUM"; + case U'\u001A': + return U"SUBSTITUTE"; + case U'\u001B': + return U"ESCAPE"; + case U'\u001C': + return U"FILE SEPERATOR"; + case U'\u001D': + return U"GROUP SEPERATOR"; + case U'\u001E': + return U"RECORD SEPERATOR"; + case U'\u001F': + return U"UNIT SEPERATOR"; + case U'\u0020': + return U"SPACE"; + case U'\u0021': + return U"EXCLAMATION MARK"; + case U'\u0022': + return U"QUOTATION MARK"; + case U'\u0023': + return U"NUMBER SIGN"; + case U'\u0024': + return U"DOLLAR SIGN"; + case U'\u0025': + return U"PERCENT SIGN"; + case U'\u0026': + return U"AMPERSAND"; + case U'\u0027': + return U"APOSTROPHE"; + case U'\u0028': + return U"LEFT PARANTHESIS"; + case U'\u0029': + return U"RIGHT PARANTHESIS"; + case U'\u002A': + return U"ASTERISK"; + case U'\u002B': + return U"PLUS SIGN"; + case U'\u002C': + return U"COMMA"; + case U'\u002D': + return U"HYPHEN-MINUS"; + case U'\u002E': + return U"FULL STOP"; + case U'\u002F': + return U"SOLIDUS"; + case U'\u0030': + return U"DIGIT ZERO"; + case U'\u0031': + return U"DIGIT ONE"; + case U'\u0032': + return U"DIGIT TWO"; + case U'\u0033': + return U"DIGIT THREE"; + case U'\u0034': + return U"DIGIT FOUR"; + case U'\u0035': + return U"DIGIT FIVE"; + case U'\u0036': + return U"DIGIT SIX"; + case U'\u0037': + return U"DIGIT SEVEN"; + case U'\u0038': + return U"DIGIT EIGHT"; + case U'\u0039': + return U"DIGIT NINE"; + case U'\u003A': + return U"COLON"; + case U'\u003B': + return U"SEMICOLON"; + case U'\u003C': + return U"LESS-THAN SIGN"; + case U'\u003D': + 
return U"EQUALS SIGN"; + case U'\u003E': + return U"GREATER-THAN SIGN"; + case U'\u003F': + return U"QUESTION MARK"; + case U'\u0040': + return U"COMMERCIAL AT"; + case U'\u0041': + return U"LATIN CAPITAL LETTER A"; + case U'\u0042': + return U"LATIN CAPITAL LETTER B"; + case U'\u0043': + return U"LATIN CAPITAL LETTER C"; + case U'\u0044': + return U"LATIN CAPITAL LETTER D"; + case U'\u0045': + return U"LATIN CAPITAL LETTER E"; + case U'\u0046': + return U"LATIN CAPITAL LETTER F"; + case U'\u0047': + return U"LATIN CAPITAL LETTER G"; + case U'\u0048': + return U"LATIN CAPITAL LETTER H"; + case U'\u0049': + return U"LATIN CAPITAL LETTER I"; + case U'\u004A': + return U"LATIN CAPITAL LETTER J"; + case U'\u004B': + return U"LATIN CAPITAL LETTER K"; + case U'\u004C': + return U"LATIN CAPITAL LETTER L"; + case U'\u004D': + return U"LATIN CAPITAL LETTER M"; + case U'\u004E': + return U"LATIN CAPITAL LETTER N"; + case U'\u004F': + return U"LATIN CAPITAL LETTER O"; + case U'\u0050': + return U"LATIN CAPITAL LETTER P"; + case U'\u0051': + return U"LATIN CAPITAL LETTER Q"; + case U'\u0052': + return U"LATIN CAPITAL LETTER R"; + case U'\u0053': + return U"LATIN CAPITAL LETTER S"; + case U'\u0054': + return U"LATIN CAPITAL LETTER T"; + case U'\u0055': + return U"LATIN CAPITAL LETTER U"; + case U'\u0056': + return U"LATIN CAPITAL LETTER V"; + case U'\u0057': + return U"LATIN CAPITAL LETTER W"; + case U'\u0058': + return U"LATIN CAPITAL LETTER X"; + case U'\u0059': + return U"LATIN CAPITAL LETTER Y"; + case U'\u005A': + return U"LATIN CAPITAL LETTER Z"; + case U'\u005B': + return U"LEFT SQUARE BRACKET"; + case U'\u005C': + return U"REVERSE SOLIDUS"; + case U'\u005D': + return U"RIGHT SQUARE BRACKET"; + case U'\u005E': + return U"CIRCUMFLEX ACCENT"; + case U'\u005F': + return U"LOW LINE"; + case U'\u0060': + return U"GRAVE ACCENT"; + case U'\u0061': + return U"LATIN SMALL LETTER A"; + case U'\u0062': + return U"LATIN SMALL LETTER B"; + case U'\u0063': + return U"LATIN SMALL 
LETTER C"; + case U'\u0064': + return U"LATIN SMALL LETTER D"; + case U'\u0065': + return U"LATIN SMALL LETTER E"; + case U'\u0066': + return U"LATIN SMALL LETTER F"; + case U'\u0067': + return U"LATIN SMALL LETTER G"; + case U'\u0068': + return U"LATIN SMALL LETTER H"; + case U'\u0069': + return U"LATIN SMALL LETTER I"; + case U'\u006A': + return U"LATIN SMALL LETTER J"; + case U'\u006B': + return U"LATIN SMALL LETTER K"; + case U'\u006C': + return U"LATIN SMALL LETTER L"; + case U'\u006D': + return U"LATIN SMALL LETTER M"; + case U'\u006E': + return U"LATIN SMALL LETTER N"; + case U'\u006F': + return U"LATIN SMALL LETTER O"; + case U'\u0070': + return U"LATIN SMALL LETTER P"; + case U'\u0071': + return U"LATIN SMALL LETTER Q"; + case U'\u0072': + return U"LATIN SMALL LETTER R"; + case U'\u0073': + return U"LATIN SMALL LETTER S"; + case U'\u0074': + return U"LATIN SMALL LETTER T"; + case U'\u0075': + return U"LATIN SMALL LETTER U"; + case U'\u0076': + return U"LATIN SMALL LETTER V"; + case U'\u0077': + return U"LATIN SMALL LETTER W"; + case U'\u0078': + return U"LATIN SMALL LETTER X"; + case U'\u0079': + return U"LATIN SMALL LETTER Y"; + case U'\u007A': + return U"LATIN SMALL LETTER Z"; + case U'\u007B': + return U"LEFT CURLY BRACKET"; + case U'\u007C': + return U"VERTICAL LINE"; + case U'\u007D': + return U"RIGHT CURLY BRACKET"; + case U'\u007E': + return U"TILDE"; + case U'\u007F': + return U"DELETE"; + /* LATIN-1 SUPPLEMENT: */ + case U'\u0080': + return U"PADDING CHARACTER"; + case U'\u0081': + return U"HIGH OCTET PRESET"; + case U'\u0082': + return U"BREAK PERMITTED HERE"; + case U'\u0083': + return U"NO BREAK HERE"; + case U'\u0084': + return U"INDEX"; + case U'\u0085': + return U"NEXT LINE"; + case U'\u0086': + return U"START OF SELECTED AREA"; + case U'\u0087': + return U"END OF SELECTED AREA"; + case U'\u0088': + return U"CHARACTER TABULATION SET"; + case U'\u0089': + return U"CHARACTER TABULATION WITH JUSTIFICATION"; + case U'\u008A': + return U"LINE 
TABULATION SET"; + case U'\u008B': + return U"PARTIAL LINE FORWARD"; + case U'\u008C': + return U"PARTIAL LINE BACKWARD"; + case U'\u008D': + return U"REVERSE LINE FEED"; + case U'\u008E': + return U"SINGLE SHIFT TWO"; + case U'\u008F': + return U"SINGLE SHIFT THREE"; + case U'\u0090': + return U"DEVICE CONTROL STRING"; + case U'\u0091': + return U"PRIVATE USE ONE"; + case U'\u0092': + return U"PRIVATE USE TWO"; + case U'\u0093': + return U"SET TRANSMIT STATE"; + case U'\u0094': + return U"CANCEL CHARACTER"; + case U'\u0095': + return U"MESSAGE WAITING"; + case U'\u0096': + return U"START OF GUARDED AREA"; + case U'\u0097': + return U"END OF GUARDED AREA"; + case U'\u0098': + return U"START OF STRING"; + case U'\u0099': + return U"SINGLE GRAPHIC CHARACTER INTRODUCER"; + case U'\u009A': + return U"SINGLE CHARACTER INTRODUCER"; + case U'\u009B': + return U"CONTROL SEQUENCE INTRODUCER"; + case U'\u009C': + return U"STRING TERMINATOR"; + case U'\u009D': + return U"OPERATING SYSTEM COMMAND"; + case U'\u009E': + return U"PRIVACY MESSAGE"; + case U'\u009F': + return U"APPLICATION PROGRAM COMMAND"; + case U'\u00A0': + return U"NO-BREAK SPACE"; + case U'\u00A1': + return U"INVERTED EXCLAMATION MARK"; + case U'\u00A2': + return U"CENT SIGN"; + case U'\u00A3': + return U"POUND SIGN"; + case U'\u00A4': + return U"CURRENCY SIGN"; + case U'\u00A5': + return U"YEN SIGN"; + case U'\u00A6': + return U"BROKEN BAR"; + case U'\u00A7': + return U"SECTION SIGN"; + case U'\u00A8': + return U"DIAERESIS"; + case U'\u00A9': + return U"COPYRIGHT SIGN"; + case U'\u00AA': + return U"FEMININE ORDINAL INDICATOR"; + case U'\u00AB': + return U"LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"; + case U'\u00AC': + return U"NOT SIGN"; + case U'\u00AD': + return U"SOFT HYPHEN"; + case U'\u00AE': + return U"REGISTERED SIGN"; + case U'\u00AF': + return U"MACRON"; + case U'\u00B0': + return U"DEGREE SIGN"; + case U'\u00B1': + return U"PLUS MINUS SYMBOL"; + case U'\u00B2': + return U"SUPERSCRIPT TWO"; + case 
U'\u00B3': + return U"SUPERSCRIPT THREE"; + case U'\u00B4': + return U"ACUTE ACCENT"; + case U'\u00B5': + return U"MICRO SIGN"; + case U'\u00B6': + return U"PILCROW SIGN"; + case U'\u00B7': + return U"MIDDLE DOT"; + case U'\u00B8': + return U"CEDILLA"; + case U'\u00B9': + return U"SUPERSCRIPT ONE"; + case U'\u00BA': + return U"MASCULINE ORDINAL INDICATOR"; + case U'\u00BB': + return U"RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"; + case U'\u00BC': + return U"VULGAR FRACTION ONE QUARTER"; + case U'\u00BD': + return U"VULGAR FRACTION ONE HALF"; + case U'\u00BE': + return U"VULGAR FRACTION THREE QUARTERS"; + case U'\u00BF': + return U"INVERTED QUESTION MARK"; + case U'\u00C0': + return U"LATIN CAPITAL LETTER A WITH GRAVE"; + case U'\u00C1': + return U"LATIN CAPITAL LETTER A WITH ACUTE"; + case U'\u00C2': + return U"LATIN CAPITAL LETTER A WITH CIRCUMFLEX"; + case U'\u00C3': + return U"LATIN CAPITAL LETTER A WITH TILDE"; + case U'\u00C4': + return U"LATIN CAPITAL LETTER A WITH DIAERESIS"; + case U'\u00C5': + return U"LATIN CAPITAL LETTER A WITH RING ABOVE"; + case U'\u00C6': + return U"LATIN CAPITAL LETTER AE"; + case U'\u00C7': + return U"LATIN CAPITAL LETTER C WITH CEDILLA"; + case U'\u00C8': + return U"LATIN CAPITAL LETTER E WITH GRAVE"; + case U'\u00C9': + return U"LATIN CAPITAL LETTER E WITH ACUTE"; + case U'\u00CA': + return U"LATIN CAPITAL LETTER E WITH CIRCUMFLEX"; + case U'\u00CB': + return U"LATIN CAPITAL LETTER E WITH DIAERESIS"; + case U'\u00CC': + return U"LATIN CAPITAL LETTER I WITH GRAVE"; + case U'\u00CD': + return U"LATIN CAPITAL LETTER I WITH ACUTE"; + case U'\u00CE': + return U"LATIN CAPITAL LETTER I WITH CIRCUMFLEX"; + case U'\u00CF': + return U"LATIN CAPITAL LETTER I WITH DIAERESIS"; + case U'\u00D0': + return U"LATIN CAPITAL LETTER ETH"; + case U'\u00D1': + return U"LATIN CAPITAL LETTER N WITH TILDE"; + case U'\u00D2': + return U"LATIN CAPITAL LETTER O WITH GRAVE"; + case U'\u00D3': + return U"LATIN CAPITAL LETTER O WITH ACUTE"; + case U'\u00D4': + 
return U"LATIN CAPITAL LETTER O WITH CIRCUMFLEX"; + case U'\u00D5': + return U"LATIN CAPITAL LETTER O WITH TILDE"; + case U'\u00D6': + return U"LATIN CAPITAL LETTER O WITH DIAERESIS"; + case U'\u00D7': + return U"MULTIPLICATION SIGN"; + case U'\u00D8': + return U"LATIN CAPITAL LETTER O WITH STROKE"; + case U'\u00D9': + return U"LATIN CAPITAL LETTER U WITH GRAVE"; + case U'\u00DA': + return U"LATIN CAPITAL LETTER U WITH ACUTE"; + case U'\u00DB': + return U"LATIN CAPITAL LETTER U WITH CIRCUMFLEX"; + case U'\u00DC': + return U"LATIN CAPITAL LETTER U WITH DIAERESIS"; + case U'\u00DD': + return U"LATIN CAPITAL LETTER Y WITH ACUTE"; + case U'\u00DE': + return U"LATIN CAPITAL LETTER THORN"; + case U'\u00DF': + return U"LATIN SMALL LETTER SHARP S"; + case U'\u00E0': + return U"LATIN SMALL LETTER A WITH GRAVE"; + case U'\u00E1': + return U"LATIN SMALL LETTER A WITH ACUTE"; + case U'\u00E2': + return U"LATIN SMALL LETTER A WITH CIRCUMFLEX"; + case U'\u00E3': + return U"LATIN SMALL LETTER A WITH TILDE"; + case U'\u00E4': + return U"LATIN SMALL LETTER A WITH DIAERESIS"; + case U'\u00E5': + return U"LATIN SMALL LETTER A WITH RING ABOVE"; + case U'\u00E6': + return U"LATIN SMALL LETTER AE"; + case U'\u00E7': + return U"LATIN SMALL LETTER C WITH CEDILLA"; + case U'\u00E8': + return U"LATIN SMALL LETTER E WITH GRAVE"; + case U'\u00E9': + return U"LATIN SMALL LETTER E WITH ACUTE"; + case U'\u00EA': + return U"LATIN SMALL LETTER E WITH CIRCUMFLEX"; + case U'\u00EB': + return U"LATIN SMALL LETTER E WITH DIAERESIS"; + case U'\u00EC': + return U"LATIN SMALL LETTER I WITH GRAVE"; + case U'\u00ED': + return U"LATIN SMALL LETTER I WITH ACUTE"; + case U'\u00EE': + return U"LATIN SMALL LETTER I WITH CIRCUMFLEX"; + case U'\u00EF': + return U"LATIN SMALL LETTER I WITH DIAERESIS"; + case U'\u00F0': + return U"LATIN SMALL LETTER ETH"; + case U'\u00F1': + return U"LATIN SMALL LETTER N WITH TILDE"; + case U'\u00F2': + return U"LATIN SMALL LETTER O WITH GRAVE"; + case U'\u00F3': + return U"LATIN 
SMALL LETTER O WITH ACUTE"; + case U'\u00F4': + return U"LATIN SMALL LETTER O WITH CIRCUMFLEX"; + case U'\u00F5': + return U"LATIN SMALL LETTER O WITH TILDE"; + case U'\u00F6': + return U"LATIN SMALL LETTER O WITH DIAERESIS"; + case U'\u00F7': + return U"DIVISION SIGN"; + case U'\u00F8': + return U"LATIN SMALL LETTER O WITH STROKE"; + case U'\u00F9': + return U"LATIN SMALL LETTER U WITH GRAVE"; + case U'\u00FA': + return U"LATIN SMALL LETTER U WITH ACUTE"; + case U'\u00FB': + return U"LATIN SMALL LETTER U WITH CIRCUMFLEX"; + case U'\u00FC': + return U"U WITH TWO DOTS"; + case U'\u00FD': + return U"LATIN SMALL LETTER Y WITH ACUTE"; + case U'\u00FE': + return U"LATIN SMALL LETTER THORN"; + case U'\u00FF': + return U"LATIN SMALL LETTER Y WITH DIAERESIS"; + /* LATIN EXTENDED-A: */ + case U'\u0100': + return U"LATIN CAPITAL LETTER A WITH MACRON"; + case U'\u0101': + return U"LATIN SMALL LETTER A WITH MACRON"; + case U'\u0102': + return U"LATIN CAPITAL LETTER A WITH BREVE"; + case U'\u0103': + return U"LATIN SMALL LETTER A WITH BREVE"; + case U'\u0104': + return U"LATIN CAPITAL LETTER A WITH OGONEK"; + case U'\u0105': + return U"LATIN SMALL LETTER A WITH OGONEK"; + case U'\u0106': + return U"LATIN CAPITAL LETTER C WITH ACUTE"; + case U'\u0107': + return U"LATIN SMALL LETTER C WITH ACUTE"; + case U'\u0108': + return U"LATIN CAPITAL LETTER C WITH CIRCUMFLEX"; + case U'\u0109': + return U"LATIN SMALL LETTER C WITH CIRCUMFLEX"; + case U'\u010A': + return U"LATIN CAPITAL LETTER C WITH DOT ABOVE"; + case U'\u010B': + return U"LATIN SMALL LETTER C WITH DOT ABOVE"; + case U'\u010C': + return U"LATIN CAPITAL LETTER C WITH CARON"; + case U'\u010D': + return U"LATIN SMALL LETTER C WITH CARON"; + case U'\u010E': + return U"LATIN CAPITAL LETTER D WITH CARON"; + case U'\u010F': + return U"LATIN SMALL LETTER D WITH CARON"; + case U'\u0110': + return U"LATIN CAPITAL LETTER D WITH STROKE"; + case U'\u0111': + return U"LATIN SMALL LETTER D WITH STROKE"; + case U'\u0112': + return U"LATIN 
CAPITAL LETTER E WITH MACRON"; + case U'\u0113': + return U"LATIN SMALL LETTER E WITH MACRON"; + case U'\u0114': + return U"LATIN CAPITAL LETTER E WITH BREVE"; + case U'\u0115': + return U"LATIN SMALL LETTER E WITH BREVE"; + case U'\u0116': + return U"LATIN CAPITAL LETTER E WITH DOT ABOVE"; + case U'\u0117': + return U"LATIN SMALL LETTER E WITH DOT ABOVE"; + case U'\u0118': + return U"LATIN CAPITAL LETTER E WITH OGONEK"; + case U'\u0119': + return U"LATIN SMALL LETTER E WITH OGONEK"; + case U'\u011A': + return U"LATIN CAPITAL LETTER E WITH CARON"; + case U'\u011B': + return U"LATIN SMALL LETTER E WITH CARON"; + case U'\u011C': + return U"LATIN CAPITAL LETTER G WITH CIRCUMFLEX"; + case U'\u011D': + return U"LATIN SMALL LETTER G WITH CIRCUMFLEX"; + case U'\u011E': + return U"LATIN CAPITAL LETTER G WITH BREVE"; + case U'\u011F': + return U"LATIN SMALL LETTER G WITH BREVE"; + case U'\u0120': + return U"LATIN CAPITAL LETTER G WITH DOT ABOVE"; + case U'\u0121': + return U"LATIN SMALL LETTER G WITH DOT ABOVE"; + case U'\u0122': + return U"LATIN CAPITAL LETTER G WITH CEDILLA"; + case U'\u0123': + return U"LATIN SMALL LETTER G WITH CEDILLA"; + case U'\u0124': + return U"LATIN CAPITAL LETTER H WITH CIRCUMFLEX"; + case U'\u0125': + return U"LATIN SMALL LETTER H WITH CIRCUMFLEX"; + case U'\u0126': + return U"LATIN CAPITAL LETTER H WITH STROKE"; + case U'\u0127': + return U"LATIN SMALL LETTER H WITH STROKE"; + case U'\u0128': + return U"LATIN CAPITAL LETTER I WITH TILDE"; + case U'\u0129': + return U"LATIN SMALL LETTER I WITH TILDE"; + case U'\u012A': + return U"LATIN CAPITAL LETTER I WITH MACRON"; + case U'\u012B': + return U"LATIN SMALL LETTER I WITH MACRON"; + case U'\u012C': + return U"LATIN CAPITAL LETTER I WITH BREVE"; + case U'\u012D': + return U"LATIN SMALL LETTER I WITH BREVE"; + case U'\u012E': + return U"LATIN CAPITAL LETTER I WITH OGONEK"; + case U'\u012F': + return U"LATIN SMALL LETTER I WITH OGONEK"; + case U'\u0130': + return U"LATIN CAPITAL LETTER I WITH DOT 
ABOVE"; + case U'\u0131': + return U"LATIN SMALL LETTER DOTLESS I"; + case U'\u0132': + return U"LATIN CAPITAL LIGATURE IJ"; + case U'\u0133': + return U"LATIN SMALL LIGATURE IJ"; + case U'\u0134': + return U"LATIN CAPITAL LETTER J WITH CIRCUMFLEX"; + case U'\u0135': + return U"LATIN SMALL LETTER J WITH CIRCUMFLEX"; + case U'\u0136': + return U"LATIN CAPITAL LETTER K WITH CEDILLA"; + case U'\u0137': + return U"LATIN SMALL LETTER K WITH CEDILLA"; + case U'\u0138': + return U"LATIN SMALL LETTER KRA"; + case U'\u0139': + return U"LATIN CAPITAL LETTER L WITH ACUTE"; + case U'\u013A': + return U"LATIN SMALL LETTER L WITH ACUTE"; + case U'\u013B': + return U"LATIN CAPITAL LETTER L WITH CEDILLA"; + case U'\u013C': + return U"LATIN SMALL LETTER L WITH CEDILLA"; + case U'\u013D': + return U"LATIN CAPITAL LETTER L WITH CARON"; + case U'\u013E': + return U"LATIN SMALL LETTER L WITH CARON"; + case U'\u013F': + return U"LATIN CAPITAL LETTER L WITH MDDLE DOT"; + case U'\u0140': + return U"LATIN SMALL LETTER L WITH MIDDLE DOT"; + case U'\u0150': + return U"LATIN CAPITAL LETTER O WITH DOUBLE ACUTE"; + case U'\u0160': + return U"LATIN CAPITAL LETTER S WITH CARON"; + case U'\u0170': + return U"LATIN CAPITAL LETTER U WITH DOUBLE ACUTE"; + /* LATIN EXTENDED-B: */ + case U'\u0180': + return U"LATIN SMALL LETTER B WITH STROKE"; + case U'\u0190': + return U"LATIN CAPITAL LETTER OPEN E"; + case U'\u01A0': + return U"LATIN CAPITAL LETTER O WITH HORN"; + case U'\u01B0': + return U"LATIN SMALL LETTER U WITH HORN"; + case U'\u01C0': + return U"LATIN LETTER DENTAL CLICK"; + case U'\u01D0': + return U"LATIN SMALL LETTER I WITH CARON"; + case U'\u01E0': + return U"LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON"; + case U'\u01F0': + return U"LATIN SMALL LETTER J WITH CARON"; + case U'\u0200': + return U"LATIN CAPITAL LETTER A WITH DOUBLE GRAVE"; + case U'\u0210': + return U"LATIN CAPITAL LETTER R WITH DOUBLE GRAVE"; + case U'\u0220': + return U"LATIN CAPITAL LETTER N WITH LONG RIGHT LEG"; + 
case U'\u0230': + return U"LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON"; + case U'\u0240': + return U"LATIN SMALL LETTER Z WITH SWASH TAIL"; + /* IPA EXTENSIONS: */ + case U'\u0250': + return U"LATIN SMALL LETTER TURNED A"; + case U'\u0251': + return U"LATIN SMALL LETTER ALPHA"; + case U'\u0252': + return U"LATIN SMALL LETTER TURNED ALPHA"; + case U'\u0253': + return U"LATIN SMALL LETTER B WITH HOOK"; + case U'\u0254': + return U"LATIN SMALL LETTER OPEN O"; + case U'\u0255': + return U"LATIN SMALL LETTER C WITH CURL"; + case U'\u0256': + return U"LATIN SMALL LETTER D WITH TAIL"; + case U'\u0257': + return U"LATIN SMALL LETTER D WITH HOOK"; + case U'\u0258': + return U"LATIN SMALL LETTER REVERSED E"; + case U'\u0259': + return U"LATIN SMALL LETTER SCHWA"; + case U'\u025A': + return U"LATIN SMALL LETTER SCHWA WITH HOOK"; + case U'\u025B': + return U"LATIN SMALL LETTER OPEN E"; + case U'\u025C': + return U"LATIN SMALL LETTER REVERSED OPEN E"; + case U'\u025D': + return U"LATIN SMALL LETTER REVERSED OPEN E WITH HOOK"; + case U'\u025E': + return U"LATIN SMALL LETTER CLOSED REVERSED OPEN E"; + case U'\u025F': + return U"LATIN SMALL LETTER DOTLESS J WITH STROKE"; + case U'\u0260': + return U"LATIN SMALL LETTER G WITH HOOK"; + case U'\u0261': + return U"LATIN SMALL LETTER SCRIPT G"; + case U'\u0262': + return U"LATIN LETTER SMALL CAPITAL G"; + case U'\u0263': + return U"LATIN SMALL LETTER GAMMA"; + case U'\u0264': + return U"LATIN SMALL LETTER RAMS HORN"; + case U'\u0265': + return U"LATIN SMALL LETTER TURNED H"; + case U'\u0266': + return U"LATIN SMALL LETTER H WITH HOOK"; + case U'\u0267': + return U"LATIN SMALL LETTER HENG WITH HOOK"; + case U'\u0268': + return U"LATIN SMALL LETTER I WITH STROKE"; + case U'\u0269': + return U"LATIN SMALL LETTER IOTA"; + case U'\u026A': + return U"LATIN LETTER SMALL CAPITAL I"; + case U'\u026B': + return U"LATIN SMALL LETTER L WITH MIDDLE TILDE"; + case U'\u026C': + return U"LATIN SMALL LETTER L WITH BELT"; + case U'\u026D': + 
return U"LATIN SMALL LETTER L WITH RETROFLEX HOOK"; + case U'\u026E': + return U"LATIN SMALL LETTER LEZH"; + case U'\u026F': + return U"LATIN SMALL LETTER TURNED M"; + case U'\u0270': + return U"LATIN SMALL LETTER TURNED M WITH LONG LEG"; + case U'\u0271': + return U"LATIN SMALL LETTER M WITH HOOK"; + case U'\u0272': + return U"LATIN SMALL LETTER N WITH LEFT HOOK"; + case U'\u0273': + return U"LATIN SMALL LETTER N WITH RETROFLEX HOOK"; + case U'\u0274': + return U"LATIN LETTER SMALL CAPITAL N"; + case U'\u0275': + return U"LATIN SMALL LETTER BARRED O"; + case U'\u0276': + return U"LATIN LETTER SMALL CAPITAL OE"; + case U'\u0277': + return U"LATIN SMALL LETTER CLOSED OMEGA"; + case U'\u0278': + return U"LATIN SMALL LETTER PHI"; + case U'\u0279': + return U"LATIN SMALL LETTER TURNED R"; + case U'\u027A': + return U"LATIN SMALL LETTER TURNED R WITH LONG LEG"; + case U'\u027B': + return U"LATIN SMALL LETTER TURNED R WITH HOOK"; + case U'\u027C': + return U"LATIN SMALL LETTER R WITH LONG LEG"; + case U'\u027D': + return U"LATIN SMALL LETTER R WITH TAIL"; + case U'\u027E': + return U"LATIN SMALL LETTER R WITH FISHHOOK"; + case U'\u027F': + return U"LATIN SMALL LETTER REVERSED R WITH FISHHOOK"; + case U'\u0280': + return U"LATIN LETTER SMALL CAPITAL R"; + case U'\u0281': + return U"LATIN LETTER SMALL CAPITAL INVERTED R"; + case U'\u0282': + return U"LATIN SMALL LETTER S WITH HOOK"; + case U'\u0283': + return U"LATIN SMALL LETTER ESH"; + case U'\u0284': + return U"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK"; + case U'\u0285': + return U"LATIN SMALL LETTER SQUAT REVERSED ESH"; + case U'\u0286': + return U"LATIN SMALL LETTER SH WITH CURL"; + case U'\u0287': + return U"LATIN SMALL LETTER TURNED T"; + case U'\u0288': + return U"LATIN SMALL LETTER T WITH RETROFLEX HOOK"; + case U'\u0289': + return U"LATIN SMALL LETTER U BAR"; + case U'\u028A': + return U"LATIN SMALL LETTER UPSILON"; + case U'\u028B': + return U"LATIN SMALL LETTER V WTIH HOOK"; + case U'\u028C': + return 
U"LATIN SMALL LETTER TURNED V"; + case U'\u028D': + return U"LATIN SMALL LETTER TURNED W"; + case U'\u028E': + return U"LATIN SMALL LETTER TURNED Y"; + case U'\u028F': + return U"LATIN LETTER SMALL CAPITAL Y"; + case U'\u0290': + return U"LATIN SMALL LETTER Z WITH RETROFLEX HOOK"; + case U'\u0291': + return U"LATIN SMALL LETTER Z WITH RETROFLEX"; + case U'\u0292': + return U"LATIN SMALL LETTER EZH"; + case U'\u0293': + return U"LATIN SMALL LETTER EZH WITH CURL"; + case U'\u0294': + return U"LATIN LETTER GLOTTAL STOP"; + case U'\u0295': + return U"LATIN LETTER PHARYNGEAL VOICED FRICATIVE"; + case U'\u0296': + return U"LATIN LETTER INVERTED GLOTTAL STOP"; + case U'\u0297': + return U"LATIN LETTER STRETCHED C"; + case U'\u0298': + return U"LATIN LETTER BILABIAL CLICK"; + case U'\u0299': + return U"LATIN LETTER SMALL CAPITAL B"; + case U'\u029A': + return U"LATIN SMALL LETTER CLOSED OPEN E"; + case U'\u029B': + return U"LATIN LETTER SMALL CAPITAL G WITH HOOK"; + case U'\u029C': + return U"LATIN LETTER SMALL CAPITAL H"; + case U'\u029D': + return U"LATIN SMALL LETTER J WITH CROSSED-TAIL"; + case U'\u029E': + return U"LATIN SMALL LETTER TURNED K"; + case U'\u029F': + return U"LATIN LETTER SMALL CAPITAL L"; + case U'\u02A0': + return U"LATIN SMALL LETTER Q WITH HOOK"; + case U'\u02A1': + return U"LATIN LETTER GLOTTAL STOP WITH STROKE"; + case U'\u02A2': + return U"LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE"; + case U'\u02A3': + return U"LATIN SMALL LETTER DZ DIGRAPH"; + case U'\u02A4': + return U"LATIN SMALL LETTER DEZH DIGRAPH"; + case U'\u02A5': + return U"LATIN SMALL LETTER DZ DIGRAPH WITH CURL"; + case U'\u02A6': + return U"LATIN SMALL LETTER TS DIGRAPH"; + case U'\u02A7': + return U"LATIN SMALL LETTER TESH DIGRAPH"; + case U'\u02A8': + return U"LATIN SMALL LETTER TC DIGRAPH WITH CURL"; + case U'\u02A9': + return U"LATIN SMALL LETTER FENG DIGRAPH"; + case U'\u02AA': + return U"LATIN SMALL LETTER LS DIGRAPH"; + case U'\u02AB': + return U"LATIN SMALL LETTER LZ 
DIGRAPH"; + case U'\u02AC': + return U"LATIN LETTER BILABIAL PERCUSSIVE"; + case U'\u02AD': + return U"LATIN LETTER BIDENTAL PERCUSSIVE"; + case U'\u02AE': + return U"LATIN SMALL LETTER TURNED H WITH FISHHOOK"; + case U'\u02AF': + return U"LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL"; + /* SPACING MODIFIER LETTERS: */ + case U'\u02B0': + return U"MODIFIER LETTER SMALL H"; + case U'\u02B1': + return U"MODIFIER LETTER SMALL H WITH HOOK"; + case U'\u02B2': + return U"MODIFIER LETTER SMALL J"; + case U'\u02B3': + return U"MODIFIER LETTER SMALL R"; + case U'\u02B4': + return U"MODIFIER LETTER SMALL TURNED R"; + case U'\u02B5': + return U"MODIFIER LETTER SMALL TURNED R WITH HOOK"; + case U'\u02B6': + return U"MODIFIER LETTER SMALL CAPITAL INVERTED R"; + case U'\u02B7': + return U"MODIFIER LETTER SMALL W"; + case U'\u02B8': + return U"MODIFIER LETTER SMALL Y"; + case U'\u02B9': + return U"MODIFIER LETTER PRIME"; + case U'\u02BA': + return U"MODIFIER LETTER DOUBLE PRIME"; + case U'\u02BB': + return U"MODIFIER LETTER TURNED COMMA"; + case U'\u02BC': + return U"MODIFIER LETTER APOSTROPHE"; + case U'\u02BD': + return U"MODIFIER LETTER REVERSED COMMA"; + case U'\u02BE': + return U"MODIFIER LETTER RIGHT HALF RING"; + case U'\u02BF': + return U"MODIFIER LETTER LEFT HALF RING"; + case U'\u02C0': + return U"MODIFIER LETTER GLOTTAL STOP"; + case U'\u02C1': + return U"MODIFIER LETTER REVERSED GLOTTAL STOP"; + case U'\u02C2': + return U"MODIFIER LETTER LEFT ARROWHEAD"; + case U'\u02C3': + return U"MODIFIER LETTER RIGHT ARROWHEAD"; + case U'\u02C4': + return U"MODIFIER LETTER UP ARROWHEAD"; + case U'\u02C5': + return U"MODIFIER LETTER DOWN ARROWHEAD"; + case U'\u02C6': + return U"MODIFIER LETTER CIRCUMFLEX"; + case U'\u02C7': + return U"CARON"; + case U'\u02C8': + return U"MODIFIER LETTER VERTICAL LINE"; + case U'\u02C9': + return U"MODIFIER LETTER MACRON"; + case U'\u02CA': + return U"MODIFIER LETTER ACUTE ACCENT"; + case U'\u02CB': + return U"MODIFIER LETTER GRAVE ACCENT"; + 
case U'\u02CC': + return U"MODIFIER LETTER LOW VERTICAL LINE"; + case U'\u02CD': + return U"MODIFIER LETTER LOW MACRON"; + case U'\u02CE': + return U"MODIFIER LETTER LOW GRAVE ACCENT"; + case U'\u02CF': + return U"MODIFIER LETTER LOW ACUTE ACCENT"; + case U'\u02D0': + return U"MODIFIER LETTER TRIANGULAR COLON"; + case U'\u02D1': + return U"MODIFIER LETTER HALF TRIANGULAR COLON"; + case U'\u02D2': + return U"MODIFIER LETTER CENTRED RIGHT HALF RING"; + case U'\u02D3': + return U"MODIFIER LETTER CENTRED LEFT HALF RING"; + case U'\u02D4': + return U"MODIFIER LETTER UP TACK"; + case U'\u02D5': + return U"MODIFIER LETTER DOWN TACK"; + case U'\u02D6': + return U"MODIFIER LETTER PLUS SIGN"; + case U'\u02D7': + return U"MODIFIER LETTER MINUS SIGN"; + case U'\u02D8': + return U"BREVE"; + case U'\u02D9': + return U"DOT ABOVE"; + case U'\u02DA': + return U"RING ABOVE"; + case U'\u02DB': + return U"OGONEK"; + case U'\u02DC': + return U"SMALL TILDE"; + case U'\u02DD': + return U"DOUBLE ACUTE ACCENT"; + case U'\u02DE': + return U"MODIFIER LETTER RHOTIC HOOK"; + case U'\u02DF': + return U"MODIFIER LETTER CROSS ACCENT"; + case U'\u02E0': + return U"MODIFIER LETTER SMALL GAMMA"; + case U'\u02E1': + return U"MODIFIER LETTER SMALL L"; + case U'\u02E2': + return U"MODIFIER LETTER SMALL S"; + case U'\u02E3': + return U"MODIFIER LETTER SMALL X"; + case U'\u02E4': + return U"MODIFIER LETTER SMALL REVERSED GLOTTAL STOP"; + case U'\u02E5': + return U"MODIFIER LETTER EXTRA-HIGH TONE BAR"; + case U'\u02E6': + return U"MODIFIER LETTER HIGH TONE BAR"; + case U'\u02E7': + return U"MODIFIER LETTER MID TONE BAR"; + case U'\u02E8': + return U"MODIFIER LETTER LOW TONE BAR"; + case U'\u02E9': + return U"MODIFIER LETTER EXTRA-LOW TONE BAR"; + case U'\u02EA': + return U"MODIFIER LETTER YIN DEPARTING TONE MARK"; + case U'\u02EB': + return U"MODIFIER LETTER YANG DEPARTING TONE MARK"; + case U'\u02EC': + return U"MODIFIER LETTER VOICING"; + case U'\u02ED': + return U"MODIFIER LETTER UNASPIRATED"; + case 
U'\u02EE': + return U"MODIFIER LETTER DOUBLE APOSTROPHE"; + case U'\u02EF': + return U"MODIFIER LETTER LOW DOWN ARROWHEAD"; + case U'\u02F0': + return U"MODIFIER LETTER LOW UP ARROWHEAD"; + case U'\u02F1': + return U"MODIFIER LETTER LOW LEFT ARROWHEAD"; + case U'\u02F2': + return U"MODIFIER LETTER LOW RIGHT ARROWHEAD"; + case U'\u02F3': + return U"MODIFIER LETTER LOW RING"; + case U'\u02F4': + return U"MODIFIER LETTER MIDDLE GRAVE ACCENT"; + case U'\u02F5': + return U"MODIFIER LETTER MIDDLE DOUBLE GRAVE ACCENT"; + case U'\u02F6': + return U"MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT"; + case U'\u02F7': + return U"MODIFIER LETTER LOW TILDE"; + case U'\u02F8': + return U"MODIFIER LETTER RAISED COLON"; + case U'\u02F9': + return U"MODIFIER LETTER BEGIN HIGH TONE"; + case U'\u02FA': + return U"MODIFIER LETTER END HIGH TONE"; + case U'\u02FB': + return U"MODIFIER LETTER BEGIN LOW TONE"; + case U'\u02FC': + return U"MODIFIER LETTER END LOW TONE"; + case U'\u02FD': + return U"MODIFIER LETTER SHELF"; + case U'\u02FE': + return U"MODIFIER LETTER OPEN SHELF"; + case U'\u02FF': + return U"MODIFIER LETTER LOW LEFT ARROWHEAD"; + /* COMBINING DIACRITICAL MARKS: */ + case U'\u0300': + return U"COMBINING GRAVE ACCENT"; + case U'\u0301': + return U"COMBINING ACUTE ACCENT"; + case U'\u0302': + return U"COMBINING CIRCUMFLEX ACCENT"; + case U'\u0303': + return U"COMBINING TILDE"; + case U'\u0304': + return U"COMBINING MACRON"; + case U'\u0305': + return U"COMBINING OVERLINE"; + case U'\u0306': + return U"COMBINING BREVE"; + case U'\u0307': + return U"COMBINING DOT ABOVE"; + case U'\u0308': + return U"COMBINING DIAERESIS"; + case U'\u0309': + return U"COMBINING HOOK ABOVE"; + case U'\u030A': + return U"COMBINING RING ABOVE"; + case U'\u030B': + return U"COMBINING DOUBLE ACUTE ACCENT"; + case U'\u030C': + return U"COMBINING CARON"; + case U'\u030D': + return U"COMBINING VERTICAL LINE ABOVE"; + case U'\u030E': + return U"COMBINING DOUBLE VERTICAL LINE ABOVE"; + case U'\u030F': + return 
U"COMBINING DOUBLE GRAVE ACCENT"; + case U'\u0310': + return U"COMBINING CANDRABINDU"; + case U'\u0311': + return U"COMBINING INVERTED BREVE"; + case U'\u0312': + return U"COMBINING TURNED COMMA ABOVE"; + case U'\u0313': + return U"COMBINING COMMA ABOVE"; + case U'\u0314': + return U"COMBINING REVERSED COMMA ABOVE"; + case U'\u0315': + return U"COMBINING COMMA ABOVE RIGHT"; + case U'\u0316': + return U"COMBINING GRAVE ACCENT BELOW"; + case U'\u0317': + return U"COMBINING ACUTE ACCENT BELOW"; + case U'\u0318': + return U"COMBINING LEFT TACK BELOW"; + case U'\u0319': + return U"COMBINING RIGHT TACK BELOW"; + case U'\u031A': + return U"COMBINING LEFT ANGLE ABOVE"; + case U'\u031B': + return U"COMBINING HORN"; + case U'\u031C': + return U"COMBINING LEFT HALF RING BELOW"; + case U'\u031D': + return U"COMBINING UP TACK BELOW"; + case U'\u031E': + return U"COMBINING DOWN TACK BELOW"; + case U'\u031F': + return U"COMBINING PLUS SIGN BELOW"; + case U'\u0320': + return U"COMBINING MINUS SIGN BELOW"; + case U'\u0321': + return U"COMBINING PALATALIZED HOOK BELOW"; + case U'\u0322': + return U"COMBINING RETROFLEX HOOK BELOW"; + case U'\u0323': + return U"COMBINING DOT BELOW"; + case U'\u0324': + return U"COMBINING DIAERESIS BELOW"; + case U'\u0325': + return U"COMBINING RING BELOW"; + case U'\u0326': + return U"COMBINING COMMA BELOW"; + case U'\u0327': + return U"COMBINING CEDILLA"; + case U'\u0328': + return U"COMBINING OGONEK"; + case U'\u0329': + return U"COMBINING VERTICAL LINE BELOW"; + case U'\u032A': + return U"COMBINING BRDIGE BELOW"; + case U'\u032B': + return U"COMBINING INVERTED DOUBLE ARCH BELOW"; + case U'\u032C': + return U"COMBINING CARON BELOW"; + case U'\u032D': + return U"COMBINING CIRCUMFLEX ACCENT BELOW"; + case U'\u032E': + return U"COMBINING BREVE BELOW"; + case U'\u032F': + return U"COMBINING INVERTED BREVE BELOW"; + case U'\u0330': + return U"COMBINING TILDE BELOW"; + case U'\u0331': + return U"COMBINING MACRON BELOW"; + case U'\u0332': + return 
U"COMBINING LOW LINE"; + case U'\u0333': + return U"COMBINING DOUBLE LOW LINE"; + case U'\u0334': + return U"COMBINING TILDE OVERLAY"; + case U'\u0335': + return U"COMBINING SHORT STROKE OVERLAY"; + case U'\u0336': + return U"COMBINING LONG STROKE OVERLAY"; + case U'\u0337': + return U"COMBINING SHORT SOLIDUS OVERLAY"; + case U'\u0338': + return U"COMBINING LONG SOLIDUS OVERLAY"; + case U'\u0339': + return U"COMBINING RIGHT HALF RING BELOW"; + case U'\u033A': + return U"COMBINING INVERTED BRIDGE BELOW"; + case U'\u033B': + return U"COMBINING SQUARE BELOW"; + case U'\u033C': + return U"COMBINING SEAGULL BELOW"; + case U'\u033D': + return U"COMBINING X ABOVE"; + case U'\u033E': + return U"COMBINING VERTICAL TILDE"; + case U'\u033F': + return U"COMBINING DOUBLE OVERLINE"; + case U'\u0340': + return U"COMBINING GRAVE TONE MARK"; + case U'\u0341': + return U"COMBINING ACUTE TONE MARK"; + case U'\u0342': + return U"COMBINING GREEK PERISPOMENI"; + case U'\u0343': + return U"COMBINING GREEK KORONIS"; + case U'\u0344': + return U"COMBINING GREEK DIALYTIKA TONOS"; + case U'\u0345': + return U"COMBINING GREEK YPOGEGRAMMENI"; + case U'\u0346': + return U"COMBINING BRIDGE ABOVE"; + case U'\u0347': + return U"COMBINING EQUALS SIGN BELOW"; + case U'\u0348': + return U"COMBINING DOUBLE VERTICAL LINE BELOW"; + case U'\u0349': + return U"COMBINING LEFT ANGLE BELOW"; + case U'\u034A': + return U"COMBINING NOT TILDE ABOVE"; + case U'\u034B': + return U"COMBINING HOMOTHETIC ABOVE"; + case U'\u034C': + return U"COMBINING ALMOST EQUAL TO ABOVE"; + case U'\u034D': + return U"COMBINING LEFT RIGHT ARROW BELOW"; + case U'\u034E': + return U"COMBINING UPWARDS ARROW BELOW"; + case U'\u034F': + return U"COMBINING GRAPHEME JOINER"; + case U'\u0350': + return U"COMBINING RIGHT ARROWHEAD ABOVE"; + case U'\u0351': + return U"COMBINING LEFT HALF RING ABOVE"; + case U'\u0352': + return U"COMBINING FERMATA"; + case U'\u0353': + return U"COMBINING X BELOW"; + case U'\u0354': + return U"COMBINING LEFT 
ARROWHEAD BELOW"; + case U'\u0355': + return U"COMBINING RIGHT ARROWHEAD BELOW"; + case U'\u0356': + return U"COMBINING RIGHT ARROWHEAD AND UP ARROWHEAD BELOW"; + case U'\u0357': + return U"COMBINING RIGHT HALF RING ABOVE"; + case U'\u0358': + return U"COMBINING DOT ABOVE RIGHT"; + case U'\u0359': + return U"COMBINING ASTERISK BELOW"; + case U'\u035A': + return U"COMBINING DOUBLE RING BELOW"; + case U'\u035B': + return U"COMBINING ZIGZAG ABOVE"; + case U'\u035C': + return U"COMBINING DOUBLE BREVE BELOW"; + case U'\u035D': + return U"COMBINING DOUBLE BREVE"; + case U'\u035E': + return U"COMBINING DOUBLE MACRON"; + case U'\u035F': + return U"COMBINING DOUBLE MACRON BELOW"; + case U'\u0360': + return U"COMBINING DOUBLE TILDE"; + case U'\u0361': + return U"COMBINING DOUBLE INVERTED BREVE"; + case U'\u0362': + return U"COMBINING DOUBLE RIGHTWARDS ARROW BELOW"; + case U'\u0363': + return U"COMBINING LATIN SMALL LETTER A"; + case U'\u0364': + return U"COMBINING LATIN SMALL LETTER E"; + case U'\u0365': + return U"COMBINING LATIN SMALL LETTER I"; + case U'\u0366': + return U"COMBINING LATIN SMALL LETTER O"; + case U'\u0367': + return U"COMBINING LATIN SMALL LETTER U"; + case U'\u0368': + return U"COMBINING LATIN SMALL LETTER C"; + case U'\u0369': + return U"COMBINING LATIN SMALL LETTER D"; + case U'\u036A': + return U"COMBINING LATIN SMALL LETTER H"; + case U'\u036B': + return U"COMBINING LATIN SMALL LETTER M"; + case U'\u036C': + return U"COMBINING LATIN SMALL LETTER R"; + case U'\u036D': + return U"COMBINING LATIN SMALL LETTER T"; + case U'\u036E': + return U"COMBINING LATIN SMALL LETTER V"; + case U'\u036F': + return U"COMBINING LATIN SMALL LETTER X"; + /* GREEK AND COPTIC: */ + case U'\u0370': + return U"GREEK CAPITAL LETTER HETA"; + case U'\u0371': + return U"GREEK SMALL LETTER HETA"; + case U'\u0372': + return U"GREEK CAPITAL LETTER ARCHAIC SAMPI"; + case U'\u0373': + return U"GREEK SMALL LETTER ARCHAIC SAMPI"; + case U'\u0374': + return U"GREEK NUMERAL SIGN"; + case 
U'\u0375': + return U"GREEK LOWER NUMERAL SIGN"; + case U'\u0376': + return U"GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA"; + case U'\u0377': + return U"GREEK SMALL LETTER PAMPHYLIAN DIGAMMA"; + case U'\u037A': + return U"GREEK YPOGEGRAMMENI"; + case U'\u037B': + return U"GREEK SMALL REVERSED LUNATE SIGMA SYMBOL"; + case U'\u037C': + return U"GREEK SMALL DOTTED LUNATE SIGMA SYMBOL"; + case U'\u037D': + return U"GREEK SMALL REVERSED DOTTED LUNATE SIGMAL SYMBOL"; + case U'\u037E': + return U"GREEK QUESTION MARK"; + case U'\u037F': + return U"GREEK CAPITAL LETTER YOT"; + case U'\u0384': + return U"GREEK TONOS"; + case U'\u0385': + return U"GREEK DIALYTIKA TONOS"; + case U'\u0386': + return U"GREEK CAPITAL LETTER ALPHA WITH TONOS"; + case U'\u0387': + return U"GREEK ANO TELEIA"; + case U'\u0388': + return U"GREEK CAPITAL LETTER EPSILON WITH TONOS"; + case U'\u0389': + return U"GREEK CAPITAL LETTER ETA WITH TONOS"; + case U'\u038A': + return U"GREEK CAPITAL LETTER IOTA WITH TONOS"; + case U'\u038C': + return U"GREEK CAPITAL LETTER OMICRON WITH TONOS"; + case U'\u038E': + return U"GREEK CAPITAL LETTER USPILON WITH TONOS"; + case U'\u038F': + return U"GREEK CAPITAL LETTER OMEGA WITH TONOS"; + case U'\u0390': + return U"GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"; + case U'\u0391': + return U"GREEK CAPITAL LETTER ALPHA"; + case U'\u0392': + return U"GREEK CAPITAL LETTER BETA"; + case U'\u0393': + return U"GREEK CAPITAL LETTER GAMMA"; + case U'\u0394': + return U"GREEK CAPITAL LETTER DELTA"; + case U'\u0395': + return U"GREEK CAPITAL LETTER EPSILON"; + case U'\u0396': + return U"GREEK CAPITAL LETTER ZETA"; + case U'\u0397': + return U"GREEK CAPITAL LETTER ETA"; + case U'\u0398': + return U"GREEK CAPITAL LETTER THETA"; + case U'\u0399': + return U"GREEK CAPITAL LETTER IOTA"; + case U'\u039A': + return U"GREEK CAPITAL LETTER KAPPA"; + case U'\u039B': + return U"GREEK CAPITAL LETTER LAMBDA"; + case U'\u039C': + return U"GREEK CAPITAL LETTER MU"; + case U'\u039D': + return 
U"GREEK CAPITAL LETTER NU"; + case U'\u039E': + return U"GREEK CAPITAL LETTER XI"; + case U'\u039F': + return U"GREEK CAPITAL LETTER OMICRON"; + case U'\u03A0': + return U"GREEK CAPITAL LETTER PI"; + case U'\u03A1': + return U"GREEK CAPITAL LETTER RHO"; + case U'\u03A3': + return U"GREEK CAPITAL LETTER SIGMA"; + case U'\u03A4': + return U"GREEK CAPITAL LETTER TAU"; + case U'\u03A5': + return U"GREEK CAPITAL LETTER UPSILON"; + case U'\u03A6': + return U"GREEK CAPITAL LETTER PHI"; + case U'\u03A7': + return U"GREEK CAPITAL LETTER CHI"; + case U'\u03A8': + return U"GREEK CAPITAL LETTER PSI"; + case U'\u03A9': + return U"GREEK CAPITAL LETTER OMEGA"; + case U'\u03AA': + return U"GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"; + case U'\u03AB': + return U"GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"; + case U'\u03AC': + return U"GREEK SMALL LETTER ALPHA WITH TONOS"; + case U'\u03AD': + return U"GREEK SMALL LETTER EPSILON WITH TONOS"; + case U'\u03AE': + return U"GREEK SMALL LETTER ETA WITH TONOS"; + case U'\u03AF': + return U"GREEK SMALL LETTER IOTA WITH TONOS"; + case U'\u03B0': + return U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"; + case U'\u03B1': + return U"GREEK SMALL LETTER ALPHA"; + case U'\u03B2': + return U"GREEK SMALL LETTER BETA"; + case U'\u03B3': + return U"GREEK SMALL LETTER GAMMA"; + case U'\u03B4': + return U"GREEK SMALL LETTER DELTA"; + case U'\u03B5': + return U"GREEK SMALL LETTER EPSILON"; + case U'\u03B6': + return U"GREEK SMALL LETTER ZETA"; + case U'\u03B7': + return U"GREEK SMALL LETTER ETA"; + case U'\u03B8': + return U"GREEK SMALL LETTER THETA"; + case U'\u03B9': + return U"GREEK SMALL LETTER IOTA"; + case U'\u03BA': + return U"GREEK SMALL LETTER KAPPA"; + case U'\u03BB': + return U"GREEK SMALL LETTER LAMBDA"; + case U'\u03BC': + return U"GREEK SMALL LETTER MU"; + case U'\u03BD': + return U"GREEK SMALL LETTER NU"; + case U'\u03BE': + return U"GREEK SMALL LETTER XI"; + case U'\u03BF': + return U"GREEK SMALL LETTER OMICRON"; + case U'\u03C0': 
+ return U"GREEK SMALL LETTER PI"; + case U'\u03C1': + return U"GREEK SMALL LETTER RHO"; + case U'\u03C2': + return U"GREEK SMALL LETTER FINAL SIGMA"; + case U'\u03C3': + return U"GREEK SMALL LETTER SIGMA"; + case U'\u03C4': + return U"GREEK SMALL LETTER TAU"; + case U'\u03C5': + return U"GREEK SMALL LETTER UPSILON"; + case U'\u03C6': + return U"GREEK SMALL LETTER PHI"; + case U'\u03C7': + return U"GREEK SMALL LETTER CHI"; + case U'\u03C8': + return U"GREEK SMALL LETTER PSI"; + case U'\u03C9': + return U"GREEK SMALL LETTER OMEGA"; + case U'\u03CA': + return U"GREEK SMALL LETTER IOTA WITH DIALYTIKA"; + case U'\u03CB': + return U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA"; + case U'\u03CC': + return U"GREEK SMALL LETTER OMICRON WITH TONOS"; + case U'\u03CD': + return U"GREEK SMALL LETTER UPSILON WITH TONOS"; + case U'\u03CE': + return U"GREEK SMALL LETTER OMEGA WITH TONOS"; + case U'\u03CF': + return U"GREEK CAPITAL KAI SYMBOL"; + case U'\u03D0': + return U"GREEK BETA SYMBOL"; + case U'\u03D1': + return U"GREEK THETA SYMBOL"; + case U'\u03D2': + return U"GREEK UPSILON WITH HOOK SYMBOL"; + case U'\u03D3': + return U"GREEK UPSILON WITH ACUTE AND HOOK SYMBOL"; + case U'\u03D4': + return U"GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL"; + case U'\u03D5': + return U"GREEK PHI SYMBOL"; + case U'\u03D6': + return U"GREEK PI SYMBOL"; + case U'\u03D7': + return U"GREEK KAI SYMBOL"; + case U'\u03D8': + return U"GREEK LETTER ARCHAIC KOPPA"; + case U'\u03D9': + return U"GREEK SMALL LETTER ARCHAIC KOPPA"; + case U'\u03DA': + return U"GREEK LETTER STIGMA"; + case U'\u03DB': + return U"GREEK SMALL LETTER STIGMA"; + case U'\u03DC': + return U"GREEK LETTER DIGAMMA"; + case U'\u03DD': + return U"GREEK SMALL LETTER DIGAMMA"; + case U'\u03DE': + return U"GREEK LETTER KOPPA"; + case U'\u03DF': + return U"GREEK SMALL LETTER KOPPA"; + case U'\u03E0': + return U"GREEK LETTER SAMPI"; + case U'\u03F0': + return U"GREEK KAPPA SYMBOL"; + /* HEBREW: */ + case U'\u05D0': + return U"HEBREW LETTER 
ALEF"; + case U'\u05D1': + return U"HEBREW LETTER BET"; + case U'\u05D2': + return U"HEBREW LETTER GIMEL"; + case U'\u05D3': + return U"HEBREW LETTER DALET"; + case U'\u05D4': + return U"HEBREW LETTER HE"; + case U'\u05D5': + return U"HEBREW LETTER VAV"; + case U'\u05D6': + return U"HEBREW LETTER ZAYIN"; + case U'\u05D7': + return U"HEBREW LETTER HET"; + case U'\u05D8': + return U"HEBREW LETTER TET"; + case U'\u05D9': + return U"HEBREW LETTER YOD"; + case U'\u05DA': + return U"HEBREW LETTER FINAL KAF"; + case U'\u05DB': + return U"HEBREW LETTER KAF"; + case U'\u05DC': + return U"HEBREW LETTER LAMED"; + case U'\u05DD': + return U"HEBREW LETTER FINAL MEM"; + case U'\u05DE': + return U"HEBREW LETTER MEM"; + case U'\u05DF': + return U"HEBREW LETTER FINAL NUN"; + case U'\u05E0': + return U"HEBREW LETTER NUN"; + case U'\u05E1': + return U"HEBREW LETTER SAMEKH"; + case U'\u05E2': + return U"HEBREW LETTER AYIN"; + case U'\u05E3': + return U"HEBREW LETTER FINAL PE"; + case U'\u05E4': + return U"HEBREW LETTER PE"; + case U'\u05E5': + return U"HEBREW LETTER FINAL TSADI"; + case U'\u05E6': + return U"HEBREW LETTER TSADI"; + case U'\u05E7': + return U"HEBREW LETTER QOF"; + case U'\u05E8': + return U"HEBREW LETTER RESH"; + case U'\u05E9': + return U"HEBREW LETTER SHIN"; + case U'\u05EA': + return U"HEBREW LETTER TAV"; + case U'\u05EF': + return U"HEBREW YOD TRIANGLE"; + /* CYRILLIC: */ + case U'\u0400': + return U"CYRILLIC CAPITAL LETTER LE WITH GRAVE"; + case U'\u0401': + return U"CYRILLIC CAPITAL LETTER LO"; + case U'\u0402': + return U"CYRILLIC CAPITAL LETTER DJE"; + case U'\u0403': + return U"CYRILLIC CAPITAL LETTER GJE"; + case U'\u0404': + return U"CYRILLIC CAPITAL LETTER UKRAINIAN LE"; + case U'\u0405': + return U"CYRILLIC CAPITAL LETTER DZE"; + case U'\u0406': + return U"CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I"; + case U'\u0407': + return U"CYRILLIC CAPITAL LETTER YI"; + case U'\u0408': + return U"CYRILLIC CAPITAL LETTER JE"; + case U'\u0409': + return 
U"CYRILLIC CAPITAL LETTER LJE"; + case U'\u040A': + return U"CYRILLIC CAPITAL LETTER NJE"; + case U'\u040B': + return U"CYRILLIC CAPITAL LETTER TSHE"; + case U'\u040C': + return U"CYRILLIC CAPITAL LETTER KJE"; + case U'\u040D': + return U"CYRILLIC CAPITAL LETTER I WITH GRAVE"; + case U'\u040E': + return U"CYRILLIC CAPITAL LETTER SHORT U"; + case U'\u040F': + return U"CYRILLIC CAPITAL LETTER DZHE"; + case U'\u0410': + return U"CYRILLIC CAPITAL LETTER A"; + case U'\u0420': + return U"CYRILLIC CAPITAL LETTER ER"; + case U'\u0430': + return U"CYRILLIC SMALL LETTER A"; + case U'\u0440': + return U"CYRILLIC SMALL LETTER ER"; + case U'\u0450': + return U"CYRILLIC SMALL LETTER LE WITH GRAVE"; + case U'\u0460': + return U"CYRILLIC CAPITAL LETTER OMEGA"; + case U'\u0470': + return U"CYRILLIC CAPITAL LETTER PSI"; + case U'\u0480': + return U"CYRILLIC CAPITAL LETTER KOPPA"; + case U'\u0490': + return U"CYRILLIC CAPITAL LETTER GHE WITH UPTURN"; + case U'\u04A0': + return U"CYRILLIC CAPITAL LETTER BASHKIR KA"; + case U'\u04B0': + return U"CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE"; + case U'\u04C0': + return U"CYRILLIC LETTER PALOCHKA"; + case U'\u04D0': + return U"CYRILLIC CAPITAL LETTER A WITH BREVE"; + case U'\u04E0': + return U"CYRILLIC CAPITAL LETTER ABKHASIAN DZE"; + case U'\u04F0': + return U"CYRILLIC CAPITAL LETTER U WITH DIAERESIS"; + /* SYRIAC SUPPLEMENT: */ + case U'\u0860': + return U"SYRIAC LETTER MALAYALAM NGA"; + case U'\u0861': + return U"SYRIAC LETTER MALAYALAM JA"; + case U'\u0862': + return U"SYRIAC LETTER MALAYALAM NYA"; + case U'\u0863': + return U"SYRIAC LETTER MALAYALAM TTA"; + case U'\u0864': + return U"SYRIAC LETTER MALAYALAM NNA"; + case U'\u0865': + return U"SYRIAC LETTER MALAYALAM NNNA"; + case U'\u0866': + return U"SYRIAC LETTER MALAYALAM BHA"; + case U'\u0867': + return U"SYRIAC LETTER MALAYALAM RA"; + case U'\u0868': + return U"SYRIAC LETTER MALAYALAM LLA"; + case U'\u0869': + return U"SYRIAC LETTER MALAYALAM LLLA"; + case U'\u086A': + return 
U"SYRIAC LETTER MALAYALAM SSA"; + /* RUNIC: */ + case U'\u16A0': + return U"RUNIC LETTER FEHU FEOH FE F"; + case U'\u16A1': + return U"RUNIC LETTER V"; + case U'\u16A2': + return U"RUNIC LETTER URUZ UR U"; + case U'\u16A3': + return U"RUNIC LETTER YR"; + case U'\u16A4': + return U"RUNIC LETTER Y"; + case U'\u16A5': + return U"RUNIC LETTER W"; + case U'\u16A6': + return U"RUNIC LETTER THURISAZ THURS THORN"; + case U'\u16A7': + return U"RUNIC LETTER ETH"; + case U'\u16A8': + return U"RUNIC LETTER ANSUZ A"; + case U'\u16A9': + return U"RUNIC LETTER OS O"; + case U'\u16AA': + return U"RUNIC LETTER AC A"; + case U'\u16AB': + return U"RUNIC LETTER AESC"; + case U'\u16AC': + return U"RUNIC LETTER LONG-BRANCHED-OSS O"; + case U'\u16AD': + return U"RUNIC LETTER SHORT-TWIG-OSS O"; + case U'\u16AE': + return U"RUNIC LETTER O"; + case U'\u16AF': + return U"RUNIC LETTER OE"; + case U'\u16B0': + return U"RUNIC LETTER ON"; + case U'\u16C0': + return U"RUNIC LETTER DOTTED-N"; + case U'\u16D0': + return U"RUNIC LETTER SHORT-TWIG-TYR T"; + case U'\u16E0': + return U"RUNIC LETTER EAR"; + case U'\u16F0': + return U"RUNIC BELGTHOR SYMBOL"; + /* CYRILLIC EXTENDED C: */ + case U'\u1C80': + return U"CYRILLIC SMALL LETTER ROUNDED VE"; + case U'\u1C81': + return U"CYRILLIC SMALL LETTER LONG-LEGGED DE"; + case U'\u1C82': + return U"CYRILLIC SMALL LETTER NARROW O"; + case U'\u1C83': + return U"CYRILLIC SMALL LETTER WIDE ES"; + case U'\u1C84': + return U"CYRILLIC SMALL LETTER TALL TE"; + case U'\u1C85': + return U"CYRILLIC SMALL LETTER THREE-LEGGED TE"; + case U'\u1C86': + return U"CYRILLIC SMALL LETTER TALL HARD SIGN"; + case U'\u1C87': + return U"CYRILLIC SMALL LETTER TALL YAT"; + case U'\u1C88': + return U"CYRILLIC SMALL LETTER UNBLENDED UK"; + /* GENERAL PUNCTUATION: */ + case U'\u2000': + return U"EN QUAD"; + case U'\u2001': + return U"EM QUAD"; + case U'\u2002': + return U"EN SPACE"; + case U'\u2003': + return U"EM SPACE"; + case U'\u2004': + return U"THREE-PER-EM SPACE"; + case 
U'\u2005': + return U"FOUR-PER-EM SPACE"; + case U'\u2006': + return U"SIX-PER-EM SPACE"; + case U'\u2007': + return U"FIGURE SPACE"; + case U'\u2008': + return U"PUNCTUATION SPACE"; + case U'\u2009': + return U"THIN SPACE"; + case U'\u200A': + return U"HAIR SPACE"; + case U'\u203C': + return U"DOUBLE EXCLAMATION MARK"; + case U'\u2047': + return U"DOUBLE QUOTATION MARK"; + case U'\u2048': + return U"QUESTION EXCLAMATION MARK"; + case U'\u2049': + return U"EXCLAMATION QUESTION MARK"; + /* CURRENCY SYMBOLS: */ + case U'\u20A0': + return U"EURO-CURRENCY SIGN"; + case U'\u20A1': + return U"COLON SIGN"; + case U'\u20A2': + return U"CRUZEIRO SIGN"; + case U'\u20A3': + return U"FRENCH FRANC SIGN"; + case U'\u20A4': + return U"LIRA SIGN"; + case U'\u20A5': + return U"MILL SIGN"; + case U'\u20A6': + return U"NAIRA SIGN"; + case U'\u20A7': + return U"PESETA SIGN"; + case U'\u20A8': + return U"RUPEE SIGN"; + case U'\u20A9': + return U"WON SIGN"; + case U'\u20AA': + return U"NEW SHEQEL SIGN"; + case U'\u20AB': + return U"DONG SIGN"; + case U'\u20AC': + return U"EURO SIGN"; + case U'\u20AD': + return U"KIP SIGN"; + case U'\u20AE': + return U"TUGRIK SIGN"; + case U'\u20AF': + return U"DRACHMA SIGN"; + case U'\u20B0': + return U"GERMAN PENNY SIGN"; + case U'\u20B1': + return U"PESO SIGN"; + case U'\u20B2': + return U"GUARANI SIGN"; + case U'\u20B3': + return U"AUSTRAL SIGN"; + case U'\u20B4': + return U"HRYVNIA SIGN"; + case U'\u20B5': + return U"CEDI SIGN"; + case U'\u20B6': + return U"LIVRE TOURNOIS SIGN"; + case U'\u20B7': + return U"SPESMILO SIGN"; + case U'\u20B8': + return U"TENGE SIGN"; + case U'\u20BA': + return U"TURKISH LIRA SIGN"; + case U'\u20BB': + return U"NORDIC MARK SIGN"; + case U'\u20BC': + return U"MANAT SIGN"; + case U'\u20BD': + return U"RUBLE SYMBOL"; + case U'\u20BE': + return U"LARI SIGN"; + case U'\u20BF': + return U"BITCOIN SIGN"; + /* LETTERLIKE SYMBOLS: */ + case U'\u2100': + return U"ACCOUNT OF"; + case U'\u2101': + return U"ADRESSED TO THE SUBJECT"; 
+ case U'\u2102': + return U"DOUBLE-STRUCK CAPITAL C"; + case U'\u2103': + return U"DEGREE CELSIUS"; + case U'\u2104': + return U"CENTRE LINE SYMBOL"; + case U'\u2105': + return U"CARE OF"; + case U'\u2106': + return U"CADA UNA"; + case U'\u2107': + return U"EULER CONSTANT"; + case U'\u2108': + return U"SCRUPLE"; + case U'\u2109': + return U"DEGREE FAHRENHEIT"; + case U'\u210A': + return U"SCRIPT SMALL G"; + case U'\u210B': + return U"SCRIPT CAPITAL H"; + case U'\u210C': + return U"BLACK-LETTER CAPITAL H"; + case U'\u210D': + return U"DOUBLE-STRUCK CAPITAL H"; + case U'\u210E': + return U"PLANCK CONSTANT"; + case U'\u210F': + return U"PLANCK CONSTANT OVER TWO PI"; + case U'\u2110': + return U"SCRIPT CAPITAL I"; + case U'\u2111': + return U"BLACK-LETTER CAPITAL I"; + case U'\u2112': + return U"SCRIPT CAPITAL L"; + case U'\u2113': + return U"SCRIPT SMALL L"; + case U'\u2114': + return U"L B BAR SYMBOL"; + case U'\u2115': + return U"DOUBLE-STRUCK CAPITAL N"; + case U'\u2116': + return U"NUMERO SIGN"; + case U'\u2117': + return U"SOUND RECORDING COPYRIGHT"; + case U'\u2118': + return U"SCRIPT CAPITAL P"; + case U'\u2119': + return U"DOUBLE-STRUCK CAPITAL P"; + case U'\u211A': + return U"DOUBLE-STRUCK CAPITAL Q"; + case U'\u211B': + return U"SCRIPT CAPITAL R"; + case U'\u211C': + return U"BLACK-LETTER CAPITAL R"; + case U'\u211D': + return U"DOUBLE-STRUCK CAPITAL R"; + case U'\u211E': + return U"PRESCRIPTION TAKE"; + case U'\u211F': + return U"RESPONSE"; + case U'\u2120': + return U"SERVICE MARK"; + case U'\u2121': + return U"TELEPHONE SIGN"; + case U'\u2122': + return U"TRADE MARK SIGN"; + case U'\u2123': + return U"VERSICLE"; + case U'\u2124': + return U"DOUBLE-STRUCK CAPITAL Z"; + case U'\u2125': + return U"OUNCE SIGN"; + case U'\u2126': + return U"OHM SIGN"; + case U'\u2127': + return U"INVERTED OHM SIGN"; + case U'\u2128': + return U"BLACK-LETTER CAPITAL Z"; + case U'\u2129': + return U"TURNED GREEK SMALL LETTER IOTA"; + case U'\u212A': + return U"KELVIN SIGN"; + 
case U'\u212B': + return U"ANGSTROM SIGN"; + case U'\u212C': + return U"SCRIPT CAPITAL B"; + case U'\u212D': + return U"BLACK-LETTER CAPITAL C"; + case U'\u212E': + return U"ESTIMATED SYMBOL"; + case U'\u212F': + return U"SCRIPT SMALL E"; + case U'\u2130': + return U"SCRIPT CAPITAL E"; + case U'\u2131': + return U"SCRIPT CAPITAL F"; + case U'\u2132': + return U"TURNED CAPITAL F"; + case U'\u2133': + return U"SCRIPT CAPITAL M"; + case U'\u2134': + return U"SCRIPT SMALL O"; + case U'\u2135': + return U"ALEF SYMBOL"; + case U'\u2136': + return U"BET SYMBOL"; + case U'\u2137': + return U"GIMEL SYMBOL"; + case U'\u2138': + return U"DALET SYMBOL"; + case U'\u2139': + return U"INFORMATION SOURCE"; + case U'\u213A': + return U"ROTATED CAPITAL Q"; + case U'\u213B': + return U"FACSIMILE SIGN"; + case U'\u213C': + return U"DOUBLE-STRUCK SMALL PI"; + case U'\u213D': + return U"DOUBLE-STRUCK SMALL GAMMA"; + case U'\u213E': + return U"DOUBLE-STRUCK CAPITAL GAMMA"; + case U'\u213F': + return U"DOUBLE-STRUCK CAPITAL PI"; + case U'\u2140': + return U"DOUBLE-STRUCK N-ARY SUMMATION"; + case U'\u2141': + return U"TURNED SANS-SERIF CAPITAL G"; + case U'\u2142': + return U"TURNED SANS-SERIF CAPITAL L"; + case U'\u2143': + return U"REVERSED SANS-SERIF CAPITAL L"; + case U'\u2144': + return U"TURNED SANS-SERIF CAPITAL Y"; + case U'\u2145': + return U"DOUBLE-STRUCK ITALIC CAPITAL D"; + case U'\u2146': + return U"DOUBLE-STRUCK ITALIC SMALL D"; + case U'\u2147': + return U"DOUBLE-STRUCK ITALIC SMALL E"; + case U'\u2148': + return U"DOUBLE-STRUCK ITALIC SMALL I"; + case U'\u2149': + return U"DOUBLE-STRUCK ITALIC SMALL J"; + case U'\u214A': + return U"PROPERTY LINE"; + case U'\u214B': + return U"TURNED AMPERSAND"; + case U'\u214C': + return U"PER SIGN"; + case U'\u214D': + return U"AKTIESELSKAB"; + case U'\u214E': + return U"TURNED SMALL F"; + case U'\u214F': + return U"SYMBOL FOR SAMARITAN SOURCE"; + /* NUMBER FORMS: */ + case U'\u2150': + return U"VULGAR FRACTION ONE SEVENTH"; + case 
U'\u2151': + return U"VULGAR FRACTION ONE NINTH"; + case U'\u2152': + return U"VULGAR FRACTION ONE TENTH"; + case U'\u2153': + return U"VULGAR FRACTION ONE THIRD"; + case U'\u2154': + return U"VULGAR FRACTION TWO THIRDS"; + case U'\u2155': + return U"VULGAR FRACTION ONE FIFTH"; + case U'\u2156': + return U"VULGAR FRACTION TWO FIFTHS"; + case U'\u2157': + return U"VULGAR FRACTION THREE FIFTHS"; + case U'\u2158': + return U"VULGAR FRACTION FOUR FIFTHS"; + case U'\u2159': + return U"VULGAR FRACTION ONE SIXTH"; + case U'\u215A': + return U"VULGAR FRACTION FIVE SIXTHS"; + case U'\u215B': + return U"VULGAR FRACTION ONE EIGTH"; + case U'\u215C': + return U"VULGAR FRACTION THREE EIGTHS"; + case U'\u215D': + return U"VULGAR FRACTION FIVE EIGHTS"; + case U'\u215E': + return U"VULGAR FRACTION SEVEN EIGTHS"; + case U'\u215F': + return U"FRACTION NUMERATOR ONE"; + case U'\u2160': + return U"ROMAN NUMERAL ONE"; + case U'\u2161': + return U"ROMAN NUMERAL TWO"; + case U'\u2162': + return U"ROMAN NUMERAL THREE"; + case U'\u2163': + return U"ROMAN NUMERAL FOUR"; + case U'\u2164': + return U"ROMAN NUMERAL FIVE"; + case U'\u2165': + return U"ROMAN NUMERAL SIX"; + case U'\u2166': + return U"ROMAN NUMERAL SEVEN"; + case U'\u2167': + return U"ROMAN NUMERAL EIGHT"; + case U'\u2168': + return U"ROMAN NUMERAL NINE"; + case U'\u2169': + return U"ROMAN NUMERAL TEN"; + case U'\u216A': + return U"ROMAN NUMERAL ELEVEN"; + case U'\u216B': + return U"ROMAN NUMERAL TWELVE"; + case U'\u216C': + return U"ROMAN NUMERAL FIFTY"; + case U'\u216D': + return U"ROMAN NUMERAL ONE HUNDRED"; + case U'\u216E': + return U"ROMAN NUMERAL FIVE HUNDRED"; + case U'\u216F': + return U"ROMAN NUMERAL ONE THOUSAND"; + case U'\u2170': + return U"SMALL ROMAN NUMERAL ONE"; + case U'\u2171': + return U"SMALL ROMAN NUMERAL TWO"; + case U'\u2172': + return U"SMALL ROMAN NUMERAL THREE"; + case U'\u2173': + return U"SMALL ROMAN NUMERAL FOUR"; + case U'\u2174': + return U"SMALL ROMAN NUMERAL FIVE"; + case U'\u2175': + return 
U"SMALL ROMAN NUMERAL SIX"; + case U'\u2176': + return U"SMALL ROMAN NUMERAL SEVEN"; + case U'\u2177': + return U"SMALL ROMAN NUMERAL EIGHT"; + case U'\u2178': + return U"SMALL ROMAN NUMERAL NINE"; + case U'\u2179': + return U"SMALL ROMAN NUMERAL TEN"; + case U'\u217A': + return U"SMALL ROMAN NUMERAL ELEVEN"; + case U'\u217B': + return U"SMALL ROMAN NUMERAL TWELVE"; + case U'\u217C': + return U"SMALL ROMAN NUMERAL FIFTY"; + case U'\u217D': + return U"SMALL ROMAN NUMERAL ONE HUNDRED"; + case U'\u217E': + return U"SMALL ROMAN NUMERAL FIVE HUNDRED"; + case U'\u217F': + return U"SMALL ROMAN NUMERAL ONE THOUSAND"; + case U'\u2180': + return U"ROMAN NUMERAL ONE THOUSAND C D"; + case U'\u2181': + return U"ROMAN NUMERAL FIVE THOUSAND"; + case U'\u2182': + return U"ROMAN NUMERAL TEN THOUSAND"; + case U'\u2183': + return U"ROMAN NUMERAL REVERSED ONE HUNDRED"; + case U'\u2184': + return U"LATIN SMALL LETTER REVERSED C"; + case U'\u2185': + return U"ROMAN NUMERAL SIX LATE FORM"; + case U'\u2186': + return U"ROMAN NUMERAL FIFTY EARLY FORM"; + case U'\u2187': + return U"ROMAN NUMERAL FIFTY THOUSAND"; + case U'\u2188': + return U"ROMAN NUMERAL ONE HUNDRED THOUSAND"; + case U'\u2189': + return U"VULGAR FRACTION ZERO THIRDS"; + case U'\u218A': + return U"TURNED DIGIT TWO"; + case U'\u218B': + return U"TURNED DIGIT THREE"; + /* MISCELLANEOUS SYMBOLS: */ + case U'\u2630': + return U"TRIGRAM FOR HEAVEN"; + case U'\u2631': + return U"TRIGRAM FOR LAKE"; + case U'\u2632': + return U"TRIGRAM FOR FIRE"; + case U'\u2633': + return U"TRIGRAM FOR THUNDER"; + case U'\u2634': + return U"TRIGRAM FOR WIND"; + case U'\u2635': + return U"TRIGRAM FOR WATER"; + case U'\u2636': + return U"TRIGRAM FOR MOUNTAIN"; + case U'\u2637': + return U"TRIGRAM FOR EARTH"; + case U'\u2638': + return U"WHEEL OF DHARMA"; + case U'\u2639': + return U"WHITE FROWNING FACE"; + case U'\u263A': + return U"WHITE SMILING FACE"; + case U'\u263B': + return U"BLACK SMILING FACE"; + case U'\u263C': + return U"WHITE SUN WITH 
RAYS"; + case U'\u263D': + return U"FIRST QUARTER MOON"; + case U'\u263E': + return U"LAST QUARTER MOON"; + case U'\u263F': + return U"MERCURY"; + case U'\u2640': + return U"FEMALE SIGN"; + case U'\u2641': + return U"EARTH"; + case U'\u2642': + return U"MALE SIGN"; + case U'\u2643': + return U"JUPITER"; + case U'\u2644': + return U"SATURN"; + case U'\u2645': + return U"URANUS"; + case U'\u2646': + return U"NEPTUNE"; + case U'\u2647': + return U"PLUTO"; + case U'\u2648': + return U"ARIES"; + case U'\u2649': + return U"TAURUS"; + case U'\u264A': + return U"GEMNINI"; + case U'\u264B': + return U"CANCER"; + case U'\u264C': + return U"LEO"; + case U'\u264D': + return U"VIRGO"; + case U'\u264E': + return U"LIBRA"; + case U'\u264F': + return U"SCORPIUS"; + case U'\u2650': + return U"SAGITTARIUS"; + case U'\u2651': + return U"CAPRICORN"; + case U'\u2652': + return U"AQUARIUS"; + case U'\u2653': + return U"PISCES"; + case U'\u2654': + return U"WHITE CHESS KING"; + case U'\u2655': + return U"WHITE CHESS QUEEN"; + case U'\u2656': + return U"WHITE CHESS ROOK"; + case U'\u2657': + return U"WHITE CHESS BISHOP"; + case U'\u2658': + return U"WHITE CHESS KNIGHT"; + case U'\u2659': + return U"WHITE CHESS PAWN"; + case U'\u265A': + return U"BLACK CHESS KING"; + case U'\u265B': + return U"BLACK CHESS QUEEN"; + case U'\u265C': + return U"BLACK CHESS ROOK"; + case U'\u265D': + return U"BLACK CHESS BISHOP"; + case U'\u265E': + return U"BLACK CHESS KNIGHT"; + case U'\u265F': + return U"BLACK CHESS PAWN"; + case U'\u2660': + return U"BLACK SPADE SUIT"; + case U'\u2661': + return U"WHITE HEART SUIT"; + case U'\u2662': + return U"WHITE DIAMOND SUIT"; + case U'\u2663': + return U"BLACK CLUB SUIT"; + case U'\u2664': + return U"WHITE SPADE SUIT"; + case U'\u2665': + return U"BLACK HEART SUIT"; + case U'\u2666': + return U"BLACK DIAMOND SUIT"; + case U'\u2667': + return U"WHITE CLUB SUIT"; + case U'\u2668': + return U"HOT SPRINGS"; + case U'\u2669': + return U"QUARTER NOTE"; + case U'\u266A': + 
return U"EIGHT NOTE"; + case U'\u266B': + return U"BEAMED EIGTH NOTES"; + case U'\u266C': + return U"BEAMED SIXTEENTH NOTES"; + case U'\u266D': + return U"MUSIC FLAT SIGN"; + case U'\u266E': + return U"MUSIC NEUTRAL SIGN"; + case U'\u266F': + return U"MUSIC SHARP SIGN"; + case U'\u2670': + return U"WEST SYRIAC CROSS"; + case U'\u2671': + return U"EAST SYRIAC CROSS"; + case U'\u2672': + return U"UNIVERSAL RECYCLING SYMBOL"; + case U'\u2673': + return U"RECYCLING SYMBOL FOR TYPE-1 PLASTICS"; + case U'\u2674': + return U"RECYCLING SYMBOL FOR TYPE-2 PLASTICS"; + case U'\u2675': + return U"RECYCLING SYMBOL FOR TYPE-3 PLASTICS"; + case U'\u2676': + return U"RECYCLING SYMBOL FOR TYPE-4 PLASTICS"; + case U'\u2677': + return U"RECYCLING SYMBOL FOR TYPE-5 PLASTICS"; + case U'\u2678': + return U"RECYCLING SYMBOL FOR TYPE-6 PLASTICS"; + case U'\u2679': + return U"RECYCLING SYMBOL FOR TYPE-7 PLASTICS"; + case U'\u267A': + return U"RECYCLING SYMBOL FOR GENERIC MATERIALS"; + case U'\u267B': + return U"BLACK UNIVERSAL RECYCLING SYMBOL"; + case U'\u267C': + return U"RECYCLED PAPER SYMBOL"; + case U'\u267D': + return U"PARTIALLY-RECYCLED PAPER SYMBOL"; + case U'\u267E': + return U"PERMANENT PAPER SIGN"; + case U'\u267F': + return U"WHEELCHAIR SYMBOL"; + case U'\u26B9': + return U"SEXTILE"; + /* DINGBATS: */ + case U'\u271D': + return U"LATIN CROSS"; + case U'\u2721': + return U"STAR OF DAVID"; + /* SUPPLEMENTAL PUNCTUATION: */ + case U'\u2E3B': + return U"THREE-EM DASH"; + /* ARABIC PRESENTATION FORMS-A: */ + case U'\uFDFD': + return U"ARABIC LIGATURE BISMILLAH AL-RAHMAN AR-RAHEEM"; + /* ANCIENT SYMBOLS: */ + case U'\U00010190': + return U"ROMAN SEXTANS SIGN"; + case U'\U00010191': + return U"ROMAN UNCIA SIGN"; + case U'\U00010192': + return U"ROMAN SEMUNCIA SIGN"; + case U'\U00010193': + return U"ROMAN SEXTULA SIGN"; + case U'\U00010194': + return U"ROMAN DIMIDIA SEXTULA SIGN"; + case U'\U00010195': + return U"ROMAN SILIQUA SIGN"; + case U'\U00010196': + return U"ROMAN DENARIUS 
SIGN"; + case U'\U00010197': + return U"ROMAN QUINARIUS SIGN"; + case U'\U00010198': + return U"ROMAN SESTERTIUS SIGN"; + case U'\U00010199': + return U"ROMAN DUPONDIUS SIGN"; + case U'\U0001019A': + return U"ROMAN AS SIGN"; + case U'\U0001019B': + return U"ROMAN CENTURIAL SIGN"; + case U'\U0001019C': + return U"ASCIA SIGN"; + /* BRAHMI: */ + case U'\U00011066': + return U"BRAHMI DIGIT ZERO"; + case U'\U00011067': + return U"BRAHMI DIGIT ONE"; + case U'\U00011068': + return U"BRAHMI DIGIT TWO"; + case U'\U00011069': + return U"BRAHMI DIGIT THREE"; + case U'\U0001106A': + return U"BRAHMI DIGIT FOUR"; + case U'\U0001106B': + return U"BRAHMI DIGIT FIVE"; + case U'\U0001106C': + return U"BRAHMI DIGIT SIX"; + case U'\U0001106D': + return U"BRAHMI DIGIT SEVEN"; + case U'\U0001106E': + return U"BRAHMI DIGIT EIGHT"; + case U'\U0001106F': + return U"BRAHMI DIGIT NINE"; + /* CUNEIFORM: */ + case U'\U00012031': + return U"CUNEIFORM SIGN AN PLUS NAGA SQUARED"; + /* CUNEIFORM NUMBERS AND PUNCTUATION: */ + case U'\U0001242B': + return U"CUNEIFORM NUMERIC SIGN NINE SHAR2"; + /* EGYPTIAN HIEROGLYPHS: */ + case U'\U000130B8': + return U"EGYPTIAN HIEROGLYPH D052"; + /* COUNTING ROD NUMERALS: */ + case U'\U0001D372': + return U"IDEOGRAPHIC TALLY MARK ONE"; + case U'\U0001D373': + return U"IDEOGRAPHIC TALLY MARK TWO"; + case U'\U0001D374': + return U"IDEOGRAPHIC TALLY MARK THREE"; + case U'\U0001D375': + return U"IDEOGRAPHIC TALLY MARK FOUR"; + case U'\U0001D376': + return U"IDEOGRAPHIC TALLY MARK FIVE"; + case U'\U0001D377': + return U"TALLY MARK ONE"; + case U'\U0001D378': + return U"TALLY MARK FIVE"; + /* ENCLOSED ALPHANUMERIC SUPPLEMENT: */ + case U'\U0001F10D': + return U"CIRCLED ZERO WITH SLASH"; + case U'\U0001F10E': + return U"CIRCLED ANTICKLOCKWISE ARROW"; + case U'\U0001F10F': + return U"CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH"; + case U'\U0001F12F': + return U"COPYLEFT SYMBOL"; + case U'\U0001F16D': + return U"CIRCLED CC"; + case U'\U0001F16E': + return U"CIRCLED C WITH 
OVERLAID BACKSLASH"; + case U'\U0001F16F': + return U"CIRCLED HUMAN FIGURE"; + /* EMOTICONS: */ + case U'\U0001F600': + return U"GRINNING FACE"; + case U'\U0001F601': + return U"GRINNING FACE WITH SMIRKING EYES"; + case U'\U0001F602': + return U"FACE WITH TEARS OF JOY"; + case U'\U0001F603': + return U"SMILING FACE WITH OPEN MOUTH"; + case U'\U0001F604': + return U"SMILING FACE WITH OPEN MOUTH AND SMILING EYES"; + case U'\U0001F605': + return U"SMILING FACE WITH OPEN MOUTH AND COULD SWEAT"; + case U'\U0001F606': + return U"SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES"; + case U'\U0001F607': + return U"SMILING FACE WITH HALO"; + case U'\U0001F608': + return U"SMILING FACE WITH HORNS"; + case U'\U0001F609': + return U"WINKING FACE"; + case U'\U0001F60A': + return U"SMILING FACE WITH SMILING EYES"; + case U'\U0001F60B': + return U"FACE SAVOURING DELICIOUS FOOD"; + case U'\U0001F60C': + return U"RELIEVED FACE"; + case U'\U0001F60D': + return U"SMILLING FACE HEART-SHAPED EYES"; + case U'\U0001F60E': + return U"SMILLING FACE WITH SUNGLASSES"; + case U'\U0001F60F': + return U"SMIRKING FACE"; + case U'\U0001F610': + return U"NEUTRAL FACE"; + case U'\U0001F611': + return U"EXPRESSIONLESS FACE"; + case U'\U0001F612': + return U"UNAMUSED FACE"; + case U'\U0001F613': + return U"FACE WITH COLD SWEAT"; + case U'\U0001F614': + return U"PENSIVE FACE"; + case U'\U0001F615': + return U"CONFUSED FACE"; + case U'\U0001F616': + return U"CONFOUNDED FACE"; + case U'\U0001F617': + return U"KISSING FACE"; + case U'\U0001F618': + return U"FACE THROWING A KISS"; + case U'\U0001F619': + return U"KISSING FACE WITH SMILLING EYES"; + case U'\U0001F61A': + return U"KISSING FACE WITH CLOSED EYES"; + case U'\U0001F61B': + return U"FACE WITH STUCK-OUT TONGUE"; + case U'\U0001F61C': + return U"FACE WITH STUCK-OUT TONGUE AND WINKING EYE"; + case U'\U0001F61D': + return U"FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES"; + case U'\U0001F61E': + return U"DISSAPOINTED FACE"; + case 
U'\U0001F61F': + return U"WORRIED FACE"; + case U'\U0001F620': + return U"ANGRY FACE"; + case U'\U0001F621': + return U"POUTING FACE"; + case U'\U0001F622': + return U"CRYING FACE"; + case U'\U0001F623': + return U"PERSEVERING FACE"; + case U'\U0001F624': + return U"FACE WITH LOOK OF TRIUMPH"; + case U'\U0001F625': + return U"DISSAPOINTED BUT RELIEVED FACE"; + case U'\U0001F626': + return U"FROWNING FACE WITH OPEN MOUTH"; + case U'\U0001F627': + return U"ANGUISHED FACE"; + case U'\U0001F628': + return U"FEARFUL FACE"; + case U'\U0001F629': + return U"WEARY FACE"; + case U'\U0001F62A': + return U"SLEEPY FACE"; + case U'\U0001F62B': + return U"TIRED FACE"; + case U'\U0001F62C': + return U"GRIMACING FACE"; + case U'\U0001F62D': + return U"LOUDLY CRYING FACE"; + case U'\U0001F62E': + return U"FACE WITH OPEN MOUTH"; + case U'\U0001F62F': + return U"HUSHED FACE"; + case U'\U0001F630': + return U"FACE WITH OPEN MOUTH AND COLD SWEAT"; + case U'\U0001F631': + return U"FACE SCREAMING IN FEAR"; + case U'\U0001F632': + return U"ASTONISHED FACE"; + case U'\U0001F633': + return U"FLUSHED FACE"; + case U'\U0001F634': + return U"SLEEPING FACE"; + case U'\U0001F635': + return U"DIZZY FACE"; + case U'\U0001F636': + return U"FACE WITHOUT MOUTH"; + case U'\U0001F637': + return U"FACE WITH MEDICAL MASK"; + case U'\U0001F641': + return U"SLIGHTLY FROWNING FACE"; + case U'\U0001F642': + return U"SLIGHTLY SMILING FACE"; + case U'\U0001F643': + return U"UPSIDE-DOWN FACE"; + case U'\U0001F644': + return U"FACE WITH ROLLING EYES"; + /* ORNAMENTAL DINGBATS: */ + case U'\U0001F670': + return U"SCRIPT LIGATURE ET ORNAMENT"; + case U'\U0001F671': + return U"HEAVY SCRIPT LIGATURE ET ORNAMENT"; + case U'\U0001F672': + return U"LIGATURE OPEN ET ORNAMENT"; + case U'\U0001F673': + return U"HEAVY LIGATURE OPEN ET ORNAMENT"; + case U'\U0001F674': + return U"HEAVY AMPERSAND ORNAMENT"; + case U'\U0001F675': + return U"SWASH AMPERSAND ORNAMENT"; + case U'\U0001F676': + return U"SANS-SERIF HEAVY DOUBLE 
TURNED COMMA QUOTATION MARK ORNAMENT"; + case U'\U0001F677': + return U"SANS-SERIF HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT"; + case U'\U0001F678': + return U"SANS-SERIF HEAVY LOW DOUBLE QUOTATION MARK ORNAMENT"; + case U'\U0001F679': + return U"HEAVY INTERROBANG ORNAMENT"; + case U'\U0001F67A': + return U"SANS-SERIF INTERROBANG ORNAMENT"; + case U'\U0001F67B': + return U"HEAVY SANS-SERIF INTERROBANG ORNAMENT"; + case U'\U0001F67C': + return U"VERY HEAVY SOLIDUS"; + case U'\U0001F67D': + return U"VERY HEAVY REVERSE SOLIDUS"; + case U'\U0001F67E': + return U"CHECKER BOARD"; + case U'\U0001F67F': + return U"REVERSE CHECKER BOARD"; + /* CJK UNIFIED IDEOGRAPHS EXTENSION G: */ + case U'\U0003106C': + return U"CJK UNIFIED IDEOGRAPH-3106C"; + /* TAGS: */ + case U'\U000E0001': + return U"LANGUAGE TAG"; + case U'\U000E0020': + return U"TAG SPACE"; + case U'\U000E0021': + return U"TAG EXCLAMATION MARK"; + case U'\U000E0022': + return U"TAG QUOTATION MARK"; + case U'\U000E0023': + return U"TAG NUMBER SIGN"; + case U'\U000E0024': + return U"TAG DOLLAR SIGN"; + case U'\U000E0025': + return U"TAG PERCENT SIGN"; + case U'\U000E0026': + return U"TAG AMPERSAND"; + case U'\U000E0027': + return U"TAG APOSTROPHE"; + case U'\U000E0028': + return U"TAG LEFT PARANTHESIS"; + case U'\U000E0029': + return U"TAG RIGHT PARANTHESIS"; + case U'\U000E002A': + return U"TAG ASTERISK"; + case U'\U000E002B': + return U"TAG PLUS SIGN"; + case U'\U000E002C': + return U"TAG COMMA"; + case U'\U000E002D': + return U"TAG HYPHEN-MINUS"; + case U'\U000E002E': + return U"TAG FULL STOP"; + case U'\U000E002F': + return U"TAG SOLIDUS"; + case U'\U000E0030': + return U"TAG DIGIT ZERO"; + case U'\U000E0031': + return U"TAG DIGIT ONE"; + case U'\U000E0032': + return U"TAG DIGIT TWO"; + case U'\U000E0033': + return U"TAG DIGIT THREE"; + case U'\U000E0034': + return U"TAG DIGIT FOUR"; + case U'\U000E0035': + return U"TAG DIGIT FIVE"; + case U'\U000E0036': + return U"TAG DIGIT SIX"; + case U'\U000E0037': + return 
U"TAG DIGIT SEVEN"; + case U'\U000E0038': + return U"TAG DIGIT EIGHT"; + case U'\U000E0039': + return U"TAG DIGIT NINE"; + case U'\U000E003A': + return U"TAG COLON"; + case U'\U000E003B': + return U"TAG SEMICOLON"; + case U'\U000E003C': + return U"TAG LESS-THAN SIGN"; + case U'\U000E003D': + return U"TAG EQUALS SIGN"; + case U'\U000E003E': + return U"TAG GREATER-THAN SIGN"; + case U'\U000E003F': + return U"TAG QUESTION MARK"; + case U'\U000E0040': + return U"TAG COMMERCIAL AT"; + case U'\U000E0041': + return U"TAG LATIN CAPITAL LETTER A"; + case U'\U000E0042': + return U"TAG LATIN CAPITAL LETTER B"; + case U'\U000E0043': + return U"TAG LATIN CAPITAL LETTER C"; + case U'\U000E0044': + return U"TAG LATIN CAPITAL LETTER D"; + case U'\U000E0045': + return U"TAG LATIN CAPITAL LETTER E"; + case U'\U000E0046': + return U"TAG LATIN CAPITAL LETTER F"; + case U'\U000E0047': + return U"TAG LATIN CAPITAL LETTER G"; + case U'\U000E0048': + return U"TAG LATIN CAPITAL LETTER H"; + case U'\U000E0049': + return U"TAG LATIN CAPITAL LETTER I"; + case U'\U000E004A': + return U"TAG LATIN CAPITAL LETTER J"; + case U'\U000E004B': + return U"TAG LATIN CAPITAL LETTER K"; + case U'\U000E004C': + return U"TAG LATIN CAPITAL LETTER L"; + case U'\U000E004D': + return U"TAG LATIN CAPITAL LETTER M"; + case U'\U000E004E': + return U"TAG LATIN CAPITAL LETTER N"; + case U'\U000E004F': + return U"TAG LATIN CAPITAL LETTER O"; + case U'\U000E0050': + return U"TAG LATIN CAPITAL LETTER P"; + case U'\U000E0051': + return U"TAG LATIN CAPITAL LETTER Q"; + case U'\U000E0052': + return U"TAG LATIN CAPITAL LETTER R"; + case U'\U000E0053': + return U"TAG LATIN CAPITAL LETTER S"; + case U'\U000E0054': + return U"TAG LATIN CAPITAL LETTER T"; + case U'\U000E0055': + return U"TAG LATIN CAPITAL LETTER U"; + case U'\U000E0056': + return U"TAG LATIN CAPITAL LETTER V"; + case U'\U000E0057': + return U"TAG LATIN CAPITAL LETTER W"; + case U'\U000E0058': + return U"TAG LATIN CAPITAL LETTER X"; + case U'\U000E0059': + 
return U"TAG LATIN CAPITAL LETTER Y"; + case U'\U000E005A': + return U"TAG LATIN CAPITAL LETTER Z"; + case U'\U000E005B': + return U"TAG LEFT SQUARE BRACKET"; + case U'\U000E005C': + return U"TAG REVERSE SOLIDUS"; + case U'\U000E005D': + return U"TAG RIGHT SQUARE BRACKET"; + case U'\U000E005E': + return U"TAG CIRCUMFLEX ACCENT"; + case U'\U000E005F': + return U"TAG LOW LINE"; + case U'\U000E0060': + return U"TAG GRAVE ACCENT"; + case U'\U000E0061': + return U"TAG LATIN SMALL LETTER A"; + case U'\U000E0062': + return U"TAG LATIN SMALL LETTER B"; + case U'\U000E0063': + return U"TAG LATIN SMALL LETTER C"; + case U'\U000E0064': + return U"TAG LATIN SMALL LETTER D"; + case U'\U000E0065': + return U"TAG LATIN SMALL LETTER E"; + case U'\U000E0066': + return U"TAG LATIN SMALL LETTER F"; + case U'\U000E0067': + return U"TAG LATIN SMALL LETTER G"; + case U'\U000E0068': + return U"TAG LATIN SMALL LETTER H"; + case U'\U000E0069': + return U"TAG LATIN SMALL LETTER I"; + case U'\U000E006A': + return U"TAG LATIN SMALL LETTER J"; + case U'\U000E006B': + return U"TAG LATIN SMALL LETTER K"; + case U'\U000E006C': + return U"TAG LATIN SMALL LETTER L"; + case U'\U000E006D': + return U"TAG LATIN SMALL LETTER M"; + case U'\U000E006E': + return U"TAG LATIN SMALL LETTER N"; + case U'\U000E006F': + return U"TAG LATIN SMALL LETTER O"; + case U'\U000E0070': + return U"TAG LATIN SMALL LETTER P"; + case U'\U000E0071': + return U"TAG LATIN SMALL LETTER Q"; + case U'\U000E0072': + return U"TAG LATIN SMALL LETTER R"; + case U'\U000E0073': + return U"TAG LATIN SMALL LETTER S"; + case U'\U000E0074': + return U"TAG LATIN SMALL LETTER T"; + case U'\U000E0075': + return U"TAG LATIN SMALL LETTER U"; + case U'\U000E0076': + return U"TAG LATIN SMALL LETTER V"; + case U'\U000E0077': + return U"TAG LATIN SMALL LETTER W"; + case U'\U000E0078': + return U"TAG LATIN SMALL LETTER X"; + case U'\U000E0079': + return U"TAG LATIN SMALL LETTER Y"; + case U'\U000E007A': + return U"TAG LATIN SMALL LETTER Z"; + case 
U'\U000E007B': + return U"TAG LEFT CURLY BRACKET"; + case U'\U000E007C': + return U"TAG VERTICAL LINE"; + case U'\U000E007D': + return U"TAG RIGHT CURLY BRACKET"; + case U'\U000E007E': + return U"TAG TILDE"; + case U'\U000E007F': + return U"CANCEL TAG"; + } +} + +#endif
\ No newline at end of file
diff --git a/u8c/include/u8c/utf b/u8c/include/u8c/utf
new file mode 100644
index 0000000..15bdc44
--- /dev/null
+++ b/u8c/include/u8c/utf
@@ -0,0 +1,51 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_xtmbVPu5vGoJz4tw)
+#define u8c_key_xtmbVPu5vGoJz4tw
+
+#include <u8c/arr> /* u8c::arr, the return type of u8c::cnv */
+
+namespace u8c { /* Declarations of the UTF conversion and character classification functions; the definitions are pulled in by the <u8c/utf.d/...> includes after this namespace. */
+	template<u8c::utf T,u8c::utf T0> [[nodiscard]] constexpr auto cnv( T0 const * begin,T0 const * end) -> u8c::arr<T>; /* Converts the code units in [begin,end) from the encoding of T0 to that of T; throws std::invalid_argument if either pointer is null. NOTE(review): the definition leaves the char16_t-source and char8_t-to-char16_t branches empty. */
+	[[nodiscard,u8c_attr_const]] constexpr auto isalnum( char32_t chr) -> bool; /* isalpha(chr) || isdigit(chr) */
+	[[nodiscard,u8c_attr_const]] constexpr auto isalpha( char32_t chr) -> bool; /* islower(chr) || isupper(chr), plus U+0297 and U+16A0..U+16EA; throws std::domain_error if chr > u8c::unimax */
+	[[nodiscard,u8c_attr_const]] constexpr auto iscntrl( char32_t chr) -> bool; /* True for U+0000..U+001F, U+007F..U+00A0, U+2028, U+2029, U+202D, U+202E, U+2068 and U+2069; throws std::domain_error if chr > u8c::unimax */
+	[[nodiscard,u8c_attr_const]] constexpr auto isdigit( char32_t chr) -> bool; /* presumably true for digit characters - confirm against utf.d/isdigit */
+	[[nodiscard,u8c_attr_const]] constexpr auto islower( char32_t chr) -> bool; /* presumably true for lowercase letters - confirm against utf.d/islower */
+	[[nodiscard,u8c_attr_const]] constexpr auto ispunct( char32_t chr) -> bool; /* presumably true for punctuation - confirm against utf.d/ispunct */
+	[[nodiscard,u8c_attr_const]] constexpr auto isspace( char32_t chr) -> bool; /* presumably true for whitespace - confirm against utf.d/isspace */
+	[[nodiscard,u8c_attr_const]] constexpr auto issurro( char32_t chr) -> bool; /* presumably true for UTF-16 surrogate codepoints - confirm against utf.d/issurro */
+	[[nodiscard,u8c_attr_const]] constexpr auto isupper( char32_t chr) -> bool; /* presumably true for uppercase letters - confirm against utf.d/isupper */
+	[[nodiscard,u8c_attr_const]] constexpr auto isxdigit(char32_t chr) -> bool; /* presumably true for hexadecimal digits - confirm against utf.d/isxdigit */
+}
+
+#include <u8c/utf.d/cnv>
+#include <u8c/utf.d/isalnum> /* NOTE(review): this file holds the definition of u8c::isalpha, not u8c::isalnum */
+#include <u8c/utf.d/isalpha> /* NOTE(review): this file holds the definition of u8c::isalnum, not u8c::isalpha */
+#include <u8c/utf.d/iscntrl>
+#include <u8c/utf.d/isdigit>
+#include <u8c/utf.d/islower>
+#include <u8c/utf.d/ispunct>
+#include <u8c/utf.d/isspace>
+#include <u8c/utf.d/issurro>
+#include <u8c/utf.d/isupper>
+#include <u8c/utf.d/isxdigit>
+
+#endif
diff --git a/u8c/include/u8c/utf.d/cnv b/u8c/include/u8c/utf.d/cnv
new file mode 100644
index 0000000..95b66e9
--- /dev/null
+++ b/u8c/include/u8c/utf.d/cnv
@@ -0,0 +1,116 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_nVkgRbXZfcq3BG8J)
+#define u8c_key_nVkgRbXZfcq3BG8J
+
+#include <algorithm> /* std::copy */
+#include <cstddef> /* u8c::size */
+#include <stdexcept> /* std::invalid_argument, std::out_of_range */
+#include <type_traits> /* std::is_same_v */
+#include <vector> /* std::vector */
+
+/*
+	Converts the code units in [_begin,_end) from the UTF encoding implied by
+	T0 to the one implied by T and returns them as a new array.
+
+	T      Destination code unit type.
+	T0     Source code unit type (deduced from the pointers).
+	_begin Pointer to the first source code unit.
+	_end   Pointer one past the last source code unit.
+
+	Returns the converted sequence. If T and T0 are the same type the input is
+	copied verbatim.
+
+	Throws std::invalid_argument if either pointer is null, and
+	std::out_of_range if a UTF-8 lead octet announces more continuation octets
+	than remain in the input.
+
+	Every codepoint is first decoded to char32_t and then encoded again, so
+	all combinations of source and destination are handled by the same two
+	steps. Apart from the truncation check no validation is done: stray
+	continuation octets and unpaired surrogates are passed through as-is.
+
+	NOTE(review): this assumes that u8c::utf only admits char8_t, char16_t and
+	char32_t - any other source type is treated like char32_t and any other
+	destination type like char8_t. Confirm against the concept's definition.
+*/
+template<u8c::utf T,u8c::utf T0> constexpr auto u8c::cnv(T0 const * const u8c_restr _begin,T0 const * const u8c_restr _end) -> u8c::arr<T> {
+	if (_begin == nullptr || _end == nullptr) [[unlikely]] {
+		throw std::invalid_argument("Null pointer provided as parameter.");
+	}
+	u8c::arr<T> out;
+	if constexpr (std::is_same_v<T0,T>) {
+		/* Same encoding on both sides: copy the code units verbatim. */
+		out.alloc(static_cast<u8c::size>(_end - _begin));
+		std::copy(_begin,_end,out.begin());
+	}
+	else {
+		for (T0 const * ptr = _begin;ptr < _end;++ptr) {
+			/* Step 1: decode one codepoint. Afterwards ptr rests on the last code unit that was consumed. */
+			char32_t chr = static_cast<char32_t>(*ptr);
+			if constexpr (std::is_same_v<T0,char8_t>) {
+				u8c::size ext = 0x0uz; /* Number of continuation octets after the lead octet. */
+				if (chr >= 0xF0u) { /* Four octets. */
+					ext = 0x3uz;
+					chr &= 0x07u;
+				}
+				else if (chr >= 0xE0u) { /* Three octets. */
+					ext = 0x2uz;
+					chr &= 0x0Fu;
+				}
+				else if (chr >= 0xC0u) { /* Two octets. */
+					ext = 0x1uz;
+					chr &= 0x1Fu;
+				}
+				/* The distance to _end counts the lead octet itself, hence "<=". */
+				if (static_cast<u8c::size>(_end - ptr) <= ext) [[unlikely]] {
+					throw std::out_of_range("Truncated UTF-8 sequence.");
+				}
+				for (;ext != 0x0uz;ext -= 0x1uz) {
+					++ptr;
+					chr = (chr << 0x6u) | (static_cast<char32_t>(*ptr) & 0x3Fu);
+				}
+			}
+			else if constexpr (std::is_same_v<T0,char16_t>) {
+				/* A high surrogate directly followed by a low surrogate forms one codepoint. */
+				if (chr >= 0xD800u && chr <= 0xDBFFu && _end - ptr > 0x1) {
+					char32_t const low = static_cast<char32_t>(ptr[0x1]);
+					if (low >= 0xDC00u && low <= 0xDFFFu) {
+						chr = 0x10000u + ((chr - 0xD800u) << 0xAu) + (low - 0xDC00u);
+						++ptr;
+					}
+				}
+			}
+			/* Step 2: encode the codepoint for the destination. */
+			if constexpr (std::is_same_v<T,char32_t>) {
+				out.app(chr);
+			}
+			else if constexpr (std::is_same_v<T,char16_t>) {
+				if (chr >= 0x10000u) { /* Two hextets. */
+					/* The offset is up to 20 bits wide, so it must be kept in a 32-bit type until it has been split. */
+					char32_t const off = chr - 0x10000u;
+					out.app(static_cast<char16_t>(0xD800u + (off >> 0xAu)));
+					out.app(static_cast<char16_t>(0xDC00u + (off & 0x3FFu)));
+				}
+				else { /* One hextet. */
+					out.app(static_cast<char16_t>(chr));
+				}
+			}
+			else {
+				if (chr >= 0x10000u) { /* Four octets. */
+					out.app(static_cast<char8_t>(0xF0u | ((chr >> 0x12u) & 0x07u)));
+					out.app(static_cast<char8_t>(0x80u | ((chr >> 0xCu) & 0x3Fu)));
+					out.app(static_cast<char8_t>(0x80u | ((chr >> 0x6u) & 0x3Fu)));
+					out.app(static_cast<char8_t>(0x80u | (chr & 0x3Fu)));
+				}
+				else if (chr >= 0x800u) { /* Three octets. */
+					out.app(static_cast<char8_t>(0xE0u | ((chr >> 0xCu) & 0x0Fu)));
+					out.app(static_cast<char8_t>(0x80u | ((chr >> 0x6u) & 0x3Fu)));
+					out.app(static_cast<char8_t>(0x80u | (chr & 0x3Fu)));
+				}
+				else if (chr >= 0x80u) { /* Two octets. */
+					out.app(static_cast<char8_t>(0xC0u | ((chr >> 0x6u) & 0x1Fu)));
+					out.app(static_cast<char8_t>(0x80u | (chr & 0x3Fu)));
+				}
+				else { /* One octet. */
+					out.app(static_cast<char8_t>(chr));
+				}
+			}
+		}
+	}
+	/* Single exit, so that no combination of T and T0 can flow off the end. */
+	return out;
+}
+
+#endif
diff --git a/u8c/include/u8c/utf.d/isalnum b/u8c/include/u8c/utf.d/isalnum
new file mode 100644
index 0000000..0960e57
--- /dev/null
+++ b/u8c/include/u8c/utf.d/isalnum
@@ -0,0 +1,190 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_C8fUI0HFQi6fZDUx)
+#define u8c_key_C8fUI0HFQi6fZDUx
+
+#include <stdexcept> /* std::domain_error */
+
+/*
+	Tells whether _chr is an alphabetic character.
+
+	Returns true if u8c::islower or u8c::isupper accepts _chr, if _chr is
+	U+0297, or if _chr lies in U+16A0..U+16EA (RUNIC LETTER FEHU FEOH FE F and
+	the runic letters after it). Returns false otherwise.
+
+	Throws std::domain_error if _chr is greater than u8c::unimax.
+
+	NOTE(review): U+0297 used to be annotated as LATIN LETTER GLOTTAL STOP,
+	which is the name of U+0294; U+0297 is LATIN LETTER STRETCHED C. Confirm
+	which codepoint was intended.
+*/
+constexpr auto u8c::isalpha(char32_t const _chr) -> bool {
+	if(_chr > u8c::unimax) [[unlikely]] {
+		throw std::domain_error("Unicode codepoint too big!");
+	}
+	/* The case predicates are asked first; the explicit codepoints are only compared if both reject _chr. */
+	return u8c::islower(_chr)
+	||     u8c::isupper(_chr)
+	||     _chr == U'\u0297'
+	||     (_chr >= U'\u16A0' && _chr <= U'\u16EA');
+}
+
+#endif
\ No newline at end of file
diff --git a/u8c/include/u8c/utf.d/isalpha b/u8c/include/u8c/utf.d/isalpha
new file mode 100644
index 0000000..3a0bb9d
--- /dev/null
+++ b/u8c/include/u8c/utf.d/isalpha
@@ -0,0 +1,29 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_YnTiCcefC1wLH21w)
+#define u8c_key_YnTiCcefC1wLH21w
+
+#include <stdexcept> /* std::domain_error */
+
+/*
+	Tells whether _chr is alphanumeric, that is, whether u8c::isalpha or
+	u8c::isdigit accepts it. u8c::isdigit is only asked if u8c::isalpha
+	rejects _chr.
+
+	NOTE(review): this file is named utf.d/isalpha but defines u8c::isalnum.
+*/
+constexpr auto u8c::isalnum(char32_t const _chr) -> bool {
+	if(u8c::isalpha(_chr)) {
+		return true;
+	}
+	return u8c::isdigit(_chr);
+}
+
+#endif
\ No newline at end of file
diff --git a/u8c/include/u8c/utf.d/iscntrl b/u8c/include/u8c/utf.d/iscntrl
new file mode 100644
index 0000000..083bf52
--- /dev/null
+++ b/u8c/include/u8c/utf.d/iscntrl
@@ -0,0 +1,180 @@
+// --C++--
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the
+	terms of the GNU Affero General Public License as published by the Free
+	Software Foundation, either version 3 of the License, or (at your
+	option) any later version.
+
+	u8c is distributed in the hope that it will be useful, but WITHOUT ANY
+	WARRANTY; without even the implied warranty of MERCHANTABILITY or
+	FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+	License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with u8c. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#if !defined(u8c_key_KkxufRi4dPQDAbxV)
+#define u8c_key_KkxufRi4dPQDAbxV
+
+#include <stdexcept> /* std::domain_error */
+
+/*
+	Tells whether _chr is treated as a control character.
+
+	Returns true for U+0000..U+001F, for U+007F..U+00A0 and for U+2028,
+	U+2029, U+202D, U+202E, U+2068 and U+2069. Returns false otherwise.
+
+	Throws std::domain_error if _chr is greater than u8c::unimax.
+
+	NOTE(review): U+00A0 NO-BREAK SPACE is a space separator in the Unicode
+	character database, and the codepoints from U+2028 onwards are separator
+	and format characters rather than control codes. They are accepted here
+	to keep the existing behaviour - confirm that this is intended.
+*/
+constexpr auto u8c::iscntrl(char32_t const _chr) -> bool {
+	if(_chr > u8c::unimax) [[unlikely]] {
+		throw std::domain_error("Unicode codepoint too big!");
+	}
+	/* NULL up to and including UNIT SEPARATOR. */
+	if(_chr <= U'\x1F') {
+		return true;
+	}
+	/* DELETE, the block from U+0080 to APPLICATION PROGRAM COMMAND, and NO-BREAK SPACE. */
+	if(_chr >= U'\x7F' && _chr <= U'\xA0') {
+		return true;
+	}
+	return _chr == U'\u2028'  /* LINE SEPARATOR */
+	||     _chr == U'\u2029'  /* PARAGRAPH SEPARATOR */
+	||     _chr == U'\u202D'  /* LEFT-TO-RIGHT OVERRIDE */
+	||     _chr == U'\u202E'  /* RIGHT-TO-LEFT OVERRIDE */
+	||     _chr == U'\u2068'  /* FIRST STRONG ISOLATE */
+	||     _chr == U'\u2069'; /* POP DIRECTIONAL ISOLATE */
+}
+
+#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isdigit b/u8c/include/u8c/utf.d/isdigit new file mode 100644 index 0000000..84179e6 --- /dev/null +++ b/u8c/include/u8c/utf.d/isdigit @@ -0,0 +1,59 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_8r8RPCDLujofbg3k) +#define u8c_key_8r8RPCDLujofbg3k + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::isdigit(char32_t const _chr) -> bool { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + switch(_chr) { + [[likely]] default: + return false; + case U'\u0030': /* DIGIT ZERO */ + [[fallthrough]]; + case U'\u0031': /* DIGIT ONE */ + [[fallthrough]]; + case U'\u0032': /* DIGIT TWO */ + [[fallthrough]]; + case U'\u0033': /* DIGIT THREE */ + [[fallthrough]]; + case U'\u0034': /* DIGIT FOUR */ + [[fallthrough]]; + case U'\u0035': /* DIGIT FIVE */ + [[fallthrough]]; + case U'\u0036': /* DIGIT SIX */ + [[fallthrough]]; + case U'\u0037': /* DIGIT SEVEN */ + [[fallthrough]]; + case U'\u0038': /* DIGIT EIGHT */ + [[fallthrough]]; + case U'\u0039': /* DIGIT NINE */ + [[fallthrough]]; + case U'\u218A': /* TURNED DIGIT TWO */ + [[fallthrough]]; + case U'\u218B': /* TURNED DIGIT THREE */ + return true; + } +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/islower b/u8c/include/u8c/utf.d/islower new file mode 100644 index 0000000..9fd40e6 --- /dev/null +++ b/u8c/include/u8c/utf.d/islower @@ -0,0 +1,345 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_AtxlGqMDj7uXYyKb) +#define u8c_key_AtxlGqMDj7uXYyKb + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::islower(char32_t const _chr) -> bool { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + switch(_chr) { + [[likely]] default: + return false; + case U'\u0061': /* LATIN SMALL LETTER A */ + [[fallthrough]]; + case U'\u0062': /* LATIN SMALL LETTER B */ + [[fallthrough]]; + case U'\u0063': /* LATIN SMALL LETTER C */ + [[fallthrough]]; + case U'\u0064': /* LATIN SMALL LETTER D */ + [[fallthrough]]; + case U'\u0065': /* LATIN SMALL LETTER E */ + [[fallthrough]]; + case U'\u0066': /* LATIN SMALL LETTER F */ + [[fallthrough]]; + case U'\u0067': /* LATIN SMALL LETTER G */ + [[fallthrough]]; + case U'\u0068': /* LATIN SMALL LETTER H */ + [[fallthrough]]; + case U'\u0069': /* LATIN SMALL LETTER I */ + [[fallthrough]]; + case U'\u006A': /* LATIN SMALL LETTER J */ + [[fallthrough]]; + case U'\u006B': /* LATIN SMALL LETTER K */ + [[fallthrough]]; + case U'\u006C': /* LATIN SMALL LETTER 
L */ + [[fallthrough]]; + case U'\u006D': /* LATIN SMALL LETTER M */ + [[fallthrough]]; + case U'\u006E': /* LATIN SMALL LETTER N */ + [[fallthrough]]; + case U'\u006F': /* LATIN SMALL LETTER O */ + [[fallthrough]]; + case U'\u0070': /* LATIN SMALL LETTER P */ + [[fallthrough]]; + case U'\u0071': /* LATIN SMALL LETTER Q */ + [[fallthrough]]; + case U'\u0072': /* LATIN SMALL LETTER R */ + [[fallthrough]]; + case U'\u0073': /* LATIN SMALL LETTER S */ + [[fallthrough]]; + case U'\u0074': /* LATIN SMALL LETTER T */ + [[fallthrough]]; + case U'\u0075': /* LATIN SMALL LETTER U */ + [[fallthrough]]; + case U'\u0076': /* LATIN SMALL LETTER V */ + [[fallthrough]]; + case U'\u0077': /* LATIN SMALL LETTER W */ + [[fallthrough]]; + case U'\u0078': /* LATIN SMALL LETTER X */ + [[fallthrough]]; + case U'\u0079': /* LATIN SMALL LETTER Y */ + [[fallthrough]]; + case U'\u007A': /* LATIN SMALL LETTER Z */ + [[fallthrough]]; + case U'\u00DF': /* LATIN SMALL LETTER SHARP S */ + [[fallthrough]]; + case U'\u00E0': /* LATIN SMALL LETTER A WITH GRAVE */ + [[fallthrough]]; + case U'\u00E1': /* LATIN SMALL LETTER A WITH ACUTE */ + [[fallthrough]]; + case U'\u00E2': /* LATIN SMALL LETTER A WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u00E3': /* LATIN SMALL LETTER A WITH TILDE */ + [[fallthrough]]; + case U'\u00E4': /* LATIN SMALL LETTER A WITH DIAERESIS */ + [[fallthrough]]; + case U'\u00E5': /* LATIN SMALL LETTER A WITH RING ABOVE */ + [[fallthrough]]; + case U'\u00E6': /* LATIN SMALL LETTER AE */ + [[fallthrough]]; + case U'\u00E7': /* LATIN SMALL LETTER C WITH CEDILLA */ + [[fallthrough]]; + case U'\u00E8': /* LATIN SMALL LETTER E WITH GRAVE */ + [[fallthrough]]; + case U'\u00E9': /* LATIN SMALL LETTER E WITH ACUTE */ + [[fallthrough]]; + case U'\u00EA': /* LATIN SMALL LETTER E WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u00EB': /* LATIN SMALL LETTER E WITH DIAERESIS */ + [[fallthrough]]; + case U'\u00EC': /* LATIN SMALL LETTER I WITH GRAVE */ + [[fallthrough]]; + case U'\u00ED': 
/* LATIN SMALL LETTER I WITH ACUTE */ + [[fallthrough]]; + case U'\u00EE': /* LATIN SMALL LETTER I WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u00EF': /* LATIN SMALL LETTER I WITH DIAERESIS */ + [[fallthrough]]; + case U'\u00F0': /* LATIN SMALL LETTER ETH */ + [[fallthrough]]; + case U'\u00F1': /* LATIN SMALL LETTER N WITH TILDE */ + [[fallthrough]]; + case U'\u00F2': /* LATIN SMALL LETTER O WITH GRAVE */ + [[fallthrough]]; + case U'\u00F3': /* LATIN SMALL LETTER O WITH ACUTE */ + [[fallthrough]]; + case U'\u00F4': /* LATIN SMALL LETTER O WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u00F5': /* LATIN SMALL LETTER O WITH TILDE */ + [[fallthrough]]; + case U'\u00F6': /* LATIN SMALL LETTER O WITH DIAERESIS */ + [[fallthrough]]; + case U'\u00F8': /* LATIN SMALL LETTER O WITH STROKE */ + [[fallthrough]]; + case U'\u00F9': /* LATIN SMALL LETTER U WITH GRAVE */ + [[fallthrough]]; + case U'\u00FA': /* LATIN SMALL LETTER U WITH ACUTE */ + [[fallthrough]]; + case U'\u00FB': /* LATIN SMALL LETTER U WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u00FC': /* U WITH TWO DOTS */ + [[fallthrough]]; + case U'\u00FD': /* LATIN SMALL LETTER Y WITH ACUTE */ + [[fallthrough]]; + case U'\u00FE': /* LATIN SMALL LETTER THORN */ + [[fallthrough]]; + case U'\u00FF': /* LATIN SMALL LETTER Y WITH DIAERESIS */ + [[fallthrough]]; + case U'\u0105': /* LATIN SMALL LETTER A WITH OGONEK */ + [[fallthrough]]; + case U'\u0107': /* LATIN SMALL LETTER C WITH ACUTE */ + [[fallthrough]]; + case U'\u010D': /* LATIN SMALL LETTER C WITH CARON */ + [[fallthrough]]; + case U'\u010F': /* LATIN SMALL LETTER D WITH CARON */ + [[fallthrough]]; + case U'\u0119': /* LATIN SMALL LETTER E WITH OGONEK */ + [[fallthrough]]; + case U'\u011B': /* LATIN SMALL LETTER E WITH CARON */ + [[fallthrough]]; + case U'\u011F': /* LATIN SMALL LETTER G WITH BREVE */ + [[fallthrough]]; + case U'\u0131': /* LATIN SMALL LETTER DOTLESS I */ + [[fallthrough]]; + case U'\u0133': /* LATIN SMALL LIGATURE IJ */ + [[fallthrough]]; + 
case U'\u0138': /* LATIN SMALL LETTER KRA */ + [[fallthrough]]; + case U'\u0142': /* LATIN SMALL LETTER L WITH STROKE */ + [[fallthrough]]; + case U'\u0144': /* LATIN SMALL LETTER N WITH ACUTE */ + [[fallthrough]]; + case U'\u0148': /* LATIN SMALL LETTER N WITH CARON */ + [[fallthrough]]; + case U'\u014B': /* LATIN SMALL LETTER ENG */ + [[fallthrough]]; + case U'\u0153': /* LATIN SMALL LIGATURE OE */ + [[fallthrough]]; + case U'\u0159': /* LATIN SMALL LETTER R WITH CARON */ + [[fallthrough]]; + case U'\u015B': /* LATIN SMALL LETTER S WITH ACUTE */ + [[fallthrough]]; + case U'\u015F': /* LATIN SMALL LETTER S WITH CEDILLA */ + [[fallthrough]]; + case U'\u0161': /* LATIN SMALL LETTER S WITH CARON */ + [[fallthrough]]; + case U'\u0165': /* LATIN SMALL LETTER T WITH CARON */ + [[fallthrough]]; + case U'\u016F': /* LATIN SMALL LETTER U WITH RING ABOVE */ + [[fallthrough]]; + case U'\u017A': /* LATIN SMALL LETTER Z WITH ACUTE */ + [[fallthrough]]; + case U'\u017C': /* LATIN SMALL LETTER Z WITH DOT ABOVE */ + [[fallthrough]]; + case U'\u017E': /* LATIN SMALL LETTER Z WITH CARON */ + [[fallthrough]]; + case U'\u01BF': /* LATIN LETTER WYNN */ + [[fallthrough]]; + case U'\u01DD': /* LATIN SMALL LETTER TURNED E */ + [[fallthrough]]; + case U'\u021D': /* LATIN SMALL LETTER YOGH */ + [[fallthrough]]; + case U'\u0242': /* LATIN SMALL LETTER GLOTTAL STOP */ + [[fallthrough]]; + case U'\u0250': /* LATIN SMALL LETTER TURNED A */ + [[fallthrough]]; + case U'\u0251': /* LATIN SMALL LETTER ALPHA */ + [[fallthrough]]; + case U'\u0252': /* LATIN SMALL LETTER TURNED ALPHA */ + [[fallthrough]]; + case U'\u0253': /* LATIN SMALL LETTER B WITH HOOk */ + [[fallthrough]]; + case U'\u0254': /* LATIN SMALL LETTER OPEN O */ + [[fallthrough]]; + case U'\u0255': /* LATIN SMALL LETTER C WITH CURL */ + [[fallthrough]]; + case U'\u0256': /* LATIN SMALL LETTER D WITH TAIL */ + [[fallthrough]]; + case U'\u0257': /* LATIN SMALL LETTER D WITH HOOk */ + [[fallthrough]]; + case U'\u0258': /* LATIN SMALL 
LETTER REVERSED E */ + [[fallthrough]]; + case U'\u0259': /* LATIN SMALL LETTER SCHWA */ + [[fallthrough]]; + case U'\u025A': /* LATIN SMALL LETTER SCHWA WITH HOOK */ + [[fallthrough]]; + case U'\u025B': /* LATIN SMALL LETTER OPEN E */ + [[fallthrough]]; + case U'\u025C': /* LATIN SMALL LETTER REVERSED OPEN E */ + [[fallthrough]]; + case U'\u025D': /* LATIN SMALL LETTER REVERSED OPEN E WITH HOOK */ + [[fallthrough]]; + case U'\u025E': /* LATIN SMALL LETTER CLOSED REVERSED OPEN E */ + [[fallthrough]]; + case U'\u025F': /* LATIN SMALL LETTER DOTLESS J WITH STROKE */ + [[fallthrough]]; + case U'\u0260': /* LATIN SMALL LETTER G WITH HOOK */ + [[fallthrough]]; + case U'\u0261': /* LATIN SMALL LETTER SCRIPT G */ + [[fallthrough]]; + case U'\u0262': /* LATIN LETTER SMALL CAPITAL G */ + [[fallthrough]]; + case U'\u0263': /* LATIN SMALL LETTER GAMMA */ + [[fallthrough]]; + case U'\u0264': /* LATIN SMALL LETTER RAMS HORN */ + [[fallthrough]]; + case U'\u0265': /* LATIN SMALL LETTER TURNED H */ + [[fallthrough]]; + case U'\u0266': /* LATIN SMALL LETTER H WITH HOOK */ + [[fallthrough]]; + case U'\u0267': /* LATIN SMALL LETTER HENG WITH HOOK */ + [[fallthrough]]; + case U'\u0268': /* LATIN SMALL LETTER I WITH STROKE */ + [[fallthrough]]; + case U'\u0269': /* LATIN SMALL LETTER IOTA */ + [[fallthrough]]; + case U'\u026A': /* LATIN LETTER SMALL CAPITAL I */ + [[fallthrough]]; + case U'\u026B': /* LATIN SMALL LETTER L WITH MIDDLE TILDE */ + [[fallthrough]]; + case U'\u026C': /* LATIN SMALL LETTER L WITH BELT */ + [[fallthrough]]; + case U'\u026D': /* LATIN SMALL LETTER L WITH RETROFLEX HOOK */ + [[fallthrough]]; + case U'\u026E': /* LATIN SMALL LETTER LEZH */ + [[fallthrough]]; + case U'\u026F': /* LATIN SMALL LETTER TURNED M */ + [[fallthrough]]; + case U'\u0270': /* LATIN SMALL LETTER TURNED M WITH LONG LEG */ + [[fallthrough]]; + case U'\u0271': /* LATIN SMALL LETTER M WITH HOOK */ + [[fallthrough]]; + case U'\u0272': /* LATIN SMALL LETTER N WITH LEFT HOOK */ + [[fallthrough]]; 
+ case U'\u0273': /* LATIN SMALL LETTER N WITH RETROFLEX HOOK */ + [[fallthrough]]; + case U'\u0283': /* LATIN SMALL LETTER ESH */ + [[fallthrough]]; + case U'\u028A': /* LATIN SMALL LETTER UPSILON */ + [[fallthrough]]; + case U'\u028B': /* LATIN SMALL LETTER V WITH HOOK */ + [[fallthrough]]; + case U'\u0292': /* LATIN SMALL LETTER EZH */ + [[fallthrough]]; + case U'\u0294': /* LATIN SMALL LETTER GLOTTAL STOP */ + [[fallthrough]]; + case U'\u03B1': /* GREEK SMALL LETTER ALPHA */ + [[fallthrough]]; + case U'\u03B2': /* GREEK SMALL LETTER BETA */ + [[fallthrough]]; + case U'\u03B3': /* GREEK SMALL LETTER GAMMA */ + [[fallthrough]]; + case U'\u03B4': /* GREEK SMALL LETTER DELTA */ + [[fallthrough]]; + case U'\u03B5': /* GREEK SMALL LETTER EPSILON */ + [[fallthrough]]; + case U'\u03B6': /* GREEK SMALL LETTER ZETA */ + [[fallthrough]]; + case U'\u03B7': /* GREEK SMALL LETTER ETA */ + [[fallthrough]]; + case U'\u03B8': /* GREEK SMALL LETTER THETA */ + [[fallthrough]]; + case U'\u03B9': /* GREEK SMALL LETTER IOTA */ + [[fallthrough]]; + case U'\u03BA': /* GREEK SMALL LETTER KAPPA */ + [[fallthrough]]; + case U'\u03BB': /* GREEK SMALL LETTER LAMBDA */ + [[fallthrough]]; + case U'\u03BC': /* GREEK SMALL LETTER MU */ + [[fallthrough]]; + case U'\u03BD': /* GREEK SMALL LETTER NU */ + [[fallthrough]]; + case U'\u03BE': /* GREEK SMALL LETTER XI */ + [[fallthrough]]; + case U'\u03BF': /* GREEK SMALL LETTER OMICRON */ + [[fallthrough]]; + case U'\u03C0': /* GREEK SMALL LETTER PI */ + [[fallthrough]]; + case U'\u03C1': /* GREEK SMALL LETTER RHO */ + [[fallthrough]]; + case U'\u03C2': /* GREEK SMALL LETTER FINAL SIGMA */ + [[fallthrough]]; + case U'\u03C3': /* GREEK SMALL LETTER SIGMA */ + [[fallthrough]]; + case U'\u03C4': /* GREEK SMALL LETTER TAU */ + [[fallthrough]]; + case U'\u03C5': /* GREEK SMALL LETTER UPSILON */ + [[fallthrough]]; + case U'\u03C6': /* GREEK SMALL LETTER PHI */ + [[fallthrough]]; + case U'\u03C7': /* GREEK SMALL LETTER CHI */ + [[fallthrough]]; + case 
U'\u03C8': /* GREEK SMALL LETTER PSI */ + [[fallthrough]]; + case U'\u03C9': /* GREEK SMALL LETTER OMEGA */ + [[fallthrough]]; + case U'\u1D79': /* LATIN SMALL LETTER INSULAR G */ + [[fallthrough]]; + case U'\uA7B7': /* LATIN SMALL LETTER OMEGA */ + [[fallthrough]]; + case U'\uFB00': /* LATIN SMALL LIGATURE FF */ + return true; + } +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/ispunct b/u8c/include/u8c/utf.d/ispunct new file mode 100644 index 0000000..f82f11e --- /dev/null +++ b/u8c/include/u8c/utf.d/ispunct @@ -0,0 +1,329 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_tmcwLOAAPKkIUthE) +#define u8c_key_tmcwLOAAPKkIUthE + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::ispunct(char32_t const _chr) -> bool { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + switch(_chr) { + [[likely]] default: + return false; + case U'\u0021': /* EXCLAMATION MARK */ + [[fallthrough]]; + case U'\u0022': /* QUOTATION MARK */ + [[fallthrough]]; + case U'\u0023': /* NUMBER SIGN */ + [[fallthrough]]; + case U'\u0024': /* DOLLAR SIGN */ + [[fallthrough]]; + case U'\u0025': /* PERCENT SIGN */ + [[fallthrough]]; + case U'\u0026': /* AMPERSAND */ + [[fallthrough]]; + case U'\u0027': /* APOSTROPHE */ + [[fallthrough]]; + case U'\u0028': /* LEFT PARANTHESIS */ + [[fallthrough]]; + case U'\u0029': /* RIGHT PARANTHESIS */ + [[fallthrough]]; + case U'\u002A': /* ASTERISK */ + [[fallthrough]]; + case U'\u002B': /* PLUS SIGN */ + [[fallthrough]]; + case U'\u002C': /* COMMA */ + [[fallthrough]]; + case U'\u002D': /* HYPHEN-MINUS */ + [[fallthrough]]; + case U'\u002E': /* 
FULL STOP */ + [[fallthrough]]; + case U'\u002F': /* SOLIDUS */ + [[fallthrough]]; + case U'\u003A': /* COLON */ + [[fallthrough]]; + case U'\u003B': /* SEMICOLON */ + [[fallthrough]]; + case U'\u003C': /* LESS-THAN SIGN */ + [[fallthrough]]; + case U'\u003D': /* EQUALS SIGN */ + [[fallthrough]]; + case U'\u003E': /* GREATER-THAN SIGN */ + [[fallthrough]]; + case U'\u003F': /* QUESTION MARK */ + [[fallthrough]]; + case U'\u0040': /* COMMERCIAL AT */ + [[fallthrough]]; + case U'\u005B': /* LEFT SQUARE BRACKET */ + [[fallthrough]]; + case U'\u005C': /* REVERSE SOLIDUS */ + [[fallthrough]]; + case U'\u005D': /* RIGHT SQUARE BRACKET */ + [[fallthrough]]; + case U'\u005E': /* CIRCUMFLEX ACCENT */ + [[fallthrough]]; + case U'\u005F': /* LOW LINE */ + [[fallthrough]]; + case U'\u0060': /* GRAVE ACCENT */ + [[fallthrough]]; + case U'\u007B': /* LEFT CURLY BRACKET */ + [[fallthrough]]; + case U'\u007C': /* VERTICAL LINE */ + [[fallthrough]]; + case U'\u007D': /* RIGHT CURLY BRACKET */ + [[fallthrough]]; + case U'\u007E': /* TILDE */ + [[fallthrough]]; + case U'\u00A1': /* INVERT EXCLAMATION MARK */ + [[fallthrough]]; + case U'\u00A2': /* CENT SIGN */ + [[fallthrough]]; + case U'\u00A3': /* POUND SIGN */ + [[fallthrough]]; + case U'\u00A4': /* CURRENCY SIGN */ + [[fallthrough]]; + case U'\u00A5': /* YEN SIGN */ + [[fallthrough]]; + case U'\u00A6': /* BROKEN BAR */ + [[fallthrough]]; + case U'\u00A7': /* SECTION SIGN */ + [[fallthrough]]; + case U'\u00A8': /* DIAERESIS */ + [[fallthrough]]; + case U'\u00A9': /* COPYRIGHT SIGN */ + [[fallthrough]]; + case U'\u00AA': /* FEMININE ORDINAL INDICATOR */ + [[fallthrough]]; + case U'\u00AB': /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u00AC': /* NOT SIGN */ + [[fallthrough]]; + case U'\u00AE': /* REGISTERED SIGN */ + [[fallthrough]]; + case U'\u00AF': /* MACRON */ + [[fallthrough]]; + case U'\u00B0': /* DEGREE SIGN */ + [[fallthrough]]; + case U'\u00B1': /* PLUS MINUS SYMBOL */ + [[fallthrough]]; + 
case U'\u00B2': /* SUPERSCRIPT TWO */ + [[fallthrough]]; + case U'\u00B3': /* SUPERSCRIPT THREE */ + [[fallthrough]]; + case U'\u00B4': /* ACUTE ACCENT */ + [[fallthrough]]; + case U'\u00B5': /* MICRO SIGN */ + [[fallthrough]]; + case U'\u00B6': /* PILCROW SIGN */ + [[fallthrough]]; + case U'\u00B7': /* MIDDLE DOT */ + [[fallthrough]]; + case U'\u00B8': /* CEDILLA */ + [[fallthrough]]; + case U'\u00B9': /* SUPERSCRIPT ONE */ + [[fallthrough]]; + case U'\u00BA': /* MASCULINE ORDINAL INDICATOR */ + [[fallthrough]]; + case U'\u00BB': /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u00BC': /* VULGAR FRACTION ONE QUARTER */ + [[fallthrough]]; + case U'\u00BD': /* VULGAR FRACTION ONE HALF */ + [[fallthrough]]; + case U'\u00BE': /* VULGAR FRACTION THREE QUARTERS */ + [[fallthrough]]; + case U'\u00BF': /* INVERT QUESTION MARK */ + [[fallthrough]]; + case U'\u00D7': /* MULTIPLICATION SIGN */ + [[fallthrough]]; + case U'\u00F7': /* DIVISION SIGN */ + [[fallthrough]]; + case U'\u2010': /* HYPHEN */ + [[fallthrough]]; + case U'\u2013': /* EN DASH */ + [[fallthrough]]; + case U'\u2014': /* EM DASH */ + [[fallthrough]]; + case U'\u2018': /* LEFT SINGLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u2019': /* RIGHT SINGLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u201C': /* LEFT DOUBLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u201D': /* RIGHT DOUBLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u2026': /* HORIZONTAL ELLIPSIS */ + [[fallthrough]]; + case U'\u2030': /* PER MILLE SIGN */ + [[fallthrough]]; + case U'\u2031': /* PER TEN THOUSAND SIGN */ + [[fallthrough]]; + case U'\u2032': /* PRIME */ + [[fallthrough]]; + case U'\u2033': /* DOUBLE PRIME */ + [[fallthrough]]; + case U'\u2034': /* TRIPLE PRIME */ + [[fallthrough]]; + case U'\u2035': /* REVERSED PRIME */ + [[fallthrough]]; + case U'\u2036': /* REVERSED DOUBLE PRIME */ + [[fallthrough]]; + case U'\u2037': /* REVERSED TRIPLE PRIME */ + [[fallthrough]]; + case U'\u203C': /* DOUBLE 
EXCLAMATION MARK */ + [[fallthrough]]; + case U'\u203D': /* INTERROBANG */ + [[fallthrough]]; + case U'\u2047': /* DOUBLE QUOTATION MARK */ + [[fallthrough]]; + case U'\u2048': /* QUESTION EXCLAMATION MARK */ + [[fallthrough]]; + case U'\u2049': /* EXCLAMATION QUESTION MARK */ + [[fallthrough]]; + case U'\u20A3': /* FRENCH FRANC SIGN */ + [[fallthrough]]; + case U'\u20A4': /* LIRA SIGN */ + [[fallthrough]]; + case U'\u20A8': /* RUPEE SIGN */ + [[fallthrough]]; + case U'\u20A9': /* WON SIGN */ + [[fallthrough]]; + case U'\u20AC': /* EURO SIGN */ + [[fallthrough]]; + case U'\u20B9': /* INDIAN RUPEE SIGN */ + [[fallthrough]]; + case U'\u20BF': /* BITCOIN SIGN */ + [[fallthrough]]; + case U'\u2103': /* DEGREE CELSIUS */ + [[fallthrough]]; + case U'\u2107': /* EULER CONSTANT */ + [[fallthrough]]; + case U'\u2109': /* DEGREE FAHRENHEIT */ + [[fallthrough]]; + case U'\u210E': /* PLANCK CONSTANT */ + [[fallthrough]]; + case U'\u2117': /* SOUND RECORDING COPYRIGHT */ + [[fallthrough]]; + case U'\u2122': /* TRADE MARK SIGN */ + [[fallthrough]]; + case U'\u2125': /* OUNCE SIGN */ + [[fallthrough]]; + case U'\u2126': /* OHM SIGN */ + [[fallthrough]]; + case U'\u212A': /* KELVIN SIGN */ + [[fallthrough]]; + case U'\u214D': /* AKTIESELSKAB */ + [[fallthrough]]; + case U'\u2205': /* EMPTY SET */ + [[fallthrough]]; + case U'\u2212': /* MINUS SIGN */ + [[fallthrough]]; + case U'\u221A': /* SQUARE ROOT */ + [[fallthrough]]; + case U'\u221B': /* CUBE ROOT */ + [[fallthrough]]; + case U'\u221C': /* FOURTH ROOT */ + [[fallthrough]]; + case U'\u221E': /* INFINITY */ + [[fallthrough]]; + case U'\u2228': /* LOGICAL OR */ + [[fallthrough]]; + case U'\u2248': /* ALMOST EQUAL TO */ + [[fallthrough]]; + case U'\u2260': /* NOT EQUAL TO */ + [[fallthrough]]; + case U'\u2264': /* LESS-THAN OR EQUAL TO */ + [[fallthrough]]; + case U'\u2265': /* GREATER-THAN OR EQUAL TO */ + [[fallthrough]]; + case U'\u2609': /* SUN */ + [[fallthrough]]; + case U'\u263F': /* MERCURY */ + [[fallthrough]]; + case 
U'\u2640': /* FEMALE SIGN */ + [[fallthrough]]; + case U'\u2641': /* EARTH */ + [[fallthrough]]; + case U'\u2642': /* MALE SIGN */ + [[fallthrough]]; + case U'\u2643': /* JUPITER */ + [[fallthrough]]; + case U'\u2644': /* SATURN */ + [[fallthrough]]; + case U'\u2645': /* URANUS */ + [[fallthrough]]; + case U'\u2646': /* NEPTUNE */ + [[fallthrough]]; + case U'\u2647': /* PLUTO */ + [[fallthrough]]; + case U'\u26A2': /* DOUBLED FEMALE SIGN */ + [[fallthrough]]; + case U'\u26A3': /* DOUBLED MALE SIGN */ + [[fallthrough]]; + case U'\u26A4': /* INTERLOCKED FEMALE AND MALE SIGN */ + [[fallthrough]]; + case U'\u26A5': /* MALE AND FEMALE SIGN */ + [[fallthrough]]; + case U'\u26B3': /* CERES */ + [[fallthrough]]; + case U'\u26B4': /* PALLAS */ + [[fallthrough]]; + case U'\u26B5': /* JUNO */ + [[fallthrough]]; + case U'\u26B6': /* VESTA */ + [[fallthrough]]; + case U'\u26B7': /* CHIRON */ + [[fallthrough]]; + case U'\u2BD8': /* PROSERPINA */ + [[fallthrough]]; + case U'\u2BD9': /* ASTRAEA */ + [[fallthrough]]; + case U'\u2BDA': /* HYGIEA */ + [[fallthrough]]; + case U'\u2BDB': /* PHOLOS */ + [[fallthrough]]; + case U'\u2BDC': /* NESSUS */ + [[fallthrough]]; + case U'\u2E2E': /* INVERTED QUESTION MARK */ + [[fallthrough]]; + case U'\u33D7': /* SQUARE PH */ + [[fallthrough]]; + case U'\uFDFC': /* RIAL SIGN */ + [[fallthrough]]; + case U'\U0001F10D': /* CIRCLED ZERO WITH SLASH */ + [[fallthrough]]; + case U'\U0001F10E': /* CIRCLED ANTICKLOCKWISE ARROW */ + [[fallthrough]]; + case U'\U0001F10F': /* CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH */ + [[fallthrough]]; + case U'\U0001F12F': /* COPYLEFT SYMBOL */ + [[fallthrough]]; + case U'\U0001F16D': /* CIRCLED CC */ + [[fallthrough]]; + case U'\U0001F16E': /* CIRCLED C WITH OVERLAID BACKSLASH */ + [[fallthrough]]; + case U'\U0001F16F': /* CIRCLED HUMAN FIGURE */ + return true; + } +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isspace b/u8c/include/u8c/utf.d/isspace new file mode 100644 index 0000000..ccf191a --- /dev/null +++ b/u8c/include/u8c/utf.d/isspace @@ -0,0 +1,47 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_fRbwVyyBwfrm0Slq) +#define u8c_key_fRbwVyyBwfrm0Slq + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::isspace(char32_t const _chr) -> bool { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + switch(_chr) { + [[likely]] default: + return false; + case U'\u0009': /* HORIZONTAL TABULATION */ + [[fallthrough]]; + case U'\u000A': /* NEW LINE */ + [[fallthrough]]; + case U'\u000B': /* VERTICAL TABULATION */ + [[fallthrough]]; + case U'\u000C': /* FORM FEED */ + [[fallthrough]]; + case U'\u000D': /* CARRIAGE RETURN */ + [[fallthrough]]; + case U'\u0020': /* SPACE */ + return true; + } +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/issurro b/u8c/include/u8c/utf.d/issurro new file mode 100644 index 0000000..119c2e5 --- /dev/null +++ b/u8c/include/u8c/utf.d/issurro @@ -0,0 +1,35 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_f4d3TezxF0FFmbn4) +#define u8c_key_f4d3TezxF0FFmbn4 + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::issurro(char32_t const _chr) -> bool { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + if(_chr >= U'\xD800' && _chr <= U'\xDFFF') [[unlikely]] { + return true; + } + return false; +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isupper b/u8c/include/u8c/utf.d/isupper new file mode 100644 index 0000000..4a18fd5 --- /dev/null +++ b/u8c/include/u8c/utf.d/isupper @@ -0,0 +1,265 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_CNx6iimb2pI6RXGS) +#define u8c_key_CNx6iimb2pI6RXGS + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::isupper(char32_t const _chr) -> bool { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + switch(_chr) { + [[likely]] default: + return false; + case U'\u0041': /* LATIN CAPITAL LETTER A */ + [[fallthrough]]; + case U'\u0042': /* LATIN CAPITAL LETTER B */ + [[fallthrough]]; + case U'\u0043': /* LATIN CAPITAL LETTER C */ + [[fallthrough]]; + case U'\u0044': /* LATIN CAPITAL LETTER D */ + [[fallthrough]]; + case U'E': /* LATIN CAPITAL LETTER E */ + [[fallthrough]]; + case U'F': /* LATIN CAPITAL LETTER F */ + [[fallthrough]]; + case U'G': /* LATIN CAPITAL LETTER G */ + [[fallthrough]]; + case U'H': /* LATIN CAPITAL LETTER H */ + [[fallthrough]]; + case U'I': /* LATIN CAPITAL LETTER I */ + [[fallthrough]]; + case U'J': /* LATIN CAPITAL LETTER J */ + [[fallthrough]]; + case U'K': /* LATIN CAPITAL LETTER K */ + [[fallthrough]]; + case U'L': /* LATIN CAPITAL LETTER L */ + 
[[fallthrough]]; + case U'M': /* LATIN CAPITAL LETTER M */ + [[fallthrough]]; + case U'N': /* LATIN CAPITAL LETTER N */ + [[fallthrough]]; + case U'O': /* LATIN CAPITAL LETTER O */ + [[fallthrough]]; + case U'P': /* LATIN CAPITAL LETTER P */ + [[fallthrough]]; + case U'Q': /* LATIN CAPITAL LETTER Q */ + [[fallthrough]]; + case U'R': /* LATIN CAPITAL LETTER R */ + [[fallthrough]]; + case U'S': /* LATIN CAPITAL LETTER S */ + [[fallthrough]]; + case U'T': /* LATIN CAPITAL LETTER T */ + [[fallthrough]]; + case U'U': /* LATIN CAPITAL LETTER U */ + [[fallthrough]]; + case U'V': /* LATIN CAPITAL LETTER V */ + [[fallthrough]]; + case U'X': /* LATIN CAPITAL LETTER Y */ + [[fallthrough]]; + case U'W': /* LATIN CAPITAL LETTER X */ + [[fallthrough]]; + case U'Y': /* LATIN CAPITAL LETTER Y */ + [[fallthrough]]; + case U'Z': /* LATIN CAPITAL LETTER Z */ + [[fallthrough]]; + case U'\u00C0': /* LATIN CAPITAL LETTER A WITH GRAVE */ + [[fallthrough]]; + case U'\u00C1': /* LATIN CAPITAL LETTER A WITH ACUTE */ + [[fallthrough]]; + case U'\u00C2': /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u00C3': /* LATIN CAPITAL LETTER A WITH TILDE */ + [[fallthrough]]; + case U'\u00C4': /* LATIN CAPITAL LETTER A WITH DIAERESIS */ + [[fallthrough]]; + case U'\u00C5': /* LATIN CAPITAL LETTER A WITH RING ABOVE */ + [[fallthrough]]; + case U'\u00C6': /* LATIN CAPITAL LETTER AE */ + [[fallthrough]]; + case U'\u00C7': /* LATIN CAPITAL LETTER C WITH CEDILLA */ + [[fallthrough]]; + case U'\u00C8': /* LATIN CAPITAL LETTER E WITH GRAVE */ + [[fallthrough]]; + case U'\u00C9': /* LATIN CAPITAL LETTER E WITH ACUTE */ + [[fallthrough]]; + case U'\u00CA': /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u00CB': /* LATIN CAPITAL LETTER E WITH DIAERESIS */ + [[fallthrough]]; + case U'\u00CC': /* LATIN CAPITAL LETTER I WITH GRAVE */ + [[fallthrough]]; + case U'\u00CD': /* LATIN CAPITAL LETTER I WITH ACUTE */ + [[fallthrough]]; + case U'\u00CE': /* LATIN CAPITAL 
LETTER I WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u00CF': /* LATIN CAPITAL LETTER I WITH DIAERESIS */ + [[fallthrough]]; + case U'\u00D0': /* LATIN CAPITAL LETTER ETH */ + [[fallthrough]]; + case U'\u00D1': /* LATIN CAPITAL LETTER N WITH TILDE */ + [[fallthrough]]; + case U'\u00D2': /* LATIN CAPITAL LETTER O WITH GRAVE */ + [[fallthrough]]; + case U'\u00D3': /* LATIN CAPITAL LETTER O WITH ACUTE */ + [[fallthrough]]; + case U'\u00D4': /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u00D5': /* LATIN CAPITAL LETTER O WITH TILDE */ + [[fallthrough]]; + case U'\u00D6': /* LATIN CAPITAL LETTER O WITH DIAERESIS */ + [[fallthrough]]; + case U'\u00D8': /* LATIN CAPITAL LETTER O WITH STROKE */ + [[fallthrough]]; + case U'\u00D9': /* LATIN CAPITAL LETTER U WITH GRAVE */ + [[fallthrough]]; + case U'\u00DA': /* LATIN CAPITAL LETTER U WITH STROKE */ + [[fallthrough]]; + case U'\u00DB': /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u00DC': /* LATIN CAPITAL LETTER U WITH DIAERESIS */ + [[fallthrough]]; + case U'\u00DD': /* LATIN CAPITAL LETTER Y WITH ACUTE */ + [[fallthrough]]; + case U'\u00DE': /* LATIN CAPITAL LETTER THORN */ + [[fallthrough]]; + case U'\u0100': /* LATIN CAPITAL LETTER A WITH MACRON */ + [[fallthrough]]; + case U'\u0102': /* LATIN CAPITAL LETTER A WITH BREVE */ + [[fallthrough]]; + case U'\u0104': /* LATIN CAPITAL LETTER A WITH OGONEK */ + [[fallthrough]]; + case U'\u0106': /* LATIN CAPITAL LETTER C WITH ACUTE */ + [[fallthrough]]; + case U'\u0108': /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u010A': /* LATIN CAPITAL LETTER C WITH DOT ABOVE */ + [[fallthrough]]; + case U'\u010C': /* LATIN CAPITAL LETTER C WITH CARON */ + [[fallthrough]]; + case U'\u010E': /* LATIN CAPITAL LETTER D WITH CARON */ + [[fallthrough]]; + case U'\u0110': /* LATIN CAPITAL LETTER D WITH STROKE */ + [[fallthrough]]; + case U'\u0112': /* LATIN CAPITAL LETTER E WITH MACRON */ + [[fallthrough]]; + case 
U'\u0114': /* LATIN CAPITAL LETTER E WITH BREVE */ + [[fallthrough]]; + case U'\u0116': /* LATIN CAPITAL LETTER E WITH DOT ABOVE */ + [[fallthrough]]; + case U'\u0118': /* LATIN CAPITAL LETTER E WITH OGONEK */ + [[fallthrough]]; + case U'\u011A': /* LATIN CAPITAL LETTER E WITH CARON */ + [[fallthrough]]; + case U'\u011C': /* LATIN CAPITAL LETTER G WITH CIRCUMFLEX */ + [[fallthrough]]; + case U'\u014A': /* LATIN CAPITAL LETTER ENG */ + [[fallthrough]]; + case U'\u0152': /* LATIN CAPITAL LIGATURE OE */ + [[fallthrough]]; + case U'\u0186': /* LATIN CAPITAL LETTER OPEN O */ + [[fallthrough]]; + case U'\u018E': /* LATIN CAPITAL LETTER REVERSED E */ + [[fallthrough]]; + case U'\u018F': /* LATIN CAPITAL LETTER SCHWA */ + [[fallthrough]]; + case U'\u0190': /* LATIN CAPITAL LETTER OPEN E */ + [[fallthrough]]; + case U'\u0194': /* LATIN CAPITAL LETTER GAMMA */ + [[fallthrough]]; + case U'\u0196': /* LATIN CAPITAL LETTER IOTA */ + [[fallthrough]]; + case U'\u01A9': /* LATIN CAPITAL LETTER ESH */ + [[fallthrough]]; + case U'\u01B1': /* LATIN CAPITAL LETTER UPSILON */ + [[fallthrough]]; + case U'\u01B2': /* LATIN CAPITAL LETTER V WITH HOOk */ + [[fallthrough]]; + case U'\u01B7': /* LATIN CAPITAL LETTER EZH */ + [[fallthrough]]; + case U'\u01F7': /* LATIN CAPITAL LETTER WYNN */ + [[fallthrough]]; + case U'\u021C': /* LATIN CAPITAL LETTER YOGH */ + [[fallthrough]]; + case U'\u0241': /* LATIN CAPITAL LETTER GLOTTAL STOP */ + [[fallthrough]]; + case U'\u0391': /* GREEK CAPITAL LETTER ALPHA */ + [[fallthrough]]; + case U'\u0392': /* GREEK CAPITAL LETTER BETA */ + [[fallthrough]]; + case U'\u0393': /* GREEK CAPITAL LETTER GAMMA */ + [[fallthrough]]; + case U'\u0394': /* GREEK CAPITAL LETTER DELTA */ + [[fallthrough]]; + case U'\u0395': /* GREEK CAPITAL LETTER EPSILON */ + [[fallthrough]]; + case U'\u0396': /* GREEK CAPITAL LETTER ZETA */ + [[fallthrough]]; + case U'\u0397': /* GREEK CAPITAL LETTER ETA */ + [[fallthrough]]; + case U'\u0398': /* GREEK CAPITAL LETTER THETA */ + 
[[fallthrough]]; + case U'\u0399': /* GREEK CAPITAL LETTER IOTA */ + [[fallthrough]]; + case U'\u039A': /* GREEK CAPITAL LETTER KAPPA */ + [[fallthrough]]; + case U'\u039B': /* GREEK CAPITAL LETTER LAMBDA */ + [[fallthrough]]; + case U'\u039C': /* GREEK CAPITAL LETTER MU */ + [[fallthrough]]; + case U'\u039D': /* GREEK CAPITAL LETTER NU */ + [[fallthrough]]; + case U'\u039E': /* GREEK CAPITAL LETTER XI */ + [[fallthrough]]; + case U'\u039F': /* GREEK CAPITAL LETTER OMICRON */ + [[fallthrough]]; + case U'\u03A0': /* GREEK CAPITAL LETTER PI */ + [[fallthrough]]; + case U'\u03A1': /* GREEK CAPITAL LETTER RHO */ + [[fallthrough]]; + case U'\u03A3': /* GREEK CAPITAL LETTER SIGMA */ + [[fallthrough]]; + case U'\u03A4': /* GREEK CAPITAL LETTER TAU */ + [[fallthrough]]; + case U'\u03A5': /* GREEK CAPITAL LETTER UPSILON */ + [[fallthrough]]; + case U'\u03A6': /* GREEK CAPITAL LETTER PHI */ + [[fallthrough]]; + case U'\u03A7': /* GREEK CAPITAL LETTER CHI */ + [[fallthrough]]; + case U'\u03A8': /* GREEK CAPITAL LETTER PSI */ + [[fallthrough]]; + case U'\u03A9': /* GREEK CAPITAL LETTER OMEGA */ + [[fallthrough]]; + case U'\u1E9E': /* LATIN CAPITAL LETTER SHARP S */ + [[fallthrough]]; + case U'\u2C6D': /* LATIN CAPITAL LETTER ALPHA */ + [[fallthrough]]; + case U'\uA77D': /* LATIN CAPITAL LETTER INSULAR G */ + [[fallthrough]]; + case U'\uA7B4': /* LATIN CAPITAL LETTER BETA */ + [[fallthrough]]; + case U'\uA7B6': /* LATIN CAPITAL LETTER OMEGA */ + return true; + } +} + +#endif
\ No newline at end of file diff --git a/u8c/include/u8c/utf.d/isxdigit b/u8c/include/u8c/utf.d/isxdigit new file mode 100644 index 0000000..cc73526 --- /dev/null +++ b/u8c/include/u8c/utf.d/isxdigit @@ -0,0 +1,68 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#if !defined(u8c_key_NdERYC9ToUZX0vHE) +#define u8c_key_NdERYC9ToUZX0vHE + +#include <stdexcept> /* std::domain_error */ + +constexpr auto u8c::isxdigit(char32_t const _chr) -> bool { + if(_chr > u8c::unimax) [[unlikely]] { + throw std::domain_error("Unicode codepoint too big!"); + } + switch(_chr) { + [[likely]] default: + return false; + break; + case U'\u0030': /* DIGIT ZERO */ + [[fallthrough]]; + case U'\u0031': /* DIGIT ONE */ + [[fallthrough]]; + case U'\u0032': /* DIGIT TWO */ + [[fallthrough]]; + case U'\u0033': /* DIGIT THREE */ + [[fallthrough]]; + case U'\u0034': /* DIGIT FOUR */ + [[fallthrough]]; + case U'\u0035': /* DIGIT FIVE */ + [[fallthrough]]; + case U'\u0036': /* DIGIT SIX */ + [[fallthrough]]; + case U'\u0037': /* DIGIT SEVEN */ + [[fallthrough]]; + case U'\u0038': /* DIGIT EIGHT */ + [[fallthrough]]; + case U'\u0039': /* DIGIT NINE */ + [[fallthrough]]; + case U'\u0041': /* LATIN CAPITAL LETTER A */ + [[fallthrough]]; + case U'\u0042': /* LATIN CAPITAL LETTER B */ + [[fallthrough]]; + case U'\u0043': /* LATIN CAPITAL LETTER C */ + 
[[fallthrough]]; + case U'\u0044': /* LATIN CAPITAL LETTER D */ + [[fallthrough]]; + case U'\u0045': /* LATIN CAPITAL LETTER E */ + [[fallthrough]]; + case U'\u0046': /* LATIN CAPITAL LETTER F */ + return true; + } +} + +#endif
\ No newline at end of file diff --git a/u8c/src/operator.cc b/u8c/src/operator.cc new file mode 100644 index 0000000..3ea8eae --- /dev/null +++ b/u8c/src/operator.cc @@ -0,0 +1,28 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#include <algorithm> /* std::copy */ +#include <cstdint> /* u8c_ubytec */ +#include <ostream> /* std::ostream */ +#include <u8c/str> + +auto u8c::operator << (std::ostream & _strm,u8c::str const & _str) -> std::ostream & { + auto const u8 = _str.u8().app(u8'\u0000'); + return _strm << reinterpret_cast<unsigned char *>(u8.begin()); +} diff --git a/u8c/src/u8c/fmt.cc b/u8c/src/u8c/fmt.cc new file mode 100644 index 0000000..654fb98 --- /dev/null +++ b/u8c/src/u8c/fmt.cc @@ -0,0 +1,39 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#include <cstddef> /* std::nullptr_t */ +#include <string> /* std::u32string, std::u8string */ +#include <u8c/u8c> +#include <u8c/str> + +template<> auto u8c::fmt(char32_t const _chr) -> u8c::str { + return _chr; +} +template<> auto u8c::fmt(std::nullptr_t) -> u8c::str { + return U"nullptr"; +} +template<> auto u8c::fmt(u8c::str const _str) -> u8c::str { + return _str; +} +template<> auto u8c::fmt(void * _ptr) -> u8c::str { + if(_ptr == nullptr) [[unlikely]] { + return U"nullptr"; + } + return U"PTR"; +} diff --git a/u8c/src/u8c/print.cc b/u8c/src/u8c/print.cc new file mode 100644 index 0000000..bb08cb4 --- /dev/null +++ b/u8c/src/u8c/print.cc @@ -0,0 +1,25 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#include <ostream> /* std::ostream */ + +#if 0x0 +auto u8c::print([[maybe_unused]] std::ostream & _strm,[[maybe_unused]] u8c::str _msg) -> void { +} +#endif diff --git a/u8c/src/u8c/println.cc b/u8c/src/u8c/println.cc new file mode 100644 index 0000000..f607ad7 --- /dev/null +++ b/u8c/src/u8c/println.cc @@ -0,0 +1,25 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. 
+ + u8c is free software: you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your + option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public + License for more details. + + You should have received a copy of the GNU Affero General Public License + along with u8c. If not, see <https://www.gnu.org/licenses/>. +*/ + +#include <ostream> /* std::ostream */ + +#if 0x0 +auto u8c::println([[maybe_unused]] std::ostream & _strm,[[maybe_unused]] u8c::str _msg) -> void { +} +#endif
\ No newline at end of file |