diff options
Diffstat (limited to 'src/u8c')
-rw-r--r-- | src/u8c/chk.h.d/isalnum.c (renamed from src/u8c/is.h.d/isalnum.c) | 20 | ||||
-rw-r--r-- | src/u8c/chk.h.d/isalpha.c | 109 | ||||
-rw-r--r-- | src/u8c/chk.h.d/iscntrl.c | 104 | ||||
-rw-r--r-- | src/u8c/chk.h.d/isdigit.c (renamed from src/u8c/is.h.d/isdigit.c) | 15 | ||||
-rw-r--r-- | src/u8c/chk.h.d/islower.c | 187 | ||||
-rw-r--r-- | src/u8c/chk.h.d/ispunct.c (renamed from src/u8c/is.h.d/ispunct.c) | 27 | ||||
-rw-r--r-- | src/u8c/chk.h.d/isspace.c (renamed from src/u8c/is.h.d/isspace.c) | 15 | ||||
-rw-r--r-- | src/u8c/chk.h.d/issurro.c (renamed from src/u8c/u32.h.d/u32sz.c) | 21 | ||||
-rw-r--r-- | src/u8c/chk.h.d/isupper.c | 147 | ||||
-rw-r--r-- | src/u8c/chk.h.d/isxdigit.c (renamed from src/u8c/is.h.d/isxdigit.c) | 15 | ||||
-rw-r--r-- | src/u8c/err.h.d/geterr.c | 27 | ||||
-rw-r--r-- | src/u8c/err.h.d/regerrhandl.c | 9 | ||||
-rw-r--r-- | src/u8c/err.h.d/seterr.c | 17 | ||||
-rw-r--r-- | src/u8c/fmt.h.d/fmt.c | 12 | ||||
-rw-r--r-- | src/u8c/fmt.h.d/print.c | 8 | ||||
-rw-r--r-- | src/u8c/fmt.h.d/println.c | 22 | ||||
-rw-r--r-- | src/u8c/fmt.h.d/setfmt.c | 14 | ||||
-rw-r--r-- | src/u8c/fmt.h.d/vfmt.c | 13 | ||||
-rw-r--r-- | src/u8c/fmt.h.d/vprint.c | 30 | ||||
-rw-r--r-- | src/u8c/is.h.d/isalpha.c | 139 | ||||
-rw-r--r-- | src/u8c/is.h.d/iscntrl.c | 63 | ||||
-rw-r--r-- | src/u8c/main.h.d/abrtfn.c | 2 | ||||
-rw-r--r-- | src/u8c/main.h.d/end.c | 13 | ||||
-rw-r--r-- | src/u8c/main.h.d/init.c | 26 | ||||
-rw-r--r-- | src/u8c/main.h.d/thrdsafe.c | 190 | ||||
-rw-r--r-- | src/u8c/main.h.d/uniblk.c | 517 | ||||
-rw-r--r-- | src/u8c/main.h.d/uninm.c | 2582 | ||||
-rw-r--r-- | src/u8c/str.h.d/stralloc.c (renamed from src/u8c/u32.h.d/u32alloc.c) | 16 | ||||
-rw-r--r-- | src/u8c/str.h.d/strcat.c (renamed from src/u8c/u32.h.d/u32cat.c) | 37 | ||||
-rw-r--r-- | src/u8c/str.h.d/strcmp.c (renamed from src/u8c/u32.h.d/u32cmp.c) | 30 | ||||
-rw-r--r-- | src/u8c/str.h.d/strcp.c (renamed from src/u8c/u32.h.d/u32cp.c) | 34 | ||||
-rw-r--r-- | src/u8c/str.h.d/strfndchr.c (renamed from src/u8c/u32.h.d/u32fndchr.c) | 29 | ||||
-rw-r--r-- | src/u8c/str.h.d/strfndpat.c (renamed from src/u8c/u32.h.d/u32fndpat.c) | 36 | ||||
-rw-r--r-- | src/u8c/str.h.d/strfree.c (renamed from src/u8c/u32.h.d/u32free.c) | 12 | ||||
-rw-r--r-- | src/u8c/str.h.d/strins.c | 38 | ||||
-rw-r--r-- | src/u8c/str.h.d/strsubstr.c (renamed from src/u8c/u32.h.d/u32substr.c) | 29 | ||||
-rw-r--r-- | src/u8c/str.h.d/strsz.c | 35 | ||||
-rw-r--r-- | src/u8c/u16.h.d/u16alloc.c | 14 | ||||
-rw-r--r-- | src/u8c/u16.h.d/u16free.c | 10 | ||||
-rw-r--r-- | src/u8c/u32.h.d/u32ins.c | 45 | ||||
-rw-r--r-- | src/u8c/u8.h.d/u8alloc.c | 14 | ||||
-rw-r--r-- | src/u8c/u8.h.d/u8dec.c | 41 | ||||
-rw-r--r-- | src/u8c/u8.h.d/u8enc.c | 50 | ||||
-rw-r--r-- | src/u8c/u8.h.d/u8free.c | 10 |
44 files changed, 4274 insertions, 550 deletions
diff --git a/src/u8c/is.h.d/isalnum.c b/src/u8c/chk.h.d/isalnum.c index 13834c3..2c8115b 100644 --- a/src/u8c/is.h.d/isalnum.c +++ b/src/u8c/chk.h.d/isalnum.c @@ -13,20 +13,14 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/is.h> -bool u8c_isalnum(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); - uint_least8_t res = UINT8_C(0x0); - u8c_isalpha(&res,_chr); - if(res) { - *_res = res; - return false; - } - u8c_isdigit(&res,_chr); - *_res = res; - return false; +# include <u8c/chk.h> +struct u8c_isalnum_tuple u8c_isalnum(char32_t const _chr) { + struct u8c_isalnum_tuple ret = { + .stat = false, + }; + ret.res = u8c_isalpha(_chr).res || u8c_isdigit(_chr).res; + return ret; } diff --git a/src/u8c/chk.h.d/isalpha.c b/src/u8c/chk.h.d/isalpha.c new file mode 100644 index 0000000..2631095 --- /dev/null +++ b/src/u8c/chk.h.d/isalpha.c @@ -0,0 +1,109 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <u8c/chk.h> +struct u8c_isalpha_tuple u8c_isalpha(char32_t const _chr) { + struct u8c_isalpha_tuple ret = { + .stat = false, + }; + ret.res = u8c_islower(_chr).res || u8c_isupper(_chr).res; + if(ret.res) { + return ret; + } + switch(_chr) { + case U'\u0297': /* LATIN LETTER STRETCHED C; NOTE(review): LATIN LETTER GLOTTAL STOP is U+0294 - confirm the intended code point */ + case U'\u16A0': /* RUNIC LETTER FEHU FEOH FE F */ + case U'\u16A1': /* RUNIC LETTER V */ + case U'\u16A2': /* RUNIC LETTER URUZ UR U */ + case U'\u16A3': /* RUNIC LETTER YR */ + case U'\u16A4': /* RUNIC LETTER Y */ + case U'\u16A5': /* RUNIC LETTER W */ + case U'\u16A6': /* RUNIC LETTER THURISAZ THURS THORN */ + case U'\u16A7': /* RUNIC LETTER ETH */ + case U'\u16A8': /* RUNIC LETTER ANSUZ A */ + case U'\u16A9': /* RUNIC LETTER OS O */ + case U'\u16AA': /* RUNIC LETTER AC A */ + case U'\u16AB': /* RUNIC LETTER AESC */ + case U'\u16AC': /* RUNIC LETTER LONG-BRANCH-OSS O */ + case U'\u16AD': /* RUNIC LETTER SHORT-TWIG-OSS O */ + case U'\u16AE': /* RUNIC LETTER O */ + case U'\u16AF': /* RUNIC LETTER OE */ + case U'\u16B0': /* RUNIC LETTER ON */ + case U'\u16B1': /* RUNIC LETTER RAIDO RAD REID R */ + case U'\u16B2': /* RUNIC LETTER KAUNA */ + case U'\u16B3': /* RUNIC LETTER CEN */ + case U'\u16B4': /* RUNIC LETTER KAUN K */ + case U'\u16B5': /* RUNIC LETTER G */ + case U'\u16B6': /* RUNIC LETTER ENG */ + case U'\u16B7': /* RUNIC LETTER GEBO GYFU G */ + case U'\u16B8': /* RUNIC LETTER GAR */ + case U'\u16B9': /* RUNIC LETTER WUNJO WYNN W */ + case U'\u16BA': /* RUNIC LETTER HAGLAZ H */ + case U'\u16BB': /* RUNIC LETTER HAEGL H */ + case U'\u16BC': /* RUNIC LETTER LONG-BRANCH-HAGALL H */ + case U'\u16BD': /* RUNIC LETTER SHORT-TWIG-HAGALL H */ + case U'\u16BE': /* RUNIC LETTER NAUDIZ NYD NAUD N */ + case 
U'\u16BF': /* RUNIC LETTER SHORT-TWIG-NAUD N */ + case U'\u16C0': /* RUNIC LETTER DOTTED-N */ + case U'\u16C1': /* RUNIC LETTER ISAZ IS ISS I */ + case U'\u16C2': /* RUNIC LETTER E */ + case U'\u16C3': /* RUNIC LETTER JERAN J */ + case U'\u16C4': /* RUNIC LETTER GER */ + case U'\u16C5': /* RUNIC LETTER LONG-BRANCH-AR AE */ + case U'\u16C6': /* RUNIC LETTER SHORT-TWIG-AR A */ + case U'\u16C7': /* RUNIC LETTER IWAZ EOH */ + case U'\u16C8': /* RUNIC LETTER PERTHO PEORTH P */ + case U'\u16C9': /* RUNIC LETTER ALGIZ EOLHX */ + case U'\u16CA': /* RUNIC LETTER SOWILO S */ + case U'\u16CB': /* RUNIC LETTER SIGEL LONG-BRANCH-SOL S */ + case U'\u16CC': /* RUNIC LETTER SHORT-TWIG-SOL S */ + case U'\u16CD': /* RUNIC LETTER C */ + case U'\u16CE': /* RUNIC LETTER Z */ + case U'\u16CF': /* RUNIC LETTER TIWAZ TIR TYR T */ + case U'\u16D0': /* RUNIC LETTER SHORT-TWIG-TYR T */ + case U'\u16D1': /* RUNIC LETTER D */ + case U'\u16D2': /* RUNIC LETTER BERKANAN BEORC BJARKAN B */ + case U'\u16D3': /* RUNIC LETTER SHORT-TWIG-BJARKAN B */ + case U'\u16D4': /* RUNIC LETTER DOTTED-P */ + case U'\u16D5': /* RUNIC LETTER OPEN-P */ + case U'\u16D6': /* RUNIC LETTER EHWAZ EH E */ + case U'\u16D7': /* RUNIC LETTER MANNAZ MAN M */ + case U'\u16D8': /* RUNIC LETTER LONG-BRANCH-MADR M */ + case U'\u16D9': /* RUNIC LETTER SHORT-TWIG-MADR M */ + case U'\u16DA': /* RUNIC LETTER LAUKAZ LAGU LOGR L */ + case U'\u16DB': /* RUNIC LETTER DOTTED-L */ + case U'\u16DC': /* RUNIC LETTER INGWAZ */ + case U'\u16DD': /* RUNIC LETTER ING */ + case U'\u16DE': /* RUNIC LETTER DAGAZ DAEG D */ + case U'\u16DF': /* RUNIC LETTER OTHALAN ETHEL O */ + case U'\u16E0': /* RUNIC LETTER EAR */ + case U'\u16E1': /* RUNIC LETTER IOR */ + case U'\u16E2': /* RUNIC LETTER CWEORTH */ + case U'\u16E3': /* RUNIC LETTER CALC */ + case U'\u16E4': /* RUNIC LETTER CEALC */ + case U'\u16E5': /* RUNIC 
LETTER STAN */ + case U'\u16E6': /* RUNIC LETTER LONG-BRANCH-YR */ + case U'\u16E7': /* RUNIC LETTER SHORT-TWIG-YR */ + case U'\u16E8': /* RUNIC LETTER ICELANDIC-YR */ + case U'\u16E9': /* RUNIC LETTER Q */ + case U'\u16EA': /* RUNIC LETTER X */ + ret.res = true; + break; + } + return ret; +} diff --git a/src/u8c/chk.h.d/iscntrl.c b/src/u8c/chk.h.d/iscntrl.c new file mode 100644 index 0000000..c8532fc --- /dev/null +++ b/src/u8c/chk.h.d/iscntrl.c @@ -0,0 +1,104 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <u8c/chk.h> +struct u8c_iscntrl_tuple u8c_iscntrl(char32_t const _chr) { + struct u8c_iscntrl_tuple ret = { + .stat = false, + }; + switch(_chr) { + default: + ret.res = false; + break; + case U'\x0': /* NULL */ + case U'\x1': /* START OF HEADING */ + case U'\x2': /* START OF TEXT */ + case U'\x3': /* END OF TEXT */ + case U'\x4': /* END OF TRANSMISSION */ + case U'\x5': /* ENQUIRY */ + case U'\x6': /* ACKNOWLEDGE */ + case U'\a': /* BELL */ + case U'\b': /* BACKSPACE */ + case U'\t': /* HORIZONTAL TABULATION */ + case U'\n': /* NEW LINE */ + case U'\v': /* VERTICAL TABULATION */ + case U'\f': /* FORM FEED */ + case U'\r': /* CARRIAGE RETURN */ + case U'\xE': /* SHIFT OUT */ + case U'\xF': /* SHIFT IN */ + case U'\x10': /* DATA LINK ESCAPE */ + case U'\x11': /* DEVICE CONTROL ONE */ + case U'\x12': /* DEVICE CONTROL TWO */ + case U'\x13': /* DEVICE CONTROL THREE */ + case U'\x14': /* DEVICE CONTROL FOUR */ + case U'\x15': /* NEGATIVE ACKNOWLEDGE */ + case U'\x16': /* SYNCHRONOUS IDLE */ + case U'\x17': /* END OF TRANSMISSION BLOCK */ + case U'\x18': /* CANCEL */ + case U'\x19': /* END OF MEDIUM */ + case U'\x1A': /* SUBSTITUTE */ + case U'\x1B': /* ESCAPE */ + case U'\x1C': /* FILE SEPARATOR */ + case U'\x1D': /* GROUP SEPARATOR */ + case U'\x1E': /* RECORD SEPARATOR */ + case U'\x1F': /* UNIT SEPARATOR */ + case U'\x7F': /* DELETE */ + case U'\x80': /* <CONTROL> */ + case U'\x81': /* <CONTROL> */ + case U'\x82': /* BREAK PERMITTED HERE */ + case U'\x83': /* NO BREAK HERE */ + case U'\x84': /* <CONTROL> */ + case U'\x85': /* NEXT LINE */ + case U'\x86': /* START OF SELECTED AREA */ + case U'\x87': /* END OF SELECTED AREA */ + case U'\x88': /* CHARACTER TABULATION SET */ + case U'\x89': /* CHARACTER TABULATION WITH JUSTIFICATION */ + case U'\x8A': /* LINE TABULATION SET */ + case U'\x8B': /* PARTIAL LINE FORWARD */ + case U'\x8C': /* PARTIAL LINE BACKWARD */ + case U'\x8D': /* 
REVERSE LINE FEED */ + case U'\x8E': /* SINGLE SHIFT TWO */ + case U'\x8F': /* SINGLE SHIFT THREE */ + case U'\x90': /* DEVICE CONTROL STRING */ + case U'\x91': /* PRIVATE USE ONE */ + case U'\x92': /* PRIVATE USE TWO */ + case U'\x93': /* SET TRANSMIT STATE */ + case U'\x94': /* CANCEL CHARACTER */ + case U'\x95': /* MESSAGE WAITING */ + case U'\x96': /* START OF GUARDED AREA */ + case U'\x97': /* END OF GUARDED AREA */ + case U'\x98': /* START OF STRING */ + case U'\x99': /* <CONTROL> */ + case U'\x9A': /* SINGLE CHARACTER INTRODUCER */ + case U'\x9B': /* CONTROL SEQUENCE INTRODUCER */ + case U'\x9C': /* STRING TERMINATOR */ + case U'\x9D': /* OPERATING SYSTEM COMMAND */ + case U'\x9E': /* PRIVACY MESSAGE */ + case U'\x9F': /* APPLICATION PROGRAM COMMAND */ + case U'\xA0': /* NO-BREAK SPACE; NOTE(review): a space separator (Zs), not a control code - confirm it belongs here */ + case U'\u2028': /* LINE SEPARATOR */ + case U'\u2029': /* PARAGRAPH SEPARATOR */ + case U'\u202D': /* LEFT-TO-RIGHT OVERRIDE */ + case U'\u202E': /* RIGHT-TO-LEFT OVERRIDE */ + case U'\u2068': /* FIRST STRONG ISOLATE */ + case U'\u2069': /* POP DIRECTIONAL ISOLATE */ + ret.res = true; + break; + } + return ret; +} diff --git a/src/u8c/is.h.d/isdigit.c b/src/u8c/chk.h.d/isdigit.c index 8b799d9..61665cf 100644 --- a/src/u8c/is.h.d/isdigit.c +++ b/src/u8c/chk.h.d/isdigit.c @@ -13,16 +13,17 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/is.h> -bool u8c_isdigit(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); +# include <u8c/chk.h> +struct u8c_isdigit_tuple u8c_isdigit(char32_t const _chr) { + struct u8c_isdigit_tuple ret = { + .stat = false, + }; switch(_chr) { default: - *_res = UINT8_C(0x0); + ret.res = false; break; case U'0': /* DIGIT ZERO */ case U'1': /* DIGIT ONE */ @@ -36,8 +37,8 @@ bool u8c_isdigit(uint_least8_t * const _res,char32_t const _chr) { case U'9': /* DIGIT NINE */ case U'\u218A': /* TURNED DIGIT TWO */ case U'\u218B': /* TURNED DIGIT THREE */ - *_res = UINT8_C(0x1); + ret.res = true; break; } - return false; + return ret; } diff --git a/src/u8c/chk.h.d/islower.c b/src/u8c/chk.h.d/islower.c new file mode 100644 index 0000000..0cb3ea1 --- /dev/null +++ b/src/u8c/chk.h.d/islower.c @@ -0,0 +1,187 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <u8c/chk.h> +struct u8c_islower_tuple u8c_islower(char32_t const _chr) { + struct u8c_islower_tuple ret = { + .stat = false, + }; + switch(_chr) { + default: + ret.res = false; + break; + case U'a': /* LATIN SMALL LETTER A */ + case U'b': /* LATIN SMALL LETTER B */ + case U'c': /* LATIN SMALL LETTER C */ + case U'd': /* LATIN SMALL LETTER D */ + case U'e': /* LATIN SMALL LETTER E */ + case U'f': /* LATIN SMALL LETTER F */ + case U'g': /* LATIN SMALL LETTER G */ + case U'h': /* LATIN SMALL LETTER H */ + case U'i': /* LATIN SMALL LETTER I */ + case U'j': /* LATIN SMALL LETTER J */ + case U'k': /* LATIN SMALL LETTER K */ + case U'l': /* LATIN SMALL LETTER L */ + case U'm': /* LATIN SMALL LETTER M */ + case U'n': /* LATIN SMALL LETTER N */ + case U'o': /* LATIN SMALL LETTER O */ + case U'p': /* LATIN SMALL LETTER P */ + case U'q': /* LATIN SMALL LETTER Q */ + case U'r': /* LATIN SMALL LETTER R */ + case U's': /* LATIN SMALL LETTER S */ + case U't': /* LATIN SMALL LETTER T */ + case U'u': /* LATIN SMALL LETTER U */ + case U'v': /* LATIN SMALL LETTER V */ + case U'w': /* LATIN SMALL LETTER W */ + case U'x': /* LATIN SMALL LETTER X */ + case U'y': /* LATIN SMALL LETTER Y */ + case U'z': /* LATIN SMALL LETTER Z */ + case U'\u00DF': /* LATIN SMALL LETTER SHARP S */ + case U'\u00E0': /* LATIN SMALL LETTER A WITH GRAVE */ + case U'\u00E1': /* LATIN SMALL LETTER A WITH ACUTE */ + case U'\u00E2': /* LATIN SMALL LETTER A WITH CIRCUMFLEX */ + case U'\u00E3': /* LATIN SMALL LETTER A WITH TILDE */ + case U'\u00E4': /* LATIN SMALL LETTER A WITH DIAERESIS */ + case U'\u00E5': /* LATIN SMALL LETTER A WITH RING ABOVE */ + case U'\u00E6': /* LATIN SMALL LETTER AE */ + case U'\u00E7': /* LATIN SMALL LETTER C WITH CEDILLA */ + case U'\u00E8': /* LATIN SMALL LETTER E WITH GRAVE */ + case U'\u00E9': /* LATIN SMALL LETTER E WITH ACUTE */ + case U'\u00EA': /* LATIN SMALL LETTER E WITH CIRCUMFLEX */ + case 
U'\u00EB': /* LATIN SMALL LETTER E WITH DIAERESIS */ + case U'\u00EC': /* LATIN SMALL LETTER I WITH GRAVE */ + case U'\u00ED': /* LATIN SMALL LETTER I WITH ACUTE */ + case U'\u00EE': /* LATIN SMALL LETTER I WITH CIRCUMFLEX */ + case U'\u00EF': /* LATIN SMALL LETTER I WITH DIAERESIS */ + case U'\u00F0': /* LATIN SMALL LETTER ETH */ + case U'\u00F1': /* LATIN SMALL LETTER N WITH TILDE */ + case U'\u00F2': /* LATIN SMALL LETTER O WITH GRAVE */ + case U'\u00F3': /* LATIN SMALL LETTER O WITH ACUTE */ + case U'\u00F4': /* LATIN SMALL LETTER O WITH CIRCUMFLEX */ + case U'\u00F5': /* LATIN SMALL LETTER O WITH TILDE */ + case U'\u00F6': /* LATIN SMALL LETTER O WITH DIAERESIS */ + case U'\u00F8': /* LATIN SMALL LETTER O WITH STROKE */ + case U'\u00F9': /* LATIN SMALL LETTER U WITH GRAVE */ + case U'\u00FA': /* LATIN SMALL LETTER U WITH ACUTE */ + case U'\u00FB': /* LATIN SMALL LETTER U WITH CIRCUMFLEX */ + case U'\u00FC': /* LATIN SMALL LETTER U WITH DIAERESIS */ + case U'\u00FD': /* LATIN SMALL LETTER Y WITH ACUTE */ + case U'\u00FE': /* LATIN SMALL LETTER THORN */ + case U'\u00FF': /* LATIN SMALL LETTER Y WITH DIAERESIS */ + case U'\u0105': /* LATIN SMALL LETTER A WITH OGONEK */ + case U'\u0107': /* LATIN SMALL LETTER C WITH ACUTE */ + case U'\u010D': /* LATIN SMALL LETTER C WITH CARON */ + case U'\u010F': /* LATIN SMALL LETTER D WITH CARON */ + case U'\u0119': /* LATIN SMALL LETTER E WITH OGONEK */ + case U'\u011B': /* LATIN SMALL LETTER E WITH CARON */ + case U'\u011F': /* LATIN SMALL LETTER G WITH BREVE */ + case U'\u0131': /* LATIN SMALL LETTER DOTLESS I */ + case U'\u0133': /* LATIN SMALL LIGATURE IJ */ + case U'\u0138': /* LATIN SMALL LETTER KRA */ + case U'\u0142': /* LATIN SMALL LETTER L WITH STROKE */ + case U'\u0144': /* LATIN SMALL LETTER N WITH ACUTE */ + case U'\u0148': /* LATIN SMALL LETTER N WITH CARON */ + case U'\u014B': /* LATIN SMALL LETTER ENG */ + case U'\u0153': /* LATIN SMALL LIGATURE OE */ + case U'\u0159': /* LATIN SMALL LETTER R WITH CARON */ + case U'\u015B': /* 
LATIN SMALL LETTER S WITH ACUTE */ + case U'\u015F': /* LATIN SMALL LETTER S WITH CEDILLA */ + case U'\u0161': /* LATIN SMALL LETTER S WITH CARON */ + case U'\u0165': /* LATIN SMALL LETTER T WITH CARON */ + case U'\u016F': /* LATIN SMALL LETTER U WITH RING ABOVE */ + case U'\u017A': /* LATIN SMALL LETTER Z WITH ACUTE */ + case U'\u017C': /* LATIN SMALL LETTER Z WITH DOT ABOVE */ + case U'\u017E': /* LATIN SMALL LETTER Z WITH CARON */ + case U'\u01BF': /* LATIN LETTER WYNN */ + case U'\u01DD': /* LATIN SMALL LETTER TURNED E */ + case U'\u021D': /* LATIN SMALL LETTER YOGH */ + case U'\u0242': /* LATIN SMALL LETTER GLOTTAL STOP */ + case U'\u0250': /* LATIN SMALL LETTER TURNED A */ + case U'\u0251': /* LATIN SMALL LETTER ALPHA */ + case U'\u0252': /* LATIN SMALL LETTER TURNED ALPHA */ + case U'\u0253': /* LATIN SMALL LETTER B WITH HOOK */ + case U'\u0254': /* LATIN SMALL LETTER OPEN O */ + case U'\u0255': /* LATIN SMALL LETTER C WITH CURL */ + case U'\u0256': /* LATIN SMALL LETTER D WITH TAIL */ + case U'\u0257': /* LATIN SMALL LETTER D WITH HOOK */ + case U'\u0258': /* LATIN SMALL LETTER REVERSED E */ + case U'\u0259': /* LATIN SMALL LETTER SCHWA */ + case U'\u025A': /* LATIN SMALL LETTER SCHWA WITH HOOK */ + case U'\u025B': /* LATIN SMALL LETTER OPEN E */ + case U'\u025C': /* LATIN SMALL LETTER REVERSED OPEN E */ + case U'\u025D': /* LATIN SMALL LETTER REVERSED OPEN E WITH HOOK */ + case U'\u025E': /* LATIN SMALL LETTER CLOSED REVERSED OPEN E */ + case U'\u025F': /* LATIN SMALL LETTER DOTLESS J WITH STROKE */ + case U'\u0260': /* LATIN SMALL LETTER G WITH HOOK */ + case U'\u0261': /* LATIN SMALL LETTER SCRIPT G */ + case U'\u0262': /* LATIN LETTER SMALL CAPITAL G */ + case U'\u0263': /* LATIN SMALL LETTER GAMMA */ + case U'\u0264': /* LATIN SMALL LETTER RAMS HORN */ + case U'\u0265': /* LATIN SMALL LETTER TURNED H */ + case U'\u0266': /* LATIN SMALL LETTER H WITH HOOK */ + case U'\u0267': /* LATIN SMALL LETTER HENG WITH HOOK */ + case U'\u0268': /* LATIN SMALL 
LETTER I WITH STROKE */ + case U'\u0269': /* LATIN SMALL LETTER IOTA */ + case U'\u026A': /* LATIN LETTER SMALL CAPITAL I */ + case U'\u026B': /* LATIN SMALL LETTER L WITH MIDDLE TILDE */ + case U'\u026C': /* LATIN SMALL LETTER L WITH BELT */ + case U'\u026D': /* LATIN SMALL LETTER L WITH RETROFLEX HOOK */ + case U'\u026E': /* LATIN SMALL LETTER LEZH */ + case U'\u026F': /* LATIN SMALL LETTER TURNED M */ + case U'\u0270': /* LATIN SMALL LETTER TURNED M WITH LONG LEG */ + case U'\u0271': /* LATIN SMALL LETTER M WITH HOOK */ + case U'\u0272': /* LATIN SMALL LETTER N WITH LEFT HOOK */ + case U'\u0273': /* LATIN SMALL LETTER N WITH RETROFLEX HOOK */ + case U'\u0283': /* LATIN SMALL LETTER ESH */ + case U'\u028A': /* LATIN SMALL LETTER UPSILON */ + case U'\u028B': /* LATIN SMALL LETTER V WITH HOOK */ + case U'\u0292': /* LATIN SMALL LETTER EZH */ + case U'\u0294': /* LATIN LETTER GLOTTAL STOP */ + case U'\u03B1': /* GREEK SMALL LETTER ALPHA */ + case U'\u03B2': /* GREEK SMALL LETTER BETA */ + case U'\u03B3': /* GREEK SMALL LETTER GAMMA */ + case U'\u03B4': /* GREEK SMALL LETTER DELTA */ + case U'\u03B5': /* GREEK SMALL LETTER EPSILON */ + case U'\u03B6': /* GREEK SMALL LETTER ZETA */ + case U'\u03B7': /* GREEK SMALL LETTER ETA */ + case U'\u03B8': /* GREEK SMALL LETTER THETA */ + case U'\u03B9': /* GREEK SMALL LETTER IOTA */ + case U'\u03BA': /* GREEK SMALL LETTER KAPPA */ + case U'\u03BB': /* GREEK SMALL LETTER LAMBDA */ + case U'\u03BC': /* GREEK SMALL LETTER MU */ + case U'\u03BD': /* GREEK SMALL LETTER NU */ + case U'\u03BE': /* GREEK SMALL LETTER XI */ + case U'\u03BF': /* GREEK SMALL LETTER OMICRON */ + case U'\u03C0': /* GREEK SMALL LETTER PI */ + case U'\u03C1': /* GREEK SMALL LETTER RHO */ + case U'\u03C2': /* GREEK SMALL LETTER FINAL SIGMA */ + case U'\u03C3': /* GREEK SMALL LETTER SIGMA */ + case U'\u03C4': /* GREEK SMALL LETTER TAU */ + case U'\u03C5': /* GREEK SMALL LETTER UPSILON */ + case U'\u03C6': /* GREEK SMALL LETTER PHI */ + case U'\u03C7': /* 
GREEK SMALL LETTER CHI */ + case U'\u03C8': /* GREEK SMALL LETTER PSI */ + case U'\u03C9': /* GREEK SMALL LETTER OMEGA */ + case U'\u1D79': /* LATIN SMALL LETTER INSULAR G */ + case U'\uA7B7': /* LATIN SMALL LETTER OMEGA */ + case U'\uFB00': /* LATIN SMALL LIGATURE FF */ + ret.res = true; + break; + } + return ret; +} diff --git a/src/u8c/is.h.d/ispunct.c b/src/u8c/chk.h.d/ispunct.c index f6b041f..2d2a276 100644 --- a/src/u8c/is.h.d/ispunct.c +++ b/src/u8c/chk.h.d/ispunct.c @@ -13,16 +13,17 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/is.h> -bool u8c_ispunct(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); +# include <u8c/chk.h> +struct u8c_ispunct_tuple u8c_ispunct(char32_t const _chr) { + struct u8c_ispunct_tuple ret = { + .stat = false, + }; switch(_chr) { default: - *_res = UINT8_C(0x0); + ret.res = false; break; case U'!': /* EXCLAMATION MARK */ case U'\"': /* QUOTATION MARK */ @@ -83,6 +84,18 @@ bool u8c_ispunct(uint_least8_t * const _res,char32_t const _chr) { case U'\u201D': /* RIGHT DOUBLE QUOTATION MARK */ case U'\u2026': /* HORIZONTAL ELLIPSIS */ case U'\u2030': /* PER MILLE SIGN */ + case U'\u2031': /* PER TEN THOUSAND SIGN */ + case U'\u2032': /* PRIME */ + case U'\u2033': /* DOUBLE PRIME */ + case U'\u2034': /* TRIPLE PRIME */ + case U'\u2035': /* REVERSED PRIME */ + case U'\u2036': /* REVERSED DOUBLE PRIME */ + case U'\u2037': /* REVERSED TRIPLE PRIME */ + case U'\u203C': /* DOUBLE EXCLAMATION MARK */ + case U'\u203D': /* INTERROBANG */ + case U'\u2047': /* DOUBLE QUESTION MARK */ + case U'\u2048': /* QUESTION EXCLAMATION MARK */ + case U'\u2049': /* EXCLAMATION QUESTION MARK */ case U'\u20A3': /* FRENCH FRANC SIGN */ case U'\u20A4': /* LIRA SIGN */ case U'\u20A8': /* RUPEE SIGN */ @@ -145,8 +158,8 @@ bool u8c_ispunct(uint_least8_t * const _res,char32_t const _chr) { case U'\U0001F16D': /* CIRCLED CC */ case 
U'\U0001F16E': /* CIRCLED C WITH OVERLAID BACKSLASH */ case U'\U0001F16F': /* CIRCLED HUMAN FIGURE */ - *_res = UINT8_C(0x1); + ret.res = true; break; } - return false; + return ret; } diff --git a/src/u8c/is.h.d/isspace.c b/src/u8c/chk.h.d/isspace.c index 9473476..478e7a7 100644 --- a/src/u8c/is.h.d/isspace.c +++ b/src/u8c/chk.h.d/isspace.c @@ -13,16 +13,17 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/is.h> -bool u8c_isspace(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); +# include <u8c/chk.h> +struct u8c_isspace_tuple u8c_isspace(char32_t const _chr) { + struct u8c_isspace_tuple ret = { + .stat = false, + }; switch(_chr) { default: - *_res = UINT8_C(0x0); + ret.res = false; break; case U'\t': /* HORIZONTAL TABULATION */ case U'\n': /* NEW LINE */ @@ -30,8 +31,8 @@ bool u8c_isspace(uint_least8_t * const _res,char32_t const _chr) { case U'\f': /* FORM FEED */ case U'\r': /* CARRIAGE RETURN */ case U' ': /* SPACE */ - *_res = UINT8_C(0x1); + ret.res = true; break; } - return false; + return ret; } diff --git a/src/u8c/u32.h.d/u32sz.c b/src/u8c/chk.h.d/issurro.c index deb1ecd..e6873cd 100644 --- a/src/u8c/u32.h.d/u32sz.c +++ b/src/u8c/chk.h.d/issurro.c @@ -13,19 +13,18 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/SIZE_C.h> -# include <u8c/u32.h> -# include <uchar.h> -bool u8c_u32sz(size_t * const _sz,char32_t const * const _in) { - assert(_sz != NULL); - size_t sz = SIZE_C(0x0); - if(u8c_u32fndchr(&sz,_in,UINT8_C(0x0))) { - return true; +# include <u8c/chk.h> +struct u8c_issurro_tuple u8c_issurro(char32_t const _chr) { + struct u8c_issurro_tuple ret = { + .stat = false, + }; + bool res = false; + if(_chr >= U'\xD800' && _chr <= U'\xDFFF') { + res = true; } - *_sz = sz; - return true; + ret.res = res; + return ret; } diff --git a/src/u8c/chk.h.d/isupper.c b/src/u8c/chk.h.d/isupper.c new file mode 100644 index 0000000..8c5e2ab --- /dev/null +++ b/src/u8c/chk.h.d/isupper.c @@ -0,0 +1,147 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <u8c/chk.h> +struct u8c_isupper_tuple u8c_isupper(char32_t const _chr) { + struct u8c_isupper_tuple ret = { + .stat = false, + }; + switch(_chr) { + default: + ret.res = false; + break; + case U'A': /* LATIN CAPITAL LETTER A */ + case U'B': /* LATIN CAPITAL LETTER B */ + case U'C': /* LATIN CAPITAL LETTER C */ + case U'D': /* LATIN CAPITAL LETTER D */ + case U'E': /* LATIN CAPITAL LETTER E */ + case U'F': /* LATIN CAPITAL LETTER F */ + case U'G': /* LATIN CAPITAL LETTER G */ + case U'H': /* LATIN CAPITAL LETTER H */ + case U'I': /* LATIN CAPITAL LETTER I */ + case U'J': /* LATIN CAPITAL LETTER J */ + case U'K': /* LATIN CAPITAL LETTER K */ + case U'L': /* LATIN CAPITAL LETTER L */ + case U'M': /* LATIN CAPITAL LETTER M */ + case U'N': /* LATIN CAPITAL LETTER N */ + case U'O': /* LATIN CAPITAL LETTER O */ + case U'P': /* LATIN CAPITAL LETTER P */ + case U'Q': /* LATIN CAPITAL LETTER Q */ + case U'R': /* LATIN CAPITAL LETTER R */ + case U'S': /* LATIN CAPITAL LETTER S */ + case U'T': /* LATIN CAPITAL LETTER T */ + case U'U': /* LATIN CAPITAL LETTER U */ + case U'V': /* LATIN CAPITAL LETTER V */ + case U'X': /* LATIN CAPITAL LETTER X */ + case U'W': /* LATIN CAPITAL LETTER W */ + case U'Y': /* LATIN CAPITAL LETTER Y */ + case U'Z': /* LATIN CAPITAL LETTER Z */ + case U'\u00C0': /* LATIN CAPITAL LETTER A WITH GRAVE */ + case U'\u00C1': /* LATIN CAPITAL LETTER A WITH ACUTE */ + case U'\u00C2': /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ + case U'\u00C3': /* LATIN CAPITAL LETTER A WITH TILDE */ + case U'\u00C4': /* LATIN CAPITAL LETTER A WITH DIAERESIS */ + case U'\u00C5': /* LATIN CAPITAL LETTER A WITH RING ABOVE */ + case U'\u00C6': /* LATIN CAPITAL LETTER AE */ + case U'\u00C7': /* LATIN CAPITAL LETTER C WITH CEDILLA */ + case U'\u00C8': /* LATIN CAPITAL LETTER E WITH GRAVE */ + case U'\u00C9': /* LATIN CAPITAL LETTER E WITH ACUTE */ + case U'\u00CA': /* LATIN CAPITAL LETTER E WITH 
CIRCUMFLEX */ + case U'\u00CB': /* LATIN CAPITAL LETTER E WITH DIAERESIS */ + case U'\u00CC': /* LATIN CAPITAL LETTER I WITH GRAVE */ + case U'\u00CD': /* LATIN CAPITAL LETTER I WITH ACUTE */ + case U'\u00CE': /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */ + case U'\u00CF': /* LATIN CAPITAL LETTER I WITH DIAERESIS */ + case U'\u00D0': /* LATIN CAPITAL LETTER ETH */ + case U'\u00D1': /* LATIN CAPITAL LETTER N WITH TILDE */ + case U'\u00D2': /* LATIN CAPITAL LETTER O WITH GRAVE */ + case U'\u00D3': /* LATIN CAPITAL LETTER O WITH ACUTE */ + case U'\u00D4': /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */ + case U'\u00D5': /* LATIN CAPITAL LETTER O WITH TILDE */ + case U'\u00D6': /* LATIN CAPITAL LETTER O WITH DIAERESIS */ + case U'\u00D8': /* LATIN CAPITAL LETTER O WITH STROKE */ + case U'\u00D9': /* LATIN CAPITAL LETTER U WITH GRAVE */ + case U'\u00DA': /* LATIN CAPITAL LETTER U WITH ACUTE */ + case U'\u00DB': /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */ + case U'\u00DC': /* LATIN CAPITAL LETTER U WITH DIAERESIS */ + case U'\u00DD': /* LATIN CAPITAL LETTER Y WITH ACUTE */ + case U'\u00DE': /* LATIN CAPITAL LETTER THORN */ + case U'\u0100': /* LATIN CAPITAL LETTER A WITH MACRON */ + case U'\u0102': /* LATIN CAPITAL LETTER A WITH BREVE */ + case U'\u0104': /* LATIN CAPITAL LETTER A WITH OGONEK */ + case U'\u0106': /* LATIN CAPITAL LETTER C WITH ACUTE */ + case U'\u0108': /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX */ + case U'\u010A': /* LATIN CAPITAL LETTER C WITH DOT ABOVE */ + case U'\u010C': /* LATIN CAPITAL LETTER C WITH CARON */ + case U'\u010E': /* LATIN CAPITAL LETTER D WITH CARON */ + case U'\u0110': /* LATIN CAPITAL LETTER D WITH STROKE */ + case U'\u0112': /* LATIN CAPITAL LETTER E WITH MACRON */ + case U'\u0114': /* LATIN CAPITAL LETTER E WITH BREVE */ + case U'\u0116': /* LATIN CAPITAL LETTER E WITH DOT ABOVE */ + case U'\u0118': /* LATIN CAPITAL LETTER E WITH OGONEK */ + case U'\u011A': /* LATIN CAPITAL LETTER E WITH CARON */ + case U'\u011C': /* LATIN CAPITAL 
LETTER G WITH CIRCUMFLEX */ + case U'\u014A': /* LATIN CAPITAL LETTER ENG */ + case U'\u0152': /* LATIN CAPITAL LIGATURE OE */ + case U'\u0186': /* LATIN CAPITAL LETTER OPEN O */ + case U'\u018E': /* LATIN CAPITAL LETTER REVERSED E */ + case U'\u018F': /* LATIN CAPITAL LETTER SCHWA */ + case U'\u0190': /* LATIN CAPITAL LETTER OPEN E */ + case U'\u0194': /* LATIN CAPITAL LETTER GAMMA */ + case U'\u0196': /* LATIN CAPITAL LETTER IOTA */ + case U'\u01A9': /* LATIN CAPITAL LETTER ESH */ + case U'\u01B1': /* LATIN CAPITAL LETTER UPSILON */ + case U'\u01B2': /* LATIN CAPITAL LETTER V WITH HOOK */ + case U'\u01B7': /* LATIN CAPITAL LETTER EZH */ + case U'\u01F7': /* LATIN CAPITAL LETTER WYNN */ + case U'\u021C': /* LATIN CAPITAL LETTER YOGH */ + case U'\u0241': /* LATIN CAPITAL LETTER GLOTTAL STOP */ + case U'\u0391': /* GREEK CAPITAL LETTER ALPHA */ + case U'\u0392': /* GREEK CAPITAL LETTER BETA */ + case U'\u0393': /* GREEK CAPITAL LETTER GAMMA */ + case U'\u0394': /* GREEK CAPITAL LETTER DELTA */ + case U'\u0395': /* GREEK CAPITAL LETTER EPSILON */ + case U'\u0396': /* GREEK CAPITAL LETTER ZETA */ + case U'\u0397': /* GREEK CAPITAL LETTER ETA */ + case U'\u0398': /* GREEK CAPITAL LETTER THETA */ + case U'\u0399': /* GREEK CAPITAL LETTER IOTA */ + case U'\u039A': /* GREEK CAPITAL LETTER KAPPA */ + case U'\u039B': /* GREEK CAPITAL LETTER LAMBDA */ + case U'\u039C': /* GREEK CAPITAL LETTER MU */ + case U'\u039D': /* GREEK CAPITAL LETTER NU */ + case U'\u039E': /* GREEK CAPITAL LETTER XI */ + case U'\u039F': /* GREEK CAPITAL LETTER OMICRON */ + case U'\u03A0': /* GREEK CAPITAL LETTER PI */ + case U'\u03A1': /* GREEK CAPITAL LETTER RHO */ + case U'\u03A3': /* GREEK CAPITAL LETTER SIGMA */ + case U'\u03A4': /* GREEK CAPITAL LETTER TAU */ + case U'\u03A5': /* GREEK CAPITAL LETTER UPSILON */ + case U'\u03A6': /* GREEK CAPITAL LETTER PHI */ + case U'\u03A7': /* GREEK CAPITAL LETTER CHI */ + case U'\u03A8': /* GREEK CAPITAL LETTER PSI */ + case U'\u03A9': /* GREEK CAPITAL LETTER 
OMEGA */ + case U'\u1E9E': /* LATIN CAPITAL LETTER SHARP S */ + case U'\u2C6D': /* LATIN CAPITAL LETTER ALPHA */ + case U'\uA77D': /* LATIN CAPITAL LETTER INSULAR G */ + case U'\uA7B4': /* LATIN CAPITAL LETTER BETA */ + case U'\uA7B6': /* LATIN CAPITAL LETTER OMEGA */ + ret.res = true; + break; + } + return ret; +} diff --git a/src/u8c/is.h.d/isxdigit.c b/src/u8c/chk.h.d/isxdigit.c index 4a59b0d..5100624 100644 --- a/src/u8c/is.h.d/isxdigit.c +++ b/src/u8c/chk.h.d/isxdigit.c @@ -13,16 +13,17 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> -# include <u8c/is.h> -bool u8c_isxdigit(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); +# include <u8c/chk.h> +struct u8c_isxdigit_tuple u8c_isxdigit(char32_t const _chr) { + struct u8c_isxdigit_tuple ret = { + .stat = false, + }; switch(_chr) { default: - *_res = UINT8_C(0x0); + ret.res = false; break; case U'0': /* DIGIT ZERO */ case U'1': /* DIGIT ONE */ @@ -40,8 +41,8 @@ bool u8c_isxdigit(uint_least8_t * const _res,char32_t const _chr) { case U'D': /* LATIN CAPITAL LETTER D */ case U'E': /* LATIN CAPITAL LETTER E */ case U'F': /* LATIN CAPITAL LETTER F */ - *_res = UINT8_C(0x1); + ret.res = true; break; } - return false; + return ret; } diff --git a/src/u8c/err.h.d/geterr.c b/src/u8c/err.h.d/geterr.c index 58f3612..5219348 100644 --- a/src/u8c/err.h.d/geterr.c +++ b/src/u8c/err.h.d/geterr.c @@ -18,14 +18,21 @@ # include <stdint.h> # include <u8c/err.h> # include <u8c/intern.h> -# include <u8c/u32.h> -bool u8c_geterr(size_t * const _sz,char32_t const * * const _out) { - # if defined(u8c_bethrdsafe) - mtx_lock(&u8c_dat.errlock); - # endif - u8c_u32cp(_sz,_out,u8c_dat.err); - # if defined(u8c_bethrdsafe) - mtx_unlock(&u8c_dat.errlock); - # endif - return false; +# include <u8c/str.h> +struct u8c_geterr_tuple u8c_geterr(void) { + struct u8c_geterr_tuple ret = { + .stat = false, + }; + { +# if defined(u8c_bethrdsafe) 
+ mtx_lock(&u8c_dat.errlock); +# endif + struct u8c_strcp_tuple const tuple = u8c_strcp(u8c_dat.err); +# if defined(u8c_bethrdsafe) + mtx_unlock(&u8c_dat.errlock); +# endif + ret.err = tuple.str; + ret.errsz = tuple.strsz; } + return ret; +} diff --git a/src/u8c/err.h.d/regerrhandl.c b/src/u8c/err.h.d/regerrhandl.c index 5e97177..5ac43a5 100644 --- a/src/u8c/err.h.d/regerrhandl.c +++ b/src/u8c/err.h.d/regerrhandl.c @@ -20,12 +20,15 @@ static void u8c_regerrhandl_seterrhandl(enum u8c_errtyp _typ,u8c_errhandltyp _errhandl) { u8c_dat.errhandls[(size_t)_typ] = _errhandl; } -bool u8c_regerrhandl(enum u8c_errtyp _typ,u8c_errhandltyp _errhandl) { +struct u8c_regerrhandl_tuple u8c_regerrhandl(enum u8c_errtyp _typ,u8c_errhandltyp _errhandl) { + struct u8c_regerrhandl_tuple ret = { + .stat = false, + }; # if defined(u8c_bethrdsafe) mtx_lock(&u8c_dat.errhandlslock); # endif if(_typ == u8c_errtyp_all) { - for(register int n = 0x0;n < (int)u8c_errtyp_maxerrtyp;n += 0x1) { + for(register int n = 0x0;n < (int)u8c_errtyp_all;n += 0x1) { u8c_regerrhandl_seterrhandl((enum u8c_errtyp)n,_errhandl); } } @@ -35,5 +38,5 @@ bool u8c_regerrhandl(enum u8c_errtyp _typ,u8c_errhandltyp _errhandl) { # if defined(u8c_bethrdsafe) mtx_unlock(&u8c_dat.errhandlslock); # endif - return false; + return ret; } diff --git a/src/u8c/err.h.d/seterr.c b/src/u8c/err.h.d/seterr.c index 017e45e..89edf19 100644 --- a/src/u8c/err.h.d/seterr.c +++ b/src/u8c/err.h.d/seterr.c @@ -13,25 +13,26 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> # include <u8c/err.h> # include <u8c/fmt.h> # include <u8c/intern.h> -# include <u8c/u32.h> +# include <u8c/str.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_seterr(char32_t const * const _msg,enum u8c_errtyp _typ) { - assert(_msg != NULL); - //u8c_dbgprint(_msg); +struct u8c_seterr_tuple u8c_seterr(enum u8c_errtyp _typ,char32_t const * const restrict _msg) { + struct u8c_seterr_tuple ret = { + .stat = false, + }; + /* u8c_dbgprint(_msg); */ # if defined(u8c_bethrdsafe) mtx_lock(&u8c_dat.errlock); # endif - u8c_u32free(&u8c_dat.err); - u8c_u32cp(NULL,&u8c_dat.err,_msg); + u8c_strfree(u8c_dat.err); + u8c_dat.err = u8c_strcp(_msg).str; # if defined(u8c_bethrdsafe) mtx_unlock(&u8c_dat.errlock); # endif @@ -44,5 +45,5 @@ bool u8c_seterr(char32_t const * const _msg,enum u8c_errtyp _typ) { # if defined(u8c_bethrdsafe) mtx_unlock(&u8c_dat.errhandlslock); # endif - return false; + return ret; } diff --git a/src/u8c/fmt.h.d/fmt.c b/src/u8c/fmt.h.d/fmt.c index 43f1ea4..59feb74 100644 --- a/src/u8c/fmt.h.d/fmt.c +++ b/src/u8c/fmt.h.d/fmt.c @@ -18,11 +18,15 @@ # include <stddef.h> # include <stdint.h> # include <u8c/fmt.h> -# include <u8c/u32.h> -bool u8c_fmt(size_t * const _outsz,char32_t const * * const _out,char32_t const * const _in,...) { +# include <u8c/str.h> +struct u8c_fmt_tuple u8c_fmt(char32_t const * const restrict _in,...) 
{ + struct u8c_fmt_tuple ret; va_list args; va_start(args,_in); - uint_least8_t val = u8c_vfmt(_outsz,_out,_in,args); + struct u8c_vfmt_tuple tuple = u8c_vfmt(_in,args); va_end(args); - return val; + ret.stat = tuple.stat; + ret.str = tuple.str; + ret.strsz = tuple.strsz; + return ret; } diff --git a/src/u8c/fmt.h.d/print.c b/src/u8c/fmt.h.d/print.c index 8785ad8..e2f4802 100644 --- a/src/u8c/fmt.h.d/print.c +++ b/src/u8c/fmt.h.d/print.c @@ -17,10 +17,12 @@ # include <stdbool.h> # include <stdint.h> # include <u8c/fmt.h> -bool u8c_print(FILE * _fp,char32_t const * const _msg,...) { +struct u8c_print_tuple u8c_print(FILE * restrict _fp,char32_t const * const restrict _msg,...) { + struct u8c_print_tuple ret; va_list args; va_start(args,_msg); - uint_least8_t val = u8c_vprint(_fp,_msg,args); + struct u8c_vprint_tuple tuple = u8c_vprint(_fp,_msg,args); va_end(args); - return val; + ret.stat = tuple.stat; + return ret; } diff --git a/src/u8c/fmt.h.d/println.c b/src/u8c/fmt.h.d/println.c index 1a924ad..d73a897 100644 --- a/src/u8c/fmt.h.d/println.c +++ b/src/u8c/fmt.h.d/println.c @@ -13,27 +13,21 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdarg.h> # include <stdbool.h> # include <stdint.h> # include <stdio.h> # include <u8c/fmt.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <uchar.h> -bool u8c_println(FILE * _fp,char32_t const * const _msg,...) { - assert(_fp != NULL); +struct u8c_println_tuple u8c_println(FILE * restrict _fp,char32_t const * const restrict _msg,...) 
{ + struct u8c_println_tuple ret; va_list args; va_start(args,_msg); - char32_t const * msg = NULL; - u8c_u32cat(NULL,&msg,_msg,U"\n"); - { - register bool const val = u8c_vprint(_fp,msg,args); - u8c_u32free(&msg); - if(val) { - return true; - } - } + char32_t const * msg = u8c_strcat(_msg,U"\n").str; + register struct u8c_vprint_tuple const tuple = u8c_vprint(_fp,msg,args); + u8c_strfree(msg); va_end(args); - return false; + ret.stat = tuple.stat; + return ret; } diff --git a/src/u8c/fmt.h.d/setfmt.c b/src/u8c/fmt.h.d/setfmt.c index 7956ea3..4c6d3a3 100644 --- a/src/u8c/fmt.h.d/setfmt.c +++ b/src/u8c/fmt.h.d/setfmt.c @@ -20,15 +20,15 @@ # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_setfmt(unsigned char const _base,unsigned char const _endian) { - uint_least8_t base = _base; - uint_least8_t endian = _endian; +struct u8c_setfmt_tuple u8c_setfmt(uint_least8_t const _base,bool const _endian) { + struct u8c_setfmt_tuple ret = { + .stat = false, + }; + register uint_least8_t base = _base; + register bool endian = _endian; if(_base > UINT8_C(0x20)) { base = UINT8_C(0xC); } - if(_endian > UINT8_C(0x1)) { - endian = UINT8_C(0x0); - } # if defined(u8c_bethrdsafe) mtx_lock(&u8c_dat.fmtlock); # endif @@ -37,5 +37,5 @@ bool u8c_setfmt(unsigned char const _base,unsigned char const _endian) { # if defined(u8c_bethrdsafe) mtx_unlock(&u8c_dat.fmtlock); # endif - return false; + return ret; } diff --git a/src/u8c/fmt.h.d/vfmt.c b/src/u8c/fmt.h.d/vfmt.c index 5b0c1e1..5148784 100644 --- a/src/u8c/fmt.h.d/vfmt.c +++ b/src/u8c/fmt.h.d/vfmt.c @@ -16,11 +16,18 @@ # include <stdarg.h> # include <stdbool.h> # include <u8c/fmt.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <uchar.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_vfmt(size_t * const _sz,char32_t const * * const _out,char32_t const * const _in,[[maybe_unused]] va_list _args) { - return u8c_u32cp(_sz,_out,_in); +struct u8c_vfmt_tuple u8c_vfmt(char32_t const * const 
restrict _in,[[maybe_unused]] va_list _args) { + struct u8c_vfmt_tuple ret = { + .stat = false, + }; + struct u8c_strcp_tuple const tuple = u8c_strcp(_in); + ret.stat = tuple.stat; + ret.str = tuple.str; + ret.strsz = tuple.strsz; + return ret; } diff --git a/src/u8c/fmt.h.d/vprint.c b/src/u8c/fmt.h.d/vprint.c index 8d824a8..cfcb850 100644 --- a/src/u8c/fmt.h.d/vprint.c +++ b/src/u8c/fmt.h.d/vprint.c @@ -13,7 +13,6 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdarg.h> # include <stdbool.h> # include <stdint.h> @@ -23,20 +22,24 @@ # include <u8c/err.h> # include <u8c/fmt.h> # include <u8c/intern.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <u8c/u8.h> # include <uchar.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_vprint(FILE * _fp,char32_t const * const _msg,va_list _args) { - assert(_msg != NULL); - char32_t const * str0 = NULL; - u8c_vfmt(NULL,&str0,_msg,_args); +struct u8c_vprint_tuple u8c_vprint(FILE * restrict _fp,char32_t const * const restrict _msg,va_list _args) { + struct u8c_vprint_tuple ret = { + .stat = false, + }; + char32_t const * str0 = u8c_vfmt(_msg,_args).str; size_t str1sz = SIZE_C(0x0); unsigned char const * str1 = NULL; - u8c_u8enc(&str1sz,&str1,str0); - assert(str1sz > SIZE_C(0x0)); + { + struct u8c_u8enc_tuple const tuple = u8c_u8enc(str0); + str1 = tuple.u8; + str1sz = tuple.u8sz; + } # if defined(u8c_bethrdsafe) mtx_lock(&u8c_dat.outlock); # endif @@ -46,11 +49,12 @@ bool u8c_vprint(FILE * _fp,char32_t const * const _msg,va_list _args) { mtx_unlock(&u8c_dat.outlock); # endif if(val < str1sz - SIZE_C(0x1)) { - u8c_seterr(U"u8c_vprint: fwrite: Unable to write to stdout.",u8c_errtyp_badio); - return true; + u8c_seterr(u8c_errtyp_badio,U"u8c_vprint: Unable to write to stdout."); + ret.stat = true; + return ret; } } - u8c_u32free(&str0); - u8c_u8free(&str1); - return false; + u8c_strfree(str0); + u8c_u8free(str1); + return ret; } diff --git a/src/u8c/is.h.d/isalpha.c 
b/src/u8c/is.h.d/isalpha.c deleted file mode 100644 index d5952dc..0000000 --- a/src/u8c/is.h.d/isalpha.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <assert.h> -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/is.h> -bool u8c_isalpha(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); - switch(_chr) { - default: - *_res = UINT8_C(0x0); - break; - case U'a': /* LATIN SMALL LETTER A */ - case U'b': /* LATIN SMALL LETTER B */ - case U'c': /* LATIN SMALL LETTER C */ - case U'd': /* LATIN SMALL LETTER D */ - case U'e': /* LATIN SMALL LETTER E */ - case U'f': /* LATIN SMALL LETTER F */ - case U'g': /* LATIN SMALL LETTER G */ - case U'h': /* LATIN SMALL LETTER H */ - case U'i': /* LATIN SMALL LETTER I */ - case U'j': /* LATIN SMALL LETTER J */ - case U'k': /* LATIN SMALL LETTER K */ - case U'l': /* LATIN SMALL LETTER L */ - case U'm': /* LATIN SMALL LETTER M */ - case U'n': /* LATIN SMALL LETTER N */ - case U'o': /* LATIN SMALL LETTER O */ - case U'p': /* LATIN SMALL LETTER P */ - case U'q': /* LATIN SMALL LETTER Q */ - case U'r': /* LATIN SMALL LETTER R */ - case U's': /* LATIN SMALL LETTER S */ - case U't': /* LATIN SMALL LETTER T */ - case U'u': /* LATIN SMALL LETTER U */ - case U'v': /* LATIN SMALL LETTER V */ - case U'w': /* LATIN 
SMALL LETTER W */ - case U'x': /* LATIN SMALL LETTER X */ - case U'y': /* LATIN SMALL LETTER Y */ - case U'z': /* LATIN SMALL LETTER Z */ - case U'\u00DF': /* LATIN SMALL LETTER SHARP S */ - case U'\u00E1': /* LATIN SMALL LETTER A WITH ACUTE */ - case U'\u00E4': /* LATIN SMALL LETTER A WITH DIAERESIS */ - case U'\u00E5': /* LATIN SMALL LETTER A WITH RING ABOVE */ - case U'\u00E6': /* LATIN SMALL LETTER AE */ - case U'\u00E7': /* LATIN SMALL LETTER C WITH CEDILLA */ - case U'\u00E9': /* LATIN SMALL LETTER E WITH ACUTE */ - case U'\u00ED': /* LATIN SMALL LETTER I WITH ACUTE */ - case U'\u00F0': /* LATIN SMALL LETTER ETH */ - case U'\u00F3': /* LATIN SMALL LETTER O WITH ACUTE */ - case U'\u00F6': /* LATIN SMALL LETTER O WITH DIAERESIS */ - case U'\u00F8': /* LATIN SMALL LETTER O WITH STROKE */ - case U'\u00FA': /* LATIN SMALL LETTER U WITH ACUTE */ - case U'\u00FC': /* U WITH TWO DOTS */ - case U'\u00FD': /* LATIN SMALL LETTER Y WITH ACUTE */ - case U'\u00FE': /* LATIN SMALL LETTER THORN */ - case U'\u0105': /* LATIN SMALL LETTER A WITH OGONEK */ - case U'\u0107': /* LATIN SMALL LETTER C WITH ACUTE */ - case U'\u010D': /* LATIN SMALL LETTER C WITH CARON */ - case U'\u010F': /* LATIN SMALL LETTER D WITH CARON */ - case U'\u0119': /* LATIN SMALL LETTER E WITH OGONEK */ - case U'\u011B': /* LATIN SMALL LETTER E WITH CARON */ - case U'\u011F': /* LATIN SMALL LETTER G WITH BREVE */ - case U'\u0131': /* LATIN SMALL LETTER DOTLESS I */ - case U'\u0133': /* LATIN SMALL LIGATURE LJ */ - case U'\u0138': /* LATIN SMALL LETTER KRA */ - case U'\u0142': /* LATIN SMALL LETTER L WITH STROKE */ - case U'\u0144': /* LATIN SMALL LETTER N WITH ACUTE */ - case U'\u0148': /* LATIN SMALL LETTER N WITH CARON */ - case U'\u014B': /* LATIN SMALL LETTER ENG */ - case U'\u0153': /* LATIN SMALL LIGATURE OE */ - case U'\u0159': /* LATIN SMALL LETTER R WITH CARON */ - case U'\u015B': /* LATIN SMALL LETTER S WITH ACUTE */ - case U'\u015F': /* LATIN SMALL LETTER S WITH CEDILLA */ - case U'\u0161': /* 
LATIN SMALL LETTER S WITH CARON */ - case U'\u0165': /* LATIN SMALL LETTER T WITH CARON */ - case U'\u016F': /* LATIN SMALL LETTER U WITH RING ABOVE */ - case U'\u017A': /* LATIN SMALL LETTER Z WITH ACUTE */ - case U'\u017C': /* LATIN SMALL LETTER Z WITH DOT ABOVE */ - case U'\u017E': /* LATIN SMALL LETTER Z WITH CARON */ - case U'\u01BF': /* LATIN LETTER WYNN */ - case U'\u01DD': /* LATIN SMALL LETTER TURNED E */ - case U'\u021D': /* LATIN SMALL LETTER YOGH */ - case U'\u0251': /* LATIN SMALL LETTER ALPHA */ - case U'\u0254': /* LATIN SMALL LETTER OPEN O */ - case U'\u0259': /* LATIN SMALL LETTER SCHWA */ - case U'\u025B': /* LATIN SMALL LETTER OPEN E */ - case U'\u0263': /* LATIN SMALL LETTER GAMMA */ - case U'\u0269': /* LATIN SMALL LETTER IOTA */ - case U'\u0283': /* LATIN SMALL LETTER ESH */ - case U'\u028A': /* LATIN SMALL LETTER UPSILON */ - case U'\u028B': /* LATIN SMALL LETTER V WITH HOOK */ - case U'\u0292': /* LATIN SMALL LETTER EZH */ - case U'\u0294': /* LATIN SMALL LETTER GLOTTAL STOP */ - case U'\u03B1': /* GREEK SMALL LETTER ALPHA */ - case U'\u03B2': /* GREEK SMALL LETTER BETA */ - case U'\u03B3': /* GREEK SMALL LETTER GAMMA */ - case U'\u03B4': /* GREEK SMALL LETTER DELTA */ - case U'\u03B5': /* GREEK SMALL LETTER EPSILON */ - case U'\u03B6': /* GREEK SMALL LETTER ZETA */ - case U'\u03B7': /* GREEK SMALL LETTER ETA */ - case U'\u03B8': /* GREEK SMALL LETTER THETA */ - case U'\u03B9': /* GREEK SMALL LETTER IOTA */ - case U'\u03BA': /* GREEK SMALL LETTER KAPPA */ - case U'\u03BB': /* GREEK SMALL LETTER LAMBDA */ - case U'\u03BC': /* GREEK SMALL LETTER MU */ - case U'\u03BD': /* GREEK SMALL LETTER NU */ - case U'\u03BE': /* GREEK SMALL LETTER XI */ - case U'\u03BF': /* GREEK SMALL LETTER OMICRON */ - case U'\u03C0': /* GREEK SMALL LETTER PI */ - case U'\u03C1': /* GREEK SMALL LETTER RHO */ - case U'\u03C2': /* GREEK SMALL LETTER FINAL SIGMA */ - case U'\u03C3': /* GREEK SMALL LETTER SIGMA */ - case U'\u03C4': /* GREEK SMALL LETTER TAU */ - case 
U'\u03C5': /* GREEK SMALL LETTER UPSILON */ - case U'\u03C6': /* GREEK SMALL LETTER PHI */ - case U'\u03C7': /* GREEK SMALL LETTER CHI */ - case U'\u03C8': /* GREEK SMALL LETTER PSI */ - case U'\u03C9': /* GREEK SMALL LETTER OMEGA */ - case U'\u1D79': /* LATIN SMALL LETTER INSULAR G */ - case U'\uA7B7': /* LATIN SMALL LETTER OMEGA */ - case U'\uFB00': /* LATIN SMALL LIGATURE FF */ - *_res = UINT8_C(0x1); - break; - } - return false; -} diff --git a/src/u8c/is.h.d/iscntrl.c b/src/u8c/is.h.d/iscntrl.c deleted file mode 100644 index 4dcf543..0000000 --- a/src/u8c/is.h.d/iscntrl.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <assert.h> -# include <stdbool.h> -# include <stddef.h> -# include <stdint.h> -# include <u8c/is.h> -bool u8c_iscntrl(uint_least8_t * const _res,char32_t const _chr) { - assert(_res != NULL); - switch(_chr) { - default: - *_res = UINT8_C(0x0); - break; - case U'\x0': /* NULL */ - case U'\x1': /* START OF HEADING */ - case U'\x2': /* START OF TEXT */ - case U'\x3': /* END OF TEXT */ - case U'\x4': /* END OF TRANSMISSION */ - case U'\x5': /* ENQUIRY */ - case U'\x6': /* ACKNOWLEDGE */ - case U'\a': /* BELL */ - case U'\b': /* BACKSPACE */ - case U'\t': /* HORIZONTAL TABULATION */ - case U'\n': /* NEW LINE */ - case U'\v': /* VERTICAL TABULATION */ - case U'\f': /* FORM FEED */ - case U'\r': /* CARRIAGE RETURN */ - case U'\xE': /* SHIFT OUT */ - case U'\xF': /* SHIFT IN */ - case U'\x10': /* DATA LINK ESCAPE */ - case U'\x11': /* DEVICE CONTROL ONE */ - case U'\x12': /* DEVICE CONTROL TWO */ - case U'\x13': /* DEVICE CONTROL THREE */ - case U'\x14': /* DEVICE CONTROL FOUR */ - case U'\x15': /* NEGATIVE ACKNOWLEDGE */ - case U'\x16': /* SYNCHRONOUS IDLE */ - case U'\x17': /* END OF TRANSMISSION BLOCK */ - case U'\x18': /* CANCEL */ - case U'\x19': /* END OF MEDIUM */ - case U'\x1A': /* SUBSTITUTE */ - case U'\x1B': /* ESCAPE */ - case U'\x1C': /* FILE SEPERATOR */ - case U'\x1D': /* GROUP SEPERATOR */ - case U'\x1E': /* RECORD SEPERATOR */ - case U'\x1F': /* UNIT SEPERATOR */ - *_res = UINT8_C(0x1); - break; - } - return false; -} diff --git a/src/u8c/main.h.d/abrtfn.c b/src/u8c/main.h.d/abrtfn.c index 80a03e3..bab21f2 100644 --- a/src/u8c/main.h.d/abrtfn.c +++ b/src/u8c/main.h.d/abrtfn.c @@ -22,7 +22,7 @@ # include <time.h> # include <u8c/intern.h> # include <u8c/main.h> -noreturn bool u8c_abrtfn(char const * const _fl,long const _ln,char const * const _fn,char const * const _why) { +noreturn void u8c_abrtfn(char const * const restrict _fl,long const _ln,char const * const restrict _fn,char const * const restrict _why) { fprintf(stderr,"u8c: *** Aborted 
(\"%s\":%li in function \"%s\": \"%s\" @ %" PRIuMAX ") ***\nLibrary diagnostics:\n debug:%s\n status:%" PRIuLEAST8 "\n thread-safe:%s\n version:%" PRIuLEAST64 "\n",_fl,_ln,_fn,_why,(intmax_t)time(NULL),u8c_dbg ? "true" : "false",u8c_dat.stat,u8c_thrdsafe ? "true" : "false",u8c_ver); fprintf(stderr,"Trying to clean up...\n"); u8c_end(); diff --git a/src/u8c/main.h.d/end.c b/src/u8c/main.h.d/end.c index ef8114c..741a961 100644 --- a/src/u8c/main.h.d/end.c +++ b/src/u8c/main.h.d/end.c @@ -19,13 +19,16 @@ # include <u8c/SIZE_C.h> # include <u8c/intern.h> # include <u8c/main.h> -# include <u8c/u32.h> +# include <u8c/str.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_end(void) { +struct u8c_end_tuple u8c_end(void) { + struct u8c_end_tuple ret = { + .stat = false, + }; if(!u8c_dat.stat) { - return false; + return ret; } # if defined(u8c_bethrdsafe) /* Destroy mutexes: */ @@ -35,11 +38,11 @@ bool u8c_end(void) { mtx_destroy(&u8c_dat.outlock); # endif /* Free error message: */ - u8c_u32free(&u8c_dat.err); + u8c_strfree(u8c_dat.err); /* Set default formatting options: */ u8c_dat.fmtbase = UINT8_C(0xC); u8c_dat.fmtendian = UINT8_C(0x0); /* Set status: */ u8c_dat.stat = UINT8_C(0x0); - return false; + return ret; } diff --git a/src/u8c/main.h.d/init.c b/src/u8c/main.h.d/init.c index 0b34577..1762822 100644 --- a/src/u8c/main.h.d/init.c +++ b/src/u8c/main.h.d/init.c @@ -19,37 +19,45 @@ # include <stdbool.h> # include <stddef.h> # include <stdint.h> +# include <u8c/SIZE_C.h> # include <u8c/err.h> # include <u8c/intern.h> # include <u8c/main.h> -# include <u8c/u32.h> +# include <u8c/str.h> # if defined(u8c_bethrdsafe) # include <threads.h> # endif -bool u8c_init() { +struct u8c_init_tuple u8c_init() { + struct u8c_init_tuple ret = { + .stat = false, + }; /* Initialise mutexes: */ # if defined(u8c_bethrdsafe) if(mtx_init(&u8c_dat.errhandlslock,mtx_plain) == thrd_error) { - return true; + ret.stat = true; + return ret; } 
if(mtx_init(&u8c_dat.errlock,mtx_plain) == thrd_error) { - return true; + ret.stat = true; + return ret; } if(mtx_init(&u8c_dat.fmtlock,mtx_plain) == thrd_error) { - return true; + ret.stat = true; + return ret; } if(mtx_init(&u8c_dat.outlock,mtx_plain) == thrd_error) { - return true; + ret.stat = true; + return ret; } # endif /* Set default error message: */ u8c_dat.err = NULL; - u8c_seterr(U"",u8c_errtyp_deferr); + u8c_seterr(u8c_errtyp_deferr,U""); /* Initialise error handler array: */ - for(register size_t n = SIZE_C(0x0);n < u8c_errtyp_maxerrtyp;n += SIZE_C(0x1)) { + for(register size_t n = SIZE_C(0x0);n < u8c_errtyp_all;n += SIZE_C(0x1)) { u8c_dat.errhandls[n] = NULL; } /* Set status: */ u8c_dat.stat = UINT8_C(0x1); - return false; + return ret; } diff --git a/src/u8c/main.h.d/thrdsafe.c b/src/u8c/main.h.d/thrdsafe.c index 9d8e41a..4e8c40c 100644 --- a/src/u8c/main.h.d/thrdsafe.c +++ b/src/u8c/main.h.d/thrdsafe.c @@ -22,3 +22,193 @@ bool const u8c_thrdsafe = # else false; # endif +/* +1720 173F HANUNOO +1740 175F BUHID +1760 177F TAGBANWA +1780 17FF KHMER +1800 18AF MONGOLIAN +18B0 18FF UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED +1900 194F LIMBU +1950 197F TAI LE +1980 19DF NEW TAI LUE +19E0 19FF KHMER SYMBOLS +1A00 1A1F BUGINESE +1A20 1AAF TAI THAM +1AB0 1AFF COMBINING DIACRITICAL MARKS EXTENDED +1B00 1B7F BALINESE +1B80 1BBF SUNDANESE +1BC0 1BFF BATAK +1C00 1C4F LEPCHA +1C50 1C7F OL CHIKI +1C80 1C8F CYRILLIC EXTENDED-C +1C90 1CBF GEORGIAN EXTENDED +1CC0 1CCF SUNDANESE SUPPLEMENT +1CD0 1CFF VEDIC EXTENSIONS +1D00 1D7F PHONETIC EXTENSIONS +1D80 1DBF PHONETIC EXTENSIONS SUPPLEMENT +1DC0 1DFF COMBINING DIACRITICAL MARKS SUPPLEMENT +1E00 1EFF LATIN EXTENDED ADDITIONAL +1F00 1FFF GREEK EXTENDED +2000 206F GENERAL PUNCTUATION +2070 209F SUPERSCRIPTS AND SUBSCRIPTS +20A0 20CF CURRENCY SYMBOLS +20D0 20FF COMBINING DIACRITICAL MARKS FOR SYMBOLS +2100 214F LETTERLIKE SYMBOLS +2150 218F NUMBER FORMS +2190 21FF ARROWS +2200 22FF MATHEMATICAL OPERATORS +2300 23FF 
MISCELLANEOUS TECHNICAL +2400 243F CONTROL PICTURES +2440 245F OPTICAL CHARACTER RECOGNITION +2460 24FF ENCLOSED ALPHANUMERICS +2500 257F BOX DRAWING +2580 259F BLOCK ELEMENTS +25A0 25FF GEOMETRIC SHAPES +2600 26FF MISCELLANEOUS SYMBOLS +2700 27BF DINGBATS +27C0 27EF MISCELLANEOUS MATHEMATICAL SYMBOLS-A +27F0 27FF SUPPLEMENTAL ARROWS-A +2800 28FF BRAILLE PATTERNS +2900 297F SUPPLEMENTAL ARROWS-B +2980 29FF MISCELLANEOUS MATHEMATICAL SYMBOLS-B +2A00 2AFF SUPPLEMENTAL MATHEMATICAL OPERATORS +2B00 2BFF MISCELLANEOUS SYMBOLS AND ARROWS +2C00 2C5F GLAGOLITIC +2C60 2C7F LATIN EXTENDED-C +2C80 2CFF COPTIC +2D00 2D2F GEORGIAN SUPPLEMENT +2D30 2D7F TIFINAGH +2D80 2DDF ETHIOPIC EXTENDED +2DE0 2DFF CYRILLIC EXTENDED-A +2E00 2E7F SUPPLEMENTAL PUNCTUATION +2E80 2EFF CJK RADICALS SUPPLEMENT +2F00 2FDF KANGXI RADICALS +2FF0 2FFF IDEOGRAPHIC DESCRIPTION CHARACTERS +3000 303F CJK SYMBOLS AND PUNCTUATION +3040 309F HIRAGANA +30A0 30FF KATAKANA +3100 312F BOPOMOFO +3130 318F HANGUL COMPATIBILITY JAMO +3190 319F KANBUN +31A0 31BF BOPOMOFO EXTENDED +31C0 31EF CJK STROKES +31F0 31FF KATAKANA PHONETIC EXTENSIONS +3200 32FF ENCLOSED CJK LETTERS AND MONTHS +3300 33FF CJK COMPATIBILITY +3400 4DBF CJK UNIFIED IDEOGRAPHS EXTENSION A +4DC0 4DFF YIJING HEXAGRAM SYMBOLS +4E00 9FFF CJK UNIFIED IDEOGRAPHS +A000 A48F YI SYLLABLES +A490 A4CF YI RADICALS +A4D0 A4FF LISU +A500 A63F VAI +A640 A69F CYRILLIC EXTENDED-B +A6A0 A6FF BAMUM +A700 A71F MODIFIER TONE LETTERS +A720 A7FF LATIN EXTENDED-D +A800 A82F SYLOTI NAGRI +A830 A83F COMMON INDIC NUMBER FORMS +A840 A87F PHAGS-PA +A880 A8DF SAURASHTRA +A8E0 A8FF DEVANAGARI EXTENDED +A900 A92F KAYAH LI +A930 A95F REJANG +A960 A97F HANGUL JAMO EXTENDED-A +A980 A9DF JAVANESE +A9E0 A9FF MYANMAR EXTENDED-B +AA00 AA5F CHAM +AA60 AA7F MYANMAR EXTENDED-A +AA80 AADF TAI VIET +AAE0 AAFF MEETEI MAYEK EXTENSIONS +AB00 AB2F ETHIOPIC EXTENDED-A +AB30 AB6F LATIN EXTENDED-E +AB70 ABBF CHEROKEE SUPPLEMENT +ABC0 ABFF MEETEI MAYEK +AC00 D7AF HANGUL SYLLABLES +D7B0 D7FF HANGUL 
JAMO EXTENDED-B +D800 DB7F HIGH SURROGATES +DB80 DBFF HIGH PRIVATE USE SURROGATES +DC00 DFFF LOW SURROGATES +E000 F8FF PRIVATE USE AREA +F900 FAFF CJK COMPATIBILITY IDEOGRAPHS +FB00 FB4F ALPHABETIC PRESENTATION FORMS +FB50 FDFF ARABIC PRESENTATION FORMS-A +FE00 FE0F VARIATION SELECTORS +FE10 FE1F VERTICAL FORMS +FE20 FE2F COMBINING HALF MARKS +FE30 FE4F CJK COMPATIBILITY FORMS +FE50 FE6F SMALL FORM VARIANTS +FE70 FEFF ARABIC PRESENTATION FORMS-B +FF00 FFEF HALFWIDTH AND FULLWIDTH FORMS +FFF0 FFFF SPECIALS +10000 1007F LINEAR B SYLLABARY +10080 100FF LINEAR B IDEOGRAMS +10100 1013F AEGEAN NUMBERS +10140 1018F ANCIENT GREEK NUMBERS +10190 101CF ANCIENT SYMBOLS +101D0 101FF PHAISTOS DISC +10280 1029F LYCIAN +102A0 102DF CARIAN +102E0 102FF COPTIC EPACT NUMBERS +10300 1032F OLD ITALIC +10330 1034F GOTHIC +10350 1037F OLD PERMIC +10380 1039F UGARITIC +103A0 103DF OLD PERSIAN +10400 1044F DESERET +10450 1047F SHAVIAN +10480 104AF OSMANYA +104B0 104FF OSAGE +10500 1052F ELBASAN +10530 1056F CAUCASIAN ALBANIAN +10600 1077F LINEAR A +10800 1083F CYPRIOT SYLLABARY +10840 1085F IMPERIAL ARAMAIC +10860 1087F PALMYRENE +10880 108AF NABATAEAN +108E0 108FF HATRAN +10900 1091F PHOENICIAN +10920 1093F LYDIAN +10980 1099F MEROITIC HIEROGLYPHS +109A0 109FF MEROITIC CURSIVE +10A00 10A5F KHAROSHTHI +10A60 10A7F OLD SOUTH ARABIAN +10A80 10A9F OLD NORTH ARABIAN +10AC0 10AFF MANICHAEAN +10B00 10B3F AVESTAN +10B40 10B5F INSCRIPTIONAL PARTHIAN +10B60 10B7F INSCRIPTIONAL PAHLAVI +10B80 10BAF PSALTER PAHLAVI +10C00 10C4F OLD TURKIC +10C80 10CFF OLD HUNGARIAN +10D00 10D3F HANIFI ROHINGYA +10E60 10E7F RUMI NUMERAL SYMBOLS +10E80 10EBF YEZIDI +10F00 10F2F OLD SOGDIAN +10F30 10F6F SOGDIAN +10FB0 10FDF CHORASMIAN +10FE0 10FFF ELYMAIC +11000 1107F BRAHMI +11080 110CF KAITHI +110D0 110FF SORA SOMPENG +11100 1114F CHAKMA +11150 1117F MAHAJANI +11180 111DF SHARADA +111E0 111FF SINHALA ARCHAIC NUMBERS +11200 1124F KHOJKI +11280 112AF MULTANI +112B0 112FF KHUDAWADI +11300 1137F GRANTHA +11400 1147F NEWA 
+11480 114DF TIRHUTA +11580 115FF SIDDHAM +11600 1165F MODI +11660 1167F MONGOLIAN SUPPLEMENT +11680 116CF TAKRI +11700 1173F AHOM +11800 1184F DOGRA +118A0 118FF WARANG CITI +11900 1195F DIVES AKURU +119A0 119FF NANDINAGARI +*/ diff --git a/src/u8c/main.h.d/uniblk.c b/src/u8c/main.h.d/uniblk.c new file mode 100644 index 0000000..332f8aa --- /dev/null +++ b/src/u8c/main.h.d/uniblk.c @@ -0,0 +1,517 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <u8c/main.h> +# include <u8c/str.h> +# include <uchar.h> +# define u8c_uniblk_isinrng(val,start,end) (val >= start && val <= end) +struct u8c_uniblk_tuple u8c_uniblk(char32_t const _chr) { + struct u8c_uniblk_tuple ret = { + .stat = false, + }; + char32_t const * blk = U"UNDEFINED IN UNICODE"; + if(_chr <= U'\x7F') { + blk = U"BASIC LATIN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\x80',U'\u00FF')) { + blk = U"LATIN-1 SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0100',U'\u017F')) { + blk = U"LATIN EXTENDED-A"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0180',U'\u024F')) { + blk = U"LATIN EXTENDED-B"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0250',U'\u02AF')) { + blk = U"IPA EXTENSIONS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u02B0',U'\u02FF')) { + blk = U"SPACING MODIFIER LETTERS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0300',U'\u036F')) { + blk = U"COMBINING DIRACITICAL MARKS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0370',U'\u03FF')) { + blk = U"GREEK AND COPTIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0400',U'\u04FF')) { + blk = U"CYRILLIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0500',U'\u052F')) { + blk = U"CYRILLIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0530',U'\u058F')) { + blk = U"ARMENIAN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0590',U'\u05FF')) { + blk = U"HEBREW"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0600',U'\u06FF')) { + blk = U"ARABIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0700',U'\u074F')) { + blk = U"SYRIAC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0750',U'\u077F')) { + blk = U"ARABIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0780',U'\u07BF')) { + blk = U"THAANA"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u07C0',U'\u07FF')) { + blk = U"NKO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0800',U'\u083F')) { + blk = U"SAMARITAN"; + goto 
end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0840',U'\u085F')) { + blk = U"MANDAIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0860',U'\u086F')) { + blk = U"SYRIAC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u08A0',U'\u08FF')) { + blk = U"ARABIC EXTENDED-A"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0900',U'\u097F')) { + blk = U"DEVANAGARI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0980',U'\u09FF')) { + blk = U"BENGALI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0A00',U'\u0A7F')) { + blk = U"GURMUKHI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0A80',U'\u0AFF')) { + blk = U"GUJARATI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0B00',U'\u0B7F')) { + blk = U"ORIYAS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0B80',U'\u0BFF')) { + blk = U"TAMIL"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0C00',U'\u0C7F')) { + blk = U"TELUGU"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0C80',U'\u0CFF')) { + blk = U"KANNADA"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0D00',U'\u0D7F')) { + blk = U"MALAYALAM"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0D80',U'\u0DFF')) { + blk = U"SINHALA"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0E00',U'\u0E7F')) { + blk = U"THAI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0E80',U'\u0EFF')) { + blk = U"LAO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u0F00',U'\u0FFF')) { + blk = U"TIBETAN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1000',U'\u109F')) { + blk = U"MYANMAR"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u10A0',U'\u10FF')) { + blk = U"GEORGIAN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1100',U'\u11FF')) { + blk = U"HANGUL JAMO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1200',U'\u137F')) { + blk = U"ETHIOPIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1380',U'\u139F')) { + blk = U"ETHIOPIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u13A0',U'\u13FF')) { + blk = U"CHEROKEE"; + goto end; + 
} + if(u8c_uniblk_isinrng(_chr,U'\u1400',U'\u167F')) { + blk = U"UNIFIED CANADIAN ABORIGINAL SYLLABICS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1680',U'\u169F')) { + blk = U"OGHAM"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u16A0',U'\u16FF')) { + blk = U"RUNIC"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\u1700',U'\u171F')) { + blk = U"TAGALOG"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011A00',U'\U00011A4F')) { + blk = U"ZANABAZAR SQUARE"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011A50',U'\U00011AAF')) { + blk = U"SOYOMBO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011AC0',U'\U00011AFF')) { + blk = U"PAU CIN HAU"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011C00',U'\U00011C6F')) { + blk = U"BHAIKSUKI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011C70',U'\U00011CBF')) { + blk = U"MARCHEN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011D00',U'\U00011D5F')) { + blk = U"MASARAM GONDI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011D60',U'\U00011DAF')) { + blk = U"GUNJALA GONDI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011EE0',U'\U00011EFF')) { + blk = U"MAKASAR"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011FB0',U'\U00011FBF')) { + blk = U"LISU SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00011FC0',U'\U00011FFF')) { + blk = U"TAMIL SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00012000',U'\U000123FF')) { + blk = U"CUNEIFORM"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00012400',U'\U0001247F')) { + blk = U"CUNEIFORM NUMBERS AND PUNCTUATION"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00012480',U'\U0001254F')) { + blk = U"EARLY DYNASTIC CUNEIFORM"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00013000',U'\U0001342F')) { + blk = U"EGYPTIAN HIEROGLYPHS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00013430',U'\U0001343F')) { + blk = U"EGYPTIAN HIEROGLYPH FORMAT CONTROLS"; + goto end; + } + 
if(u8c_uniblk_isinrng(_chr,U'\U00014400',U'\U0001467F')) { + blk = U"ANATOLIAN HIEROGLYPHS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016800',U'\U00016A3F')) { + blk = U"BAMUM SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016A40',U'\U00016A6F')) { + blk = U"MRO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016AD0',U'\U00016AFF')) { + blk = U"BASSA VAH"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016B00',U'\U00016B8F')) { + blk = U"PAHAWH HMONG"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016E40',U'\U00016E9F')) { + blk = U"MEDEFAIDRIN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016F00',U'\U00016F9F')) { + blk = U"MIAO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00016FE0',U'\U00016FFF')) { + blk = U"IDEOGRAPHIC SYMBOLS AND PUNCTUATION"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00017000',U'\U000187FF')) { + blk = U"TANGUT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00018800',U'\U00018AFF')) { + blk = U"TANGUT COMPONENTS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00018B00',U'\U00018CFF')) { + blk = U"KHITAN SMALL SCRIPT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00018D00',U'\U00018D8F')) { + blk = U"TANGUT SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001B000',U'\U0001B0FF')) { + blk = U"KANA SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001B100',U'\U0001B12F')) { + blk = U"KANA EXTENDED-A"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001B130',U'\U0001B16F')) { + blk = U"SMALL KANA EXTENSION"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001B170',U'\U0001B2FF')) { + blk = U"NUSHU"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001BC00',U'\U0001BC9F')) { + blk = U"DUPLOYAN"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001BCA0',U'\U0001BCAF')) { + blk = U"SHORTHAND FORMAT CONTROLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D000',U'\U0001D0FF')) { + blk = U"BYZANTINE MUSICAL SYMBOLS"; + goto end; + } + 
if(u8c_uniblk_isinrng(_chr,U'\U0001D100',U'\U0001D1FF')) { + blk = U"MUSICAL SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D200',U'\U0001D24F')) { + blk = U"ANCIENT GREEK MUSICAL NOTATION"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D2E0',U'\U0001D2FF')) { + blk = U"MAYAN NUMERALS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D300',U'\U0001D35F')) { + blk = U"TAI XUAN JING SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D360',U'\U0001D37F')) { + blk = U"COUNTING ROD NUMERALS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D400',U'\U0001D7FF')) { + blk = U"MATHEMATICAL ALPHANUMERIC SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001D800',U'\U0001DAAF')) { + blk = U"SUTTON SIGNWRITING"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001E000',U'\U0001E02F')) { + blk = U"GLAGOLITIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001E100',U'\U0001E14F')) { + blk = U"NYIAKENG PUACHUE HMONG"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001E2C0',U'\U0001E2FF')) { + blk = U"WANCHO"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001E800',U'\U0001E8DF')) { + blk = U"MENDE KIKAKUI"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001E900',U'\U0001E95F')) { + blk = U"ADLAM"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001EC70',U'\U0001ECBF')) { + blk = U"INDIC SIYAQ NUMBERS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001ED00',U'\U0001ED4F')) { + blk = U"OTTOMAN SIYAQ NUMBERS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001EE00',U'\U0001EEFF')) { + blk = U"ARABIC MATHEMATICAL ALPHABETIC SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F000',U'\U0001F02F')) { + blk = U"MAHJONG TILES"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F030',U'\U0001F09F')) { + blk = U"DOMINO TILES"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F0A0',U'\U0001F0FF')) { + blk = U"PLAYING CARDS"; + goto end; + } + 
if(u8c_uniblk_isinrng(_chr,U'\U0001F100',U'\U0001F1FF')) { + blk = U"ENCLOSED ALPHANUMERIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F200',U'\U0001F2FF')) { + blk = U"ENCLOSED IDEOGRAPHIC SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F300',U'\U0001F5FF')) { + blk = U"MISCELLANEOUS SYMBOLS AND PICTOGRAPHS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F600',U'\U0001F64F')) { + blk = U"EMOTICONS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F650',U'\U0001F67F')) { + blk = U"ORNAMENTAL DINGBATS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F680',U'\U0001F6FF')) { + blk = U"TRANSPORT AND MAP SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F700',U'\U0001F77F')) { + blk = U"ALCHEMICAL SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F780',U'\U0001F7FF')) { + blk = U"GEOMETRIC SHAPES EXTENDED"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F800',U'\U0001F8FF')) { + blk = U"SUPPLEMENTAL ARROWS-C"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001F900',U'\U0001F9FF')) { + blk = U"SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001FA00',U'\U0001FA6F')) { + blk = U"CHESS SYMBOLS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001FA70',U'\U0001FAFF')) { + blk = U"SYMBOLS AND PICTOGRAPHS EXTENDED-A"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0001FB00',U'\U0001FBFF')) { + blk = U"SYMBOLS FOR LEGACY COMPUTING"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00020000',U'\U0002A6DF')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION B"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0002A700',U'\U0002B73F')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION C"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0002B740',U'\U0002B81F')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION D"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0002B820',U'\U0002CEAF')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION E"; + goto end; + } + 
if(u8c_uniblk_isinrng(_chr,U'\U0002CEB0',U'\U0002EBEF')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION F"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U0002F800',U'\U0002FA1F')) { + blk = U"CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00030000',U'\U0003134F')) { + blk = U"CJK UNIFIED IDEOGRAPHS EXTENSION G"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U000E0000',U'\U000E007F')) { + blk = U"TAGS"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U000E0100',U'\U000E1EFF')) { + blk = U"VARIATION SELECTORS SUPPLEMENT"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U000F0000',U'\U000FFFFF')) { + blk = U"SUPPLEMENTARY PRIVATE USE AREA-A"; + goto end; + } + if(u8c_uniblk_isinrng(_chr,U'\U00100000',U'\U0010FFFF')) { + blk = U"SUPPLEMENTARY PRIVATE USE AREA-B"; + goto end; + } + if(_chr > u8c_unimax) { + ret.stat = true; + return ret; + } +end:; + { + struct u8c_strcp_tuple const tuple = u8c_strcp(blk); + ret.blk = tuple.str; + ret.blksz = tuple.strsz; + } + return ret; +} diff --git a/src/u8c/main.h.d/uninm.c b/src/u8c/main.h.d/uninm.c new file mode 100644 index 0000000..d409263 --- /dev/null +++ b/src/u8c/main.h.d/uninm.c @@ -0,0 +1,2582 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <u8c/err.h> +# include <u8c/main.h> +# include <u8c/str.h> +# include <uchar.h> +struct u8c_uninm_tuple u8c_uninm(char32_t const _chr) { + struct u8c_uninm_tuple ret; + if(_chr > u8c_unimax) { + u8c_seterr(u8c_errtyp_stroor,U"u8c_uninm: Character out of range."); + ret.stat = true; + return ret; + } + char32_t const * nm = U""; + switch(_chr) { + default: + nm = U"UNDEFINED IN UNICODE"; + break; + /* BASIC LATIN: */ + case U'\x0': + nm = U"NULL"; + break; + case U'\x1': + nm = U"START OF HEADING"; + break; + case U'\x2': + nm = U"START OF TEXT"; + break; + case U'\x3': + nm = U"END OF TEXT"; + break; + case U'\x4': + nm = U"END OF TRANSMISSION"; + break; + case U'\x5': + nm = U"ENQUIRY"; + break; + case U'\x6': + nm = U"ACKNOWLEDGE"; + break; + case U'\a': + nm = U"BELL"; + break; + case U'\b': + nm = U"BACKSPACE"; + break; + case U'\t': + nm = U"HORIZONTAL TABULATION"; + break; + case U'\n': + nm = U"NEW LINE"; + break; + case U'\v': + nm = U"VERTICAL TABULATION"; + break; + case U'\f': + nm = U"FORM FEED"; + break; + case U'\r': + nm = U"CARRIAGE RETURN"; + break; + case U'\xE': + nm = U"SHIFT OUT"; + break; + case U'\xF': + nm = U"SHIFT IN"; + break; + case U'\x10': + nm = U"DATA LINK ESCAPE"; + break; + case U'\x11': + nm = U"DEVICE CONTROL ONE"; + break; + case U'\x12': + nm = U"DEVICE CONTROL TWO"; + break; + case U'\x13': + nm = U"DEVICE CONTROL THREE"; + break; + case U'\x14': + nm = U"DEVICE CONTROL FOUR"; + break; + case U'\x15': + nm = U"NEGATIVE ACKNOWLEDGE"; + break; + case U'\x16': + nm = U"SYNCHRONOUS IDLE"; + break; + case U'\x17': + nm = U"END OF TRANSMISSION BLOCk"; + break; + case U'\x18': + nm = U"CANCEL"; + break; + case U'\x19': + nm = U"END OF MEDIUM"; + break; + case U'\x1A': + nm = U"SUBSTITUTE"; + break; + case U'\x1B': + nm = U"ESCAPE"; + break; + case U'\x1C': + nm = U"FILE SEPERATOR"; + break; + case U'\x1D': + nm = U"GROUP SEPERATOR"; + break; + case U'\x1E': + nm = U"RECORD SEPERATOR"; + break; + 
case U'\x1F': + nm = U"UNIT SEPERATOR"; + break; + case U' ': + nm = U"SPACE"; + break; + case U'!': + nm = U"EXCLAMATION MARK"; + break; + case U'\"': + nm = U"QUOTATION MARK"; + break; + case U'#': + nm = U"NUMBER SIGN"; + break; + case U'\u0024': + nm = U"DOLLAR SIGN"; + break; + case U'%': + nm = U"PERCENT SIGN"; + break; + case U'&': + nm = U"AMPERSAND"; + break; + case U'\'': + nm = U"APOSTROPHE"; + break; + case U'(': + nm = U"LEFT PARANTHESIS"; + break; + case U')': + nm = U"RIGHT PARANTHESIS"; + break; + case U'*': + nm = U"ASTERISK"; + break; + case U'+': + nm = U"PLUS SIGN"; + break; + case U',': + nm = U"COMMA"; + break; + case U'-': + nm = U"HYPHEN-MINUS"; + break; + case U'.': + nm = U"FULL STOP"; + break; + case U'/': + nm = U"SOLIDUS"; + break; + case U'0': + nm = U"DIGIT ZERO"; + break; + case U'1': + nm = U"DIGIT ONE"; + break; + case U'2': + nm = U"DIGIT TWO"; + break; + case U'3': + nm = U"DIGIT THREE"; + break; + case U'4': + nm = U"DIGIT FOUR"; + break; + case U'5': + nm = U"DIGIT FIVE"; + break; + case U'6': + nm = U"DIGIT SIX"; + break; + case U'7': + nm = U"DIGIT SEVEN"; + break; + case U'8': + nm = U"DIGIT EIGHT"; + break; + case U'9': + nm = U"DIGIT NINE"; + break; + case U':': + nm = U"COLON"; + break; + case U';': + nm = U"SEMICOLON"; + break; + case U'<': + nm = U"LESS-THAN SIGN"; + break; + case U'=': + nm = U"EQUALS SIGN"; + break; + case U'>': + nm = U"GREATER-THAN SIGN"; + break; + case U'?': + nm = U"QUESTION MARK"; + break; + case U'\u0040': + nm = U"COMMERCIAL AT"; + break; + case U'A': + nm = U"LATIN CAPITAL LETTER A"; + break; + case U'B': + nm = U"LATIN CAPITAL LETTER B"; + break; + case U'C': + nm = U"LATIN CAPITAL LETTER C"; + break; + case U'D': + nm = U"LATIN CAPITAL LETTER D"; + break; + case U'E': + nm = U"LATIN CAPITAL LETTER E"; + break; + case U'F': + nm = U"LATIN CAPITAL LETTER F"; + break; + case U'G': + nm = U"LATIN CAPITAL LETTER G"; + break; + case U'H': + nm = U"LATIN CAPITAL LETTER H"; + break; + case U'I': + 
nm = U"LATIN CAPITAL LETTER I"; + break; + case U'J': + nm = U"LATIN CAPITAL LETTER J"; + break; + case U'K': + nm = U"LATIN CAPITAL LETTER K"; + break; + case U'L': + nm = U"LATIN CAPITAL LETTER L"; + break; + case U'M': + nm = U"LATIN CAPITAL LETTER M"; + break; + case U'N': + nm = U"LATIN CAPITAL LETTER N"; + break; + case U'O': + nm = U"LATIN CAPITAL LETTER O"; + break; + case U'P': + nm = U"LATIN CAPITAL LETTER P"; + break; + case U'Q': + nm = U"LATIN CAPITAL LETTER Q"; + break; + case U'R': + nm = U"LATIN CAPITAL LETTER R"; + break; + case U'S': + nm = U"LATIN CAPITAL LETTER S"; + break; + case U'T': + nm = U"LATIN CAPITAL LETTER T"; + break; + case U'U': + nm = U"LATIN CAPITAL LETTER U"; + break; + case U'V': + nm = U"LATIN CAPITAL LETTER V"; + break; + case U'W': + nm = U"LATIN CAPITAL LETTER W"; + break; + case U'X': + nm = U"LATIN CAPITAL LETTER X"; + break; + case U'Y': + nm = U"LATIN CAPITAL LETTER Y"; + break; + case U'Z': + nm = U"LATIN CAPITAL LETTER Z"; + break; + case U'[': + nm = U"LEFT SQUARE BRACKET"; + break; + case U'\\': + nm = U"REVERSE SOLIDUS"; + break; + case U']': + nm = U"RIGHT SQUARE BRACKET"; + break; + case U'^': + nm = U"CIRCUMFLEX ACCENT"; + break; + case U'_': + nm = U"LOW LINE"; + break; + case U'\u0060': + nm = U"GRAVE ACCENT"; + break; + case U'a': + nm = U"LATIN SMALL LETTER A"; + break; + case U'b': + nm = U"LATIN SMALL LETTER B"; + break; + case U'c': + nm = U"LATIN SMALL LETTER C"; + break; + case U'd': + nm = U"LATIN SMALL LETTER D"; + break; + case U'e': + nm = U"LATIN SMALL LETTER E"; + break; + case U'f': + nm = U"LATIN SMALL LETTER F"; + break; + case U'g': + nm = U"LATIN SMALL LETTER G"; + break; + case U'h': + nm = U"LATIN SMALL LETTER H"; + break; + case U'i': + nm = U"LATIN SMALL LETTER I"; + break; + case U'j': + nm = U"LATIN SMALL LETTER J"; + break; + case U'k': + nm = U"LATIN SMALL LETTER K"; + break; + case U'l': + nm = U"LATIN SMALL LETTER L"; + break; + case U'm': + nm = U"LATIN SMALL LETTER M"; + break; + 
case U'n': + nm = U"LATIN SMALL LETTER N"; + break; + case U'o': + nm = U"LATIN SMALL LETTER O"; + break; + case U'p': + nm = U"LATIN SMALL LETTER P"; + break; + case U'q': + nm = U"LATIN SMALL LETTER Q"; + break; + case U'r': + nm = U"LATIN SMALL LETTER R"; + break; + case U's': + nm = U"LATIN SMALL LETTER S"; + break; + case U't': + nm = U"LATIN SMALL LETTER T"; + break; + case U'u': + nm = U"LATIN SMALL LETTER U"; + break; + case U'v': + nm = U"LATIN SMALL LETTER V"; + break; + case U'w': + nm = U"LATIN SMALL LETTER W"; + break; + case U'x': + nm = U"LATIN SMALL LETTER X"; + break; + case U'y': + nm = U"LATIN SMALL LETTER Y"; + break; + case U'z': + nm = U"LATIN SMALL LETTER Z"; + break; + case U'{': + nm = U"LEFT CURLY BRACKET"; + break; + case U'|': + nm = U"VERTICAL LINE"; + break; + case U'}': + nm = U"RIGHT CURLY BRACKET"; + break; + case U'~': + nm = U"TILDE"; + break; + case U'\x7F': + nm = U"DELETE"; + break; + /* LATIN-1 SUPPLEMENT: */ + case U'\x80': + nm = U"<CONTROL>"; + break; + case U'\x81': + nm = U"<CONTROL>"; + break; + case U'\x82': + nm = U"BREAK PERMITTED HERE"; + break; + case U'\x83': + nm = U"NO BREAK HERE"; + break; + case U'\x84': + nm = U"<CONTROL>"; + break; + case U'\x85': + nm = U"NEXT LINE"; + break; + case U'\x86': + nm = U"START OF SELECTED AREA"; + break; + case U'\x87': + nm = U"END OF SELECTED AREA"; + break; + case U'\x88': + nm = U"CHARACTER TABULATION SET"; + break; + case U'\x89': + nm = U"CHARACTER TABULATION WITH JUSTIFICATION"; + break; + case U'\x8A': + nm = U"LINE TABULATION SET"; + break; + case U'\x8B': + nm = U"PARTIAL LINE FORWARD"; + break; + case U'\x8C': + nm = U"PARTIAL LINE BACKWARD"; + break; + case U'\x8D': + nm = U"REVERSE LINE FEED"; + break; + case U'\x8E': + nm = U"SINGLE SHIFT TWO"; + break; + case U'\x8F': + nm = U"SINGLE SHIFT THREE"; + break; + case U'\x90': + nm = U"DEVICE CONTROL STRING"; + break; + case U'\x91': + nm = U"PRIVATE USE ONE"; + break; + case U'\x92': + nm = U"PRIVATE USE TWO"; + 
break; + case U'\x93': + nm = U"SET TRANSMIT STATE"; + break; + case U'\x94': + nm = U"CANCEL CHARACTER"; + break; + case U'\x95': + nm = U"MESSAGE WAITING"; + break; + case U'\x96': + nm = U"START OF GUARDED AREA"; + break; + case U'\x97': + nm = U"END OF GUARDED AREA"; + break; + case U'\x98': + nm = U"START OF STRING"; + break; + case U'\x99': + nm = U"<CONTROL>"; + break; + case U'\x9A': + nm = U"SINGLE CHARACTER INTRODUCER"; + break; + case U'\x9B': + nm = U"CONTROL SEQUENCE INTRODUCER"; + break; + case U'\x9C': + nm = U"STRING TERMINATOR"; + break; + case U'\x9D': + nm = U"OPERATING SYSTEM COMMAND"; + break; + case U'\x9E': + nm = U"PRIVACY MESSAGE"; + break; + case U'\x9F': + nm = U"APPLICATION PROGRAM COMMAND"; + break; + case U'\xA0': + nm = U"NO-BREAK SPACE"; + break; + case U'\u00A1': + nm = U"INVERTED EXCLAMATION MARK"; + break; + case U'\u00A2': + nm = U"CENT SIGN"; + break; + case U'\u00A3': + nm = U"POUND SIGN"; + break; + case U'\u00A4': + nm = U"CURRENCY SIGN"; + break; + case U'\u00A5': + nm = U"YEN SIGN"; + break; + case U'\u00A6': + nm = U"BROKEN BAR"; + break; + case U'\u00A7': + nm = U"SECTION SIGN"; + break; + case U'\u00A8': + nm = U"DIAERESIS"; + break; + case U'\u00A9': + nm = U"COPYRIGHT SIGN"; + break; + case U'\u00AA': + nm = U"FEMININE ORDINAL INDICATOR"; + break; + case U'\u00AB': + nm = U"LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"; + break; + case U'\u00AC': + nm = U"NOT SIGN"; + break; + case U'\u00AD': + nm = U"SOFT HYPHEN"; + break; + case U'\u00AE': + nm = U"REGISTERED SIGN"; + break; + case U'\u00AF': + nm = U"MACRON"; + break; + case U'\u00B0': + nm = U"DEGREE SIGN"; + break; + case U'\u00B1': + nm = U"PLUS MINUS SYMBOL"; + break; + case U'\u00B2': + nm = U"SUPERSCRIPT TWO"; + break; + case U'\u00B3': + nm = U"SUPERSCRIPT THREE"; + break; + case U'\u00B4': + nm = U"ACUTE ACCENT"; + break; + case U'\u00B5': + nm = U"MICRO SIGN"; + break; + case U'\u00B6': + nm = U"PILCROW SIGN"; + break; + case U'\u00B7': + nm = U"MIDDLE DOT"; + 
break; + case U'\u00B8': + nm = U"CEDILLA"; + break; + case U'\u00B9': + nm = U"SUPERSCRIPT ONE"; + break; + case U'\u00BA': + nm = U"MASCULINE ORDINAL INDICATOR"; + break; + case U'\u00BB': + nm = U"RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"; + break; + case U'\u00BC': + nm = U"VULGAR FRACTION ONE QUARTER"; + break; + case U'\u00BD': + nm = U"VULGAR FRACTION ONE HALF"; + break; + case U'\u00BE': + nm = U"VULGAR FRACTION THREE QUARTERS"; + break; + case U'\u00BF': + nm = U"INVERTED QUESTION MARK"; + break; + case U'\u00C0': + nm = U"LATIN CAPITAL LETTER A WITH GRAVE"; + break; + case U'\u00C1': + nm = U"LATIN CAPITAL LETTER A WITH ACUTE"; + break; + case U'\u00C2': + nm = U"LATIN CAPITAL LETTER A WITH CIRCUMFLEX"; + break; + case U'\u00C3': + nm = U"LATIN CAPITAL LETTER A WITH TILDE"; + break; + case U'\u00C4': + nm = U"LATIN CAPITAL LETTER A WITH DIAERESIS"; + break; + case U'\u00C5': + nm = U"LATIN CAPITAL LETTER A WITH RING ABOVE"; + break; + case U'\u00C6': + nm = U"LATIN CAPITAL LETTER AE"; + break; + case U'\u00C7': + nm = U"LATIN CAPITAL LETTER C WITH CEDILLA"; + break; + case U'\u00C8': + nm = U"LATIN CAPITAL LETTER E WITH GRAVE"; + break; + case U'\u00C9': + nm = U"LATIN CAPITAL LETTER E WITH ACUTE"; + break; + case U'\u00CA': + nm = U"LATIN CAPITAL LETTER E WITH CIRCUMFLEX"; + break; + case U'\u00CB': + nm = U"LATIN CAPITAL LETTER E WITH DIAERESIS"; + break; + case U'\u00CC': + nm = U"LATIN CAPITAL LETTER I WITH GRAVE"; + break; + case U'\u00CD': + nm = U"LATIN CAPITAL LETTER I WITH ACUTE"; + break; + case U'\u00CE': + nm = U"LATIN CAPITAL LETTER I WITH CIRCUMFLEX"; + break; + case U'\u00CF': + nm = U"LATIN CAPITAL LETTER I WITH DIAERESIS"; + break; + case U'\u00D0': + nm = U"LATIN CAPITAL LETTER ETH"; + break; + case U'\u00D1': + nm = U"LATIN CAPITAL LETTER N WITH TILDE"; + break; + case U'\u00D2': + nm = U"LATIN CAPITAL LETTER O WITH GRAVE"; + break; + case U'\u00D3': + nm = U"LATIN CAPITAL LETTER O WITH ACUTE"; + break; + case U'\u00D4': + nm = 
U"LATIN CAPITAL LETTER O WITH CIRCUMFLEX"; + break; + case U'\u00D5': + nm = U"LATIN CAPITAL LETTER O WITH TILDE"; + break; + case U'\u00D6': + nm = U"LATIN CAPITAL LETTER O WITH DIAERESIS"; + break; + case U'\u00D7': + nm = U"MULTIPLICATION SIGN"; + break; + case U'\u00D8': + nm = U"LATIN CAPITAL LETTER O WITH STROKE"; + break; + case U'\u00D9': + nm = U"LATIN CAPITAL LETTER U WITH GRAVE"; + break; + case U'\u00DA': + nm = U"LATIN CAPITAL LETTER U WITH ACUTE"; + break; + case U'\u00DB': + nm = U"LATIN CAPITAL LETTER U WITH CIRCUMFLEX"; + break; + case U'\u00DC': + nm = U"LATIN CAPITAL LETTER U WITH DIAERESIS"; + break; + case U'\u00DD': + nm = U"LATIN CAPITAL LETTER Y WITH ACUTE"; + break; + case U'\u00DE': + nm = U"LATIN CAPITAL LETTER THORN"; + break; + case U'\u00DF': + nm = U"LATIN SMALL LETTER SHARP S"; + break; + case U'\u00E0': + nm = U"LATIN SMALL LETTER A WITH GRAVE"; + break; + case U'\u00E1': + nm = U"LATIN SMALL LETTER A WITH ACUTE"; + break; + case U'\u00E2': + nm = U"LATIN SMALL LETTER A WITH CIRCUMFLEX"; + break; + case U'\u00E3': + nm = U"LATIN SMALL LETTER A WITH TILDE"; + break; + case U'\u00E4': + nm = U"LATIN SMALL LETTER A WITH DIAERESIS"; + break; + case U'\u00E5': + nm = U"LATIN SMALL LETTER A WITH RING ABOVE"; + break; + case U'\u00E6': + nm = U"LATIN SMALL LETTER AE"; + break; + case U'\u00E7': + nm = U"LATIN SMALL LETTER C WITH CEDILLA"; + break; + case U'\u00E8': + nm = U"LATIN SMALL LETTER E WITH GRAVE"; + break; + case U'\u00E9': + nm = U"LATIN SMALL LETTER E WITH ACUTE"; + break; + case U'\u00EA': + nm = U"LATIN SMALL LETTER E WITH CIRCUMFLEX"; + break; + case U'\u00EB': + nm = U"LATIN SMALL LETTER E WITH DIAERESIS"; + break; + case U'\u00EC': + nm = U"LATIN SMALL LETTER I WITH GRAVE"; + break; + case U'\u00ED': + nm = U"LATIN SMALL LETTER I WITH ACUTE"; + break; + case U'\u00EE': + nm = U"LATIN SMALL LETTER I WITH CIRCUMFLEX"; + break; + case U'\u00EF': + nm = U"LATIN SMALL LETTER I WITH DIAERESIS"; + break; + case U'\u00F0': + nm = 
U"LATIN SMALL LETTER ETH"; + break; + case U'\u00F1': + nm = U"LATIN SMALL LETTER N WITH TILDE"; + break; + case U'\u00F2': + nm = U"LATIN SMALL LETTER O WITH GRAVE"; + break; + case U'\u00F3': + nm = U"LATIN SMALL LETTER O WITH ACUTE"; + break; + case U'\u00F4': + nm = U"LATIN SMALL LETTER O WITH CIRCUMFLEX"; + break; + case U'\u00F5': + nm = U"LATIN SMALL LETTER O WITH TILDE"; + break; + case U'\u00F6': + nm = U"LATIN SMALL LETTER O WITH DIAERESIS"; + break; + case U'\u00F7': + nm = U"DIVISION SIGN"; + break; + case U'\u00F8': + nm = U"LATIN SMALL LETTER O WITH STROKE"; + break; + case U'\u00F9': + nm = U"LATIN SMALL LETTER U WITH GRAVE"; + break; + case U'\u00FA': + nm = U"LATIN SMALL LETTER U WITH ACUTE"; + break; + case U'\u00FB': + nm = U"LATIN SMALL LETTER U WITH CIRCUMFLEX"; + break; + case U'\u00FC': + nm = U"U WITH TWO DOTS"; + break; + case U'\u00FD': + nm = U"LATIN SMALL LETTER Y WITH ACUTE"; + break; + case U'\u00FE': + nm = U"LATIN SMALL LETTER THORN"; + break; + case U'\u00FF': + nm = U"LATIN SMALL LETTER Y WITH DIAERESIS"; + break; + /* LATIN EXTENDED-A: */ + case U'\u0100': + nm = U"LATIN CAPITAL LETTER A WITH MACRON"; + break; + case U'\u0101': + nm = U"LATIN SMALL LETTER A WITH MACRON"; + break; + case U'\u0102': + nm = U"LATIN CAPITAL LETTER A WITH BREVE"; + break; + case U'\u0103': + nm = U"LATIN SMALL LETTER A WITH BREVE"; + break; + case U'\u0104': + nm = U"LATIN CAPITAL LETTER A WITH OGONEK"; + break; + case U'\u0105': + nm = U"LATIN SMALL LETTER A WITH OGONEK"; + break; + case U'\u0106': + nm = U"LATIN CAPITAL LETTER C WITH ACUTE"; + break; + case U'\u0107': + nm = U"LATIN SMALL LETTER C WITH ACUTE"; + break; + case U'\u0108': + nm = U"LATIN CAPITAL LETTER C WITH CIRCUMFLEX"; + break; + case U'\u0109': + nm = U"LATIN SMALL LETTER C WITH CIRCUMFLEX"; + break; + case U'\u010A': + nm = U"LATIN CAPITAL LETTER C WITH DOT ABOVE"; + break; + case U'\u010B': + nm = U"LATIN SMALL LETTER C WITH DOT ABOVE"; + break; + case U'\u010C': + nm = U"LATIN 
CAPITAL LETTER C WITH CARON"; + break; + case U'\u010D': + nm = U"LATIN SMALL LETTER C WITH CARON"; + break; + case U'\u010E': + nm = U"LATIN CAPITAL LETTER D WITH CARON"; + break; + case U'\u010F': + nm = U"LATIN SMALL LETTER D WITH CARON"; + break; + case U'\u0110': + nm = U"LATIN CAPITAL LETTER D WITH STROKE"; + break; + case U'\u0120': + nm = U"LATIN CAPITAL LETTER G WITH DOT ABOVE"; + break; + case U'\u0130': + nm = U"LATIN CAPITAL LETTER I WITH DOT ABOVE"; + break; + case U'\u0140': + nm = U"LATIN SMALL LETTER L WITH MIDDLE DOT"; + break; + case U'\u0150': + nm = U"LATIN CAPITAL LETTER O WITH DOUBLE ACUTE"; + break; + case U'\u0160': + nm = U"LATIN CAPITAL LETTER S WITH CARON"; + break; + case U'\u0170': + nm = U"LATIN CAPITAL LETTER U WITH DOUBLE ACUTE"; + break; + /* LATIN EXTENDED-B: */ + case U'\u0180': + nm = U"LATIN SMALL LETTER B WITH STROKE"; + break; + case U'\u0190': + nm = U"LATIN CAPITAL LETTER OPEN E"; + break; + case U'\u01A0': + nm = U"LATIN CAPITAL LETTER O WITH HORN"; + break; + case U'\u01B0': + nm = U"LATIN SMALL LETTER U WITH HORN"; + break; + case U'\u01C0': + nm = U"LATIN LETTER DENTAL CLICK"; + break; + case U'\u01D0': + nm = U"LATIN SMALL LETTER I WITH CARON"; + break; + case U'\u01E0': + nm = U"LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON"; + break; + case U'\u01F0': + nm = U"LATIN SMALL LETTER J WITH CARON"; + break; + case U'\u0200': + nm = U"LATIN CAPITAL LETTER A WITH DOUBLE GRAVE"; + break; + case U'\u0210': + nm = U"LATIN CAPITAL LETTER R WITH DOUBLE GRAVE"; + break; + case U'\u0220': + nm = U"LATIN CAPITAL LETTER N WITH LONG RIGHT LEG"; + break; + case U'\u0230': + nm = U"LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON"; + break; + case U'\u0240': + nm = U"LATIN SMALL LETTER Z WITH SWASH TAIL"; + break; + /* IPA EXTENSIONS: */ + case U'\u0250': + nm = U"LATIN SMALL LETTER TURNED A"; + break; + case U'\u0251': + nm = U"LATIN SMALL LETTER ALPHA"; + break; + case U'\u0252': + nm = U"LATIN SMALL LETTER TURNED ALPHA"; + break; 
+ case U'\u0253': + nm = U"LATIN SMALL LETTER B WITH HOOK"; + break; + case U'\u0254': + nm = U"LATIN SMALL LETTER OPEN O"; + break; + case U'\u0255': + nm = U"LATIN SMALL LETTER C WITH CURL"; + break; + case U'\u0256': + nm = U"LATIN SMALL LETTER D WITH TAIL"; + break; + case U'\u0257': + nm = U"LATIN SMALL LETTER D WITH HOOK"; + break; + case U'\u0258': + nm = U"LATIN SMALL LETTER REVERSED E"; + break; + case U'\u0259': + nm = U"LATIN SMALL LETTER SCHWA"; + break; + case U'\u025A': + nm = U"LATIN SMALL LETTER SCHWA WITH HOOK"; + break; + case U'\u025B': + nm = U"LATIN SMALL LETTER OPEN E"; + break; + case U'\u025C': + nm = U"LATIN SMALL LETTER REVERSED OPEN E"; + break; + case U'\u025D': + nm = U"LATIN SMALL LETTER REVERSED OPEN E WITH HOOK"; + break; + case U'\u025E': + nm = U"LATIN SMALL LETTER CLOSED REVERSED OPEN E"; + break; + case U'\u025F': + nm = U"LATIN SMALL LETTER DOTLESS J WITH STROKE"; + break; + case U'\u0260': + nm = U"LATIN SMALL LETTER G WITH HOOK"; + break; + case U'\u0261': + nm = U"LATIN SMALL LETTER SCRIPT G"; + break; + case U'\u0262': + nm = U"LATIN LETTER SMALL CAPITAL G"; + break; + case U'\u0263': + nm = U"LATIN SMALL LETTER GAMMA"; + break; + case U'\u0264': + nm = U"LATIN SMALL LETTER RAMS HORN"; + break; + case U'\u0265': + nm = U"LATIN SMALL LETTER TURNED H"; + break; + case U'\u0266': + nm = U"LATIN SMALL LETTER H WITH HOOK"; + break; + case U'\u0267': + nm = U"LATIN SMALL LETTER HENG WITH HOOK"; + break; + case U'\u0268': + nm = U"LATIN SMALL LETTER I WITH STROKE"; + break; + case U'\u0269': + nm = U"LATIN SMALL LETTER IOTA"; + break; + case U'\u026A': + nm = U"LATIN LETTER SMALL CAPITAL I"; + break; + case U'\u026B': + nm = U"LATIN SMALL LETTER L WITH MIDDLE TILDE"; + break; + case U'\u026C': + nm = U"LATIN SMALL LETTER L WITH BELT"; + break; + case U'\u026D': + nm = U"LATIN SMALL LETTER L WITH RETROFLEX HOOK"; + break; + case U'\u026E': + nm = U"LATIN SMALL LETTER LEZH"; + break; + case U'\u026F': + nm = U"LATIN SMALL LETTER 
TURNED M"; + break; + case U'\u0270': + nm = U"LATIN SMALL LETTER TURNED M WITH LONG LEG"; + break; + case U'\u0271': + nm = U"LATIN SMALL LETTER M WITH HOOK"; + break; + case U'\u0272': + nm = U"LATIN SMALL LETTER N WITH LEFT HOOK"; + break; + case U'\u0273': + nm = U"LATIN SMALL LETTER N WITH RETROFLEX HOOK"; + break; + case U'\u0274': + nm = U"LATIN LETTER SMALL CAPITAL N"; + break; + case U'\u0275': + nm = U"LATIN SMALL LETTER BARRED O"; + break; + case U'\u0276': + nm = U"LATIN LETTER SMALL CAPITAL OE"; + break; + case U'\u0277': + nm = U"LATIN SMALL LETTER CLOSED OMEGA"; + break; + case U'\u0278': + nm = U"LATIN SMALL LETTER PHI"; + break; + case U'\u0279': + nm = U"LATIN SMALL LETTER TURNED R"; + break; + case U'\u027A': + nm = U"LATIN SMALL LETTER TURNED R WITH LONG LEG"; + break; + case U'\u027B': + nm = U"LATIN SMALL LETTER TURNED R WITH HOOK"; + break; + case U'\u027C': + nm = U"LATIN SMALL LETTER R WITH LONG LEG"; + break; + case U'\u027D': + nm = U"LATIN SMALL LETTER R WITH TAIL"; + break; + case U'\u027E': + nm = U"LATIN SMALL LETTER R WITH FISHHOOK"; + break; + case U'\u027F': + nm = U"LATIN SMALL LETTER REVERSED R WITH FISHHOOK"; + break; + case U'\u0280': + nm = U"LATIN LETTER SMALL CAPITAL R"; + break; + /* GREEK AND COPTIC: */ + case U'\u0370': + nm = U"GREEK CAPITAL LETTER HETA"; + break; + case U'\u0371': + nm = U"GREEK SMALL LETTER HETA"; + break; + case U'\u0372': + nm = U"GREEK CAPITAL LETTER ARCHAIC SAMPI"; + break; + case U'\u0373': + nm = U"GREEK SMALL LETTER ARCHAIC SAMPI"; + break; + case U'\u0374': + nm = U"GREEK NUMERAL SIGN"; + break; + case U'\u0375': + nm = U"GREEK LOWER NUMERAL SIGN"; + break; + case U'\u0376': + nm = U"GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA"; + break; + case U'\u0377': + nm = U"GREEK SMALL LETTER PAMPHYLIAN DIGAMMA"; + break; + case U'\u037A': + nm = U"GREEK YPOGEGRAMMENI"; + break; + case U'\u037B': + nm = U"GREEK SMALL REVERSED LUNATE SIGMA SYMBOL"; + break; + case U'\u037C': + nm = U"GREEK SMALL DOTTED LUNATE 
SIGMA SYMBOL"; + break; + case U'\u037D': + nm = U"GREEK SMALL REVERSED DOTTED LUNATE SIGMAL SYMBOL"; + break; + case U'\u037E': + nm = U"GREEK QUESTION MARK"; + break; + case U'\u037F': + nm = U"GREEK CAPITAL LETTER YOT"; + break; + case U'\u0384': + nm = U"GREEK TONOS"; + break; + case U'\u0385': + nm = U"GREEK DIALYTIKA TONOS"; + break; + case U'\u0386': + nm = U"GREEK CAPITAL LETTER ALPHA WITH TONOS"; + break; + case U'\u0387': + nm = U"GREEK ANO TELEIA"; + break; + case U'\u0388': + nm = U"GREEK CAPITAL LETTER EPSILON WITH TONOS"; + break; + case U'\u0389': + nm = U"GREEK CAPITAL LETTER ETA WITH TONOS"; + break; + case U'\u038A': + nm = U"GREEK CAPITAL LETTER IOTA WITH TONOS"; + break; + case U'\u038C': + nm = U"GREEK CAPITAL LETTER OMICRON WITH TONOS"; + break; + case U'\u038E': + nm = U"GREEK CAPITAL LETTER USPILON WITH TONOS"; + break; + case U'\u038F': + nm = U"GREEK CAPITAL LETTER OMEGA WITH TONOS"; + break; + case U'\u0390': + nm = U"GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"; + break; + case U'\u0391': + nm = U"GREEK CAPITAL LETTER ALPHA"; + break; + case U'\u0392': + nm = U"GREEK CAPITAL LETTER BETA"; + break; + case U'\u0393': + nm = U"GREEK CAPITAL LETTER GAMMA"; + break; + case U'\u0394': + nm = U"GREEK CAPITAL LETTER DELTA"; + break; + case U'\u0395': + nm = U"GREEK CAPITAL LETTER EPSILON"; + break; + case U'\u0396': + nm = U"GREEK CAPITAL LETTER ZETA"; + break; + case U'\u0397': + nm = U"GREEK CAPITAL LETTER ETA"; + break; + case U'\u0398': + nm = U"GREEK CAPITAL LETTER THETA"; + break; + case U'\u0399': + nm = U"GREEK CAPITAL LETTER IOTA"; + break; + case U'\u039A': + nm = U"GREEK CAPITAL LETTER KAPPA"; + break; + case U'\u039B': + nm = U"GREEK CAPITAL LETTER LAMBDA"; + break; + case U'\u039C': + nm = U"GREEK CAPITAL LETTER MU"; + break; + case U'\u039D': + nm = U"GREEK CAPITAL LETTER NU"; + break; + case U'\u039E': + nm = U"GREEK CAPITAL LETTER XI"; + break; + case U'\u039F': + nm = U"GREEK CAPITAL LETTER OMICRON"; + break; + case 
U'\u03A0': + nm = U"GREEK CAPITAL LETTER PI"; + break; + case U'\u03A1': + nm = U"GREEK CAPITAL LETTER RHO"; + break; + case U'\u03A3': + nm = U"GREEK CAPITAL LETTER SIGMA"; + break; + case U'\u03A4': + nm = U"GREEK CAPITAL LETTER TAU"; + break; + case U'\u03A5': + nm = U"GREEK CAPITAL LETTER UPSILON"; + break; + case U'\u03A6': + nm = U"GREEK CAPITAL LETTER PHI"; + break; + case U'\u03A7': + nm = U"GREEK CAPITAL LETTER CHI"; + break; + case U'\u03A8': + nm = U"GREEK CAPITAL LETTER PSI"; + break; + case U'\u03A9': + nm = U"GREEK CAPITAL LETTER OMEGA"; + break; + case U'\u03AA': + nm = U"GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"; + break; + case U'\u03AB': + nm = U"GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"; + break; + case U'\u03AC': + nm = U"GREEK SMALL LETTER ALPHA WITH TONOS"; + break; + case U'\u03AD': + nm = U"GREEK SMALL LETTER EPSILON WITH TONOS"; + break; + case U'\u03AE': + nm = U"GREEK SMALL LETTER ETA WITH TONOS"; + break; + case U'\u03AF': + nm = U"GREEK SMALL LETTER IOTA WITH TONOS"; + break; + case U'\u03B0': + nm = U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"; + break; + case U'\u03B1': + nm = U"GREEK SMALL LETTER ALPHA"; + break; + case U'\u03B2': + nm = U"GREEK SMALL LETTER BETA"; + break; + case U'\u03B3': + nm = U"GREEK SMALL LETTER GAMMA"; + break; + case U'\u03B4': + nm = U"GREEK SMALL LETTER DELTA"; + break; + case U'\u03B5': + nm = U"GREEK SMALL LETTER EPSILON"; + break; + case U'\u03B6': + nm = U"GREEK SMALL LETTER ZETA"; + break; + case U'\u03B7': + nm = U"GREEK SMALL LETTER ETA"; + break; + case U'\u03B8': + nm = U"GREEK SMALL LETTER THETA"; + break; + case U'\u03B9': + nm = U"GREEK SMALL LETTER IOTA"; + break; + case U'\u03BA': + nm = U"GREEK SMALL LETTER KAPPA"; + break; + case U'\u03BB': + nm = U"GREEK SMALL LETTER LAMBDA"; + break; + case U'\u03BC': + nm = U"GREEK SMALL LETTER MU"; + break; + case U'\u03BD': + nm = U"GREEK SMALL LETTER NU"; + break; + case U'\u03BE': + nm = U"GREEK SMALL LETTER XI"; + break; + case 
U'\u03BF': + nm = U"GREEK SMALL LETTER OMICRON"; + break; + case U'\u03C0': + nm = U"GREEK SMALL LETTER PI"; + break; + case U'\u03C1': + nm = U"GREEK SMALL LETTER RHO"; + break; + case U'\u03C2': + nm = U"GREEK SMALL LETTER FINAL SIGMA"; + break; + case U'\u03C3': + nm = U"GREEK SMALL LETTER SIGMA"; + break; + case U'\u03C4': + nm = U"GREEK SMALL LETTER TAU"; + break; + case U'\u03C5': + nm = U"GREEK SMALL LETTER UPSILON"; + break; + case U'\u03C6': + nm = U"GREEK SMALL LETTER PHI"; + break; + case U'\u03C7': + nm = U"GREEK SMALL LETTER CHI"; + break; + case U'\u03C8': + nm = U"GREEK SMALL LETTER PSI"; + break; + case U'\u03C9': + nm = U"GREEK SMALL LETTER OMEGA"; + break; + case U'\u03CA': + nm = U"GREEK SMALL LETTER IOTA WITH DIALYTIKA"; + break; + case U'\u03CB': + nm = U"GREEK SMALL LETTER UPSILON WITH DIALYTIKA"; + break; + case U'\u03CC': + nm = U"GREEK SMALL LETTER OMICRON WITH TONOS"; + break; + case U'\u03CD': + nm = U"GREEK SMALL LETTER UPSILON WITH TONOS"; + break; + case U'\u03CE': + nm = U"GREEK SMALL LETTER OMEGA WITH TONOS"; + break; + case U'\u03CF': + nm = U"GREEK CAPITAL KAI SYMBOL"; + break; + case U'\u03D0': + nm = U"GREEK BETA SYMBOL"; + break; + case U'\u03D1': + nm = U"GREEK THETA SYMBOL"; + break; + case U'\u03D2': + nm = U"GREEK UPSILON WITH HOOK SYMBOL"; + break; + case U'\u03D3': + nm = U"GREEK UPSILON WITH ACUTE AND HOOK SYMBOL"; + break; + case U'\u03D4': + nm = U"GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL"; + break; + case U'\u03D5': + nm = U"GREEK PHI SYMBOL"; + break; + case U'\u03D6': + nm = U"GREEK PI SYMBOL"; + break; + case U'\u03D7': + nm = U"GREEK KAI SYMBOL"; + break; + case U'\u03D8': + nm = U"GREEK LETTER ARCHAIC KOPPA"; + break; + case U'\u03D9': + nm = U"GREEK SMALL LETTER ARCHAIC KOPPA"; + break; + case U'\u03DA': + nm = U"GREEK LETTER STIGMA"; + break; + case U'\u03DB': + nm = U"GREEK SMALL LETTER STIGMA"; + break; + case U'\u03DC': + nm = U"GREEK LETTER DIGAMMA"; + break; + case U'\u03DD': + nm = U"GREEK SMALL LETTER 
DIGAMMA"; + break; + case U'\u03DE': + nm = U"GREEK LETTER KOPPA"; + break; + case U'\u03DF': + nm = U"GREEK SMALL LETTER KOPPA"; + break; + case U'\u03E0': + nm = U"GREEK LETTER SAMPI"; + break; + case U'\u03F0': + nm = U"GREEK KAPPA SYMBOL"; + break; + /* HEBREW: */ + case U'\u05D0': + nm = U"HEBREW LETTER ALEF"; + break; + case U'\u05D1': + nm = U"HEBREW LETTER BET"; + break; + case U'\u05D2': + nm = U"HEBREW LETTER GIMEL"; + break; + case U'\u05D3': + nm = U"HEBREW LETTER DALET"; + break; + case U'\u05D4': + nm = U"HEBREW LETTER HE"; + break; + case U'\u05D5': + nm = U"HEBREW LETTER VAV"; + break; + case U'\u05D6': + nm = U"HEBREW LETTER ZAYIN"; + break; + case U'\u05D7': + nm = U"HEBREW LETTER HET"; + break; + case U'\u05D8': + nm = U"HEBREW LETTER TET"; + break; + case U'\u05D9': + nm = U"HEBREW LETTER YOD"; + break; + case U'\u05DA': + nm = U"HEBREW LETTER FINAL KAF"; + break; + case U'\u05DB': + nm = U"HEBREW LETTER KAF"; + break; + case U'\u05DC': + nm = U"HEBREW LETTER LAMED"; + break; + case U'\u05DD': + nm = U"HEBREW LETTER FINAL MEM"; + break; + case U'\u05DE': + nm = U"HEBREW LETTER MEM"; + break; + case U'\u05DF': + nm = U"HEBREW LETTER FINAL NUN"; + break; + case U'\u05E0': + nm = U"HEBREW LETTER NUN"; + break; + case U'\u05E1': + nm = U"HEBREW LETTER SAMEKH"; + break; + case U'\u05E2': + nm = U"HEBREW LETTER AYIN"; + break; + case U'\u05E3': + nm = U"HEBREW LETTER FINAL PE"; + break; + case U'\u05E4': + nm = U"HEBREW LETTER PE"; + break; + case U'\u05E5': + nm = U"HEBREW LETTER FINAL TSADI"; + break; + case U'\u05E6': + nm = U"HEBREW LETTER TSADI"; + break; + case U'\u05E7': + nm = U"HEBREW LETTER QOF"; + break; + case U'\u05E8': + nm = U"HEBREW LETTER RESH"; + break; + case U'\u05E9': + nm = U"HEBREW LETTER SHIN"; + break; + case U'\u05EA': + nm = U"HEBREW LETTER TAV"; + break; + case U'\u05EF': + nm = U"HEBREW YOD TRIANGLE"; + break; + /* CYRILLIC: */ + case U'\u0400': + nm = U"CYRILLIC CAPITAL LETTER LE WITH GRAVE"; + break; + case U'\u0401': + 
nm = U"CYRILLIC CAPITAL LETTER LO"; + break; + case U'\u0402': + nm = U"CYRILLIC CAPITAL LETTER DJE"; + break; + case U'\u0403': + nm = U"CYRILLIC CAPITAL LETTER GJE"; + break; + case U'\u0404': + nm = U"CYRILLIC CAPITAL LETTER UKRAINIAN LE"; + break; + case U'\u0405': + nm = U"CYRILLIC CAPITAL LETTER DZE"; + break; + case U'\u0406': + nm = U"CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I"; + break; + case U'\u0407': + nm = U"CYRILLIC CAPITAL LETTER YI"; + break; + case U'\u0408': + nm = U"CYRILLIC CAPITAL LETTER JE"; + break; + case U'\u0409': + nm = U"CYRILLIC CAPITAL LETTER LJE"; + break; + case U'\u040A': + nm = U"CYRILLIC CAPITAL LETTER NJE"; + break; + case U'\u040B': + nm = U"CYRILLIC CAPITAL LETTER TSHE"; + break; + case U'\u040C': + nm = U"CYRILLIC CAPITAL LETTER KJE"; + break; + case U'\u040D': + nm = U"CYRILLIC CAPITAL LETTER I WITH GRAVE"; + break; + case U'\u040E': + nm = U"CYRILLIC CAPITAL LETTER SHORT U"; + break; + case U'\u040F': + nm = U"CYRILLIC CAPITAL LETTER DZHE"; + break; + case U'\u0410': + nm = U"CYRILLIC CAPITAL LETTER A"; + break; + case U'\u0420': + nm = U"CYRILLIC CAPITAL LETTER ER"; + break; + case U'\u0430': + nm = U"CYRILLIC SMALL LETTER A"; + break; + case U'\u0440': + nm = U"CYRILLIC SMALL LETTER ER"; + break; + case U'\u0450': + nm = U"CYRILLIC SMALL LETTER LE WITH GRAVE"; + break; + case U'\u0460': + nm = U"CYRILLIC CAPITAL LETTER OMEGA"; + break; + case U'\u0470': + nm = U"CYRILLIC CAPITAL LETTER PSI"; + break; + case U'\u0480': + nm = U"CYRILLIC CAPITAL LETTER KOPPA"; + break; + case U'\u0490': + nm = U"CYRILLIC CAPITAL LETTER GHE WITH UPTURN"; + break; + case U'\u04A0': + nm = U"CYRILLIC CAPITAL LETTER BASHKIR KA"; + break; + case U'\u04B0': + nm = U"CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE"; + break; + case U'\u04C0': + nm = U"CYRILLIC LETTER PALOCHKA"; + break; + case U'\u04D0': + nm = U"CYRILLIC CAPITAL LETTER A WITH BREVE"; + break; + case U'\u04E0': + nm = U"CYRILLIC CAPITAL LETTER ABKHASIAN DZE"; + break; + case 
U'\u04F0': + nm = U"CYRILLIC CAPITAL LETTER U WITH DIAERESIS"; + break; + /* SYRIAC SUPPLEMENT: */ + case U'\u0860': + nm = U"SYRIAC LETTER MALAYALAM NGA"; + break; + case U'\u0861': + nm = U"SYRIAC LETTER MALAYALAM JA"; + break; + case U'\u0862': + nm = U"SYRIAC LETTER MALAYALAM NYA"; + break; + case U'\u0863': + nm = U"SYRIAC LETTER MALAYALAM TTA"; + break; + case U'\u0864': + nm = U"SYRIAC LETTER MALAYALAM NNA"; + break; + case U'\u0865': + nm = U"SYRIAC LETTER MALAYALAM NNNA"; + break; + case U'\u0866': + nm = U"SYRIAC LETTER MALAYALAM BHA"; + break; + case U'\u0867': + nm = U"SYRIAC LETTER MALAYALAM RA"; + break; + case U'\u0868': + nm = U"SYRIAC LETTER MALAYALAM LLA"; + break; + case U'\u0869': + nm = U"SYRIAC LETTER MALAYALAM LLLA"; + break; + case U'\u086A': + nm = U"SYRIAC LETTER MALAYALAM SSA"; + break; + /* RUNIC: */ + case U'\u16A0': + nm = U"RUNIC LETTER FEHU FEOH FE F"; + break; + case U'\u16A1': + nm = U"RUNIC LETTER V"; + break; + case U'\u16A2': + nm = U"RUNIC LETTER URUZ UR U"; + break; + case U'\u16A3': + nm = U"RUNIC LETTER YR"; + break; + case U'\u16A4': + nm = U"RUNIC LETTER Y"; + break; + case U'\u16A5': + nm = U"RUNIC LETTER W"; + break; + case U'\u16A6': + nm = U"RUNIC LETTER THURISAZ THURS THORN"; + break; + case U'\u16A7': + nm = U"RUNIC LETTER ETH"; + break; + case U'\u16A8': + nm = U"RUNIC LETTER ANSUZ A"; + break; + case U'\u16A9': + nm = U"RUNIC LETTER OS O"; + break; + case U'\u16AA': + nm = U"RUNIC LETTER AC A"; + break; + case U'\u16AB': + nm = U"RUNIC LETTER AESC"; + break; + case U'\u16AC': + nm = U"RUNIC LETTER LONG-BRANCHED-OSS O"; + break; + case U'\u16AD': + nm = U"RUNIC LETTER SHORT-TWIG-OSS O"; + break; + case U'\u16AE': + nm = U"RUNIC LETTER O"; + break; + case U'\u16AF': + nm = U"RUNIC LETTER OE"; + break; + case U'\u16B0': + nm = U"RUNIC LETTER ON"; + break; + case U'\u16C0': + nm = U"RUNIC LETTER DOTTED-N"; + break; + case U'\u16D0': + nm = U"RUNIC LETTER SHORT-TWIG-TYR T"; + break; + case U'\u16E0': + nm = U"RUNIC 
LETTER EAR"; + break; + case U'\u16F0': + nm = U"RUNIC BELGTHOR SYMBOL"; + break; + /* CYRILLIC EXTENDED C: */ + case U'\u1C80': + nm = U"CYRILLIC SMALL LETTER ROUNDED VE"; + break; + case U'\u1C81': + nm = U"CYRILLIC SMALL LETTER LONG-LEGGED DE"; + break; + case U'\u1C82': + nm = U"CYRILLIC SMALL LETTER NARROW O"; + break; + case U'\u1C83': + nm = U"CYRILLIC SMALL LETTER WIDE ES"; + break; + case U'\u1C84': + nm = U"CYRILLIC SMALL LETTER TALL TE"; + break; + case U'\u1C85': + nm = U"CYRILLIC SMALL LETTER THREE-LEGGED TE"; + break; + case U'\u1C86': + nm = U"CYRILLIC SMALL LETTER TALL HARD SIGN"; + break; + case U'\u1C87': + nm = U"CYRILLIC SMALL LETTER TALL YAT"; + break; + case U'\u1C88': + nm = U"CYRILLIC SMALL LETTER UNBLENDED UK"; + break; + /* GENERAL PUNCTUATION: */ + case U'\u2000': + nm = U"EN QUAD"; + break; + case U'\u2001': + nm = U"EM QUAD"; + break; + case U'\u2002': + nm = U"EN SPACE"; + break; + case U'\u2003': + nm = U"EM SPACE"; + break; + case U'\u2004': + nm = U"THREE-PER-EM SPACE"; + break; + case U'\u2005': + nm = U"FOUR-PER-EM SPACE"; + break; + case U'\u2006': + nm = U"SIX-PER-EM SPACE"; + break; + case U'\u2007': + nm = U"FIGURE SPACE"; + break; + case U'\u2008': + nm = U"PUNCTUATION SPACE"; + break; + case U'\u2009': + nm = U"THIN SPACE"; + break; + case U'\u200A': + nm = U"HAIR SPACE"; + break; + case U'\u203C': + nm = U"DOUBLE EXCLAMATION MARK"; + break; + case U'\u2047': + nm = U"DOUBLE QUOTATION MARK"; + break; + case U'\u2048': + nm = U"QUESTION EXCLAMATION MARK"; + break; + case U'\u2049': + nm = U"EXCLAMATION QUESTION MARK"; + break; + /* CURRENCY SYMBOLS: */ + case U'\u20A0': + nm = U"EURO-CURRENCY SIGN"; + break; + case U'\u20A1': + nm = U"COLON SIGN"; + break; + case U'\u20A2': + nm = U"CRUZEIRO SIGN"; + break; + case U'\u20A3': + nm = U"FRENCH FRANC SIGN"; + break; + case U'\u20A4': + nm = U"LIRA SIGN"; + break; + case U'\u20A5': + nm = U"MILL SIGN"; + break; + case U'\u20A6': + nm = U"NAIRA SIGN"; + break; + case U'\u20A7': + 
nm = U"PESETA SIGN"; + break; + case U'\u20A8': + nm = U"RUPEE SIGN"; + break; + case U'\u20A9': + nm = U"WON SIGN"; + break; + case U'\u20AA': + nm = U"NEW SHEQEL SIGN"; + break; + case U'\u20AB': + nm = U"DONG SIGN"; + break; + case U'\u20AC': + nm = U"EURO SIGN"; + break; + case U'\u20AD': + nm = U"KIP SIGN"; + break; + case U'\u20AE': + nm = U"TUGRIK SIGN"; + break; + case U'\u20AF': + nm = U"DRACHMA SIGN"; + break; + case U'\u20B0': + nm = U"GERMAN PENNY SIGN"; + break; + case U'\u20B1': + nm = U"PESO SIGN"; + break; + case U'\u20B2': + nm = U"GUARANI SIGN"; + break; + case U'\u20B3': + nm = U"AUSTRAL SIGN"; + break; + case U'\u20B4': + nm = U"HRYVNIA SIGN"; + break; + case U'\u20B5': + nm = U"CEDI SIGN"; + break; + case U'\u20B6': + nm = U"LIVRE TOURNOIS SIGN"; + break; + case U'\u20B7': + nm = U"SPESMILO SIGN"; + break; + case U'\u20B8': + nm = U"TENGE SIGN"; + break; + case U'\u20BA': + nm = U"TURKISH LIRA SIGN"; + break; + case U'\u20BB': + nm = U"NORDIC MARK SIGN"; + break; + case U'\u20BC': + nm = U"MANAT SIGN"; + break; + case U'\u20BD': + nm = U"RUBLE SYMBOL"; + break; + case U'\u20BE': + nm = U"LARI SIGN"; + break; + case U'\u20BF': + nm = U"BITCOIN SIGN"; + break; + /* LETTERLIKE SYMBOLS: */ + case U'\u2100': + nm = U"ACCOUNT OF"; + break; + case U'\u2101': + nm = U"ADRESSED TO THE SUBJECT"; + break; + case U'\u2102': + nm = U"DOUBLE-STRUCK CAPITAL C"; + break; + case U'\u2103': + nm = U"DEGREE CELSIUS"; + break; + case U'\u2104': + nm = U"CENTRE LINE SYMBOL"; + break; + case U'\u2105': + nm = U"CARE OF"; + break; + case U'\u2106': + nm = U"CADA UNA"; + break; + case U'\u2107': + nm = U"EULER CONSTANT"; + break; + case U'\u2108': + nm = U"SCRUPLE"; + break; + case U'\u2109': + nm = U"DEGREE FAHRENHEIT"; + break; + case U'\u210A': + nm = U"SCRIPT SMALL G"; + break; + case U'\u210B': + nm = U"SCRIPT CAPITAL H"; + break; + case U'\u210C': + nm = U"BLACK-LETTER CAPITAL H"; + break; + case U'\u210D': + nm = U"DOUBLE-STRUCK CAPITAL H"; + break; + case 
U'\u210E': + nm = U"PLANCK CONSTANT"; + break; + case U'\u210F': + nm = U"PLANCK CONSTANT OVER TWO PI"; + break; + case U'\u2110': + nm = U"SCRIPT CAPITAL I"; + break; + case U'\u2111': + nm = U"BLACK-LETTER CAPITAL I"; + break; + case U'\u2112': + nm = U"SCRIPT CAPITAL L"; + break; + case U'\u2113': + nm = U"SCRIPT SMALL L"; + break; + case U'\u2114': + nm = U"L B BAR SYMBOL"; + break; + case U'\u2115': + nm = U"DOUBLE-STRUCK CAPITAL N"; + break; + case U'\u2116': + nm = U"NUMERO SIGN"; + break; + case U'\u2117': + nm = U"SOUND RECORDING COPYRIGHT"; + break; + case U'\u2118': + nm = U"SCRIPT CAPITAL P"; + break; + case U'\u2119': + nm = U"DOUBLE-STRUCK CAPITAL P"; + break; + case U'\u211A': + nm = U"DOUBLE-STRUCK CAPITAL Q"; + break; + case U'\u211B': + nm = U"SCRIPT CAPITAL R"; + break; + case U'\u211C': + nm = U"BLACK-LETTER CAPITAL R"; + break; + case U'\u211D': + nm = U"DOUBLE-STRUCK CAPITAL R"; + break; + case U'\u211E': + nm = U"PRESCRIPTION TAKE"; + break; + case U'\u211F': + nm = U"RESPONSE"; + break; + case U'\u2120': + nm = U"SERVICE MARK"; + break; + case U'\u2121': + nm = U"TELEPHONE SIGN"; + break; + case U'\u2122': + nm = U"TRADE MARK SIGN"; + break; + case U'\u2123': + nm = U"VERSICLE"; + break; + case U'\u2124': + nm = U"DOUBLE-STRUCK CAPITAL Z"; + break; + case U'\u2125': + nm = U"OUNCE SIGN"; + break; + case U'\u2126': + nm = U"OHM SIGN"; + break; + case U'\u2127': + nm = U"INVERTED OHM SIGN"; + break; + case U'\u2128': + nm = U"BLACK-LETTER CAPITAL Z"; + break; + case U'\u2129': + nm = U"TURNED GREEK SMALL LETTER IOTA"; + break; + case U'\u212A': + nm = U"KELVIN SIGN"; + break; + case U'\u212B': + nm = U"ANGSTROM SIGN"; + break; + case U'\u212C': + nm = U"SCRIPT CAPITAL B"; + break; + case U'\u212D': + nm = U"BLACK-LETTER CAPITAL C"; + break; + case U'\u212E': + nm = U"ESTIMATED SYMBOL"; + break; + case U'\u212F': + nm = U"SCRIPT SMALL E"; + break; + case U'\u2130': + nm = U"SCRIPT CAPITAL E"; + break; + case U'\u2131': + nm = U"SCRIPT CAPITAL 
F"; + break; + case U'\u2132': + nm = U"TURNED CAPITAL F"; + break; + case U'\u2133': + nm = U"SCRIPT CAPITAL M"; + break; + case U'\u2134': + nm = U"SCRIPT SMALL O"; + break; + case U'\u2135': + nm = U"ALEF SYMBOL"; + break; + case U'\u2136': + nm = U"BET SYMBOL"; + break; + case U'\u2137': + nm = U"GIMEL SYMBOL"; + break; + case U'\u2138': + nm = U"DALET SYMBOL"; + break; + case U'\u2139': + nm = U"INFORMATION SOURCE"; + break; + case U'\u213A': + nm = U"ROTATED CAPITAL Q"; + break; + case U'\u213B': + nm = U"FACSIMILE SIGN"; + break; + case U'\u213C': + nm = U"DOUBLE-STRUCK SMALL PI"; + break; + case U'\u213D': + nm = U"DOUBLE-STRUCK SMALL GAMMA"; + break; + case U'\u213E': + nm = U"DOUBLE-STRUCK CAPITAL GAMMA"; + break; + case U'\u213F': + nm = U"DOUBLE-STRUCK CAPITAL PI"; + break; + case U'\u2140': + nm = U"DOUBLE-STRUCK N-ARY SUMMATION"; + break; + case U'\u2141': + nm = U"TURNED SANS-SERIF CAPITAL G"; + break; + case U'\u2142': + nm = U"TURNED SANS-SERIF CAPITAL L"; + break; + case U'\u2143': + nm = U"REVERSED SANS-SERIF CAPITAL L"; + break; + case U'\u2144': + nm = U"TURNED SANS-SERIF CAPITAL Y"; + break; + case U'\u2145': + nm = U"DOUBLE-STRUCK ITALIC CAPITAL D"; + break; + case U'\u2146': + nm = U"DOUBLE-STRUCK ITALIC SMALL D"; + break; + case U'\u2147': + nm = U"DOUBLE-STRUCK ITALIC SMALL E"; + break; + case U'\u2148': + nm = U"DOUBLE-STRUCK ITALIC SMALL I"; + break; + case U'\u2149': + nm = U"DOUBLE-STRUCK ITALIC SMALL J"; + break; + case U'\u214A': + nm = U"PROPERTY LINE"; + break; + case U'\u214B': + nm = U"TURNED AMPERSAND"; + break; + case U'\u214C': + nm = U"PER SIGN"; + break; + case U'\u214D': + nm = U"AKTIESELSKAB"; + break; + case U'\u214E': + nm = U"TURNED SMALL F"; + break; + case U'\u214F': + nm = U"SYMBOL FOR SAMARITAN SOURCE"; + break; + /* NUMBER FORMS: */ + case U'\u2150': + nm = U"VULGAR FRACTION ONE SEVENTH"; + break; + case U'\u2151': + nm = U"VULGAR FRACTION ONE NINTH"; + break; + case U'\u2152': + nm = U"VULGAR FRACTION ONE TENTH"; 
+ break; + case U'\u2153': + nm = U"VULGAR FRACTION ONE THIRD"; + break; + case U'\u2154': + nm = U"VULGAR FRACTION TWO THIRDS"; + break; + case U'\u2155': + nm = U"VULGAR FRACTION ONE FIFTH"; + break; + case U'\u2156': + nm = U"VULGAR FRACTION TWO FIFTHS"; + break; + case U'\u2157': + nm = U"VULGAR FRACTION THREE FIFTHS"; + break; + case U'\u2158': + nm = U"VULGAR FRACTION FOUR FIFTHS"; + break; + case U'\u2159': + nm = U"VULGAR FRACTION ONE SIXTH"; + break; + case U'\u215A': + nm = U"VULGAR FRACTION FIVE SIXTHS"; + break; + case U'\u215B': + nm = U"VULGAR FRACTION ONE EIGTH"; + break; + case U'\u215C': + nm = U"VULGAR FRACTION THREE EIGTHS"; + break; + case U'\u215D': + nm = U"VULGAR FRACTION FIVE EIGHTS"; + break; + case U'\u215E': + nm = U"VULGAR FRACTION SEVEN EIGTHS"; + break; + case U'\u215F': + nm = U"FRACTION NUMERATOR ONE"; + break; + case U'\u2160': + nm = U"ROMAN NUMERAL ONE"; + break; + case U'\u2161': + nm = U"ROMAN NUMERAL TWO"; + break; + case U'\u2162': + nm = U"ROMAN NUMERAL THREE"; + break; + case U'\u2163': + nm = U"ROMAN NUMERAL FOUR"; + break; + case U'\u2164': + nm = U"ROMAN NUMERAL FIVE"; + break; + case U'\u2165': + nm = U"ROMAN NUMERAL SIX"; + break; + case U'\u2166': + nm = U"ROMAN NUMERAL SEVEN"; + break; + case U'\u2167': + nm = U"ROMAN NUMERAL EIGHT"; + break; + case U'\u2168': + nm = U"ROMAN NUMERAL NINE"; + break; + case U'\u2169': + nm = U"ROMAN NUMERAL TEN"; + break; + case U'\u216A': + nm = U"ROMAN NUMERAL ELEVEN"; + break; + case U'\u216B': + nm = U"ROMAN NUMERAL TWELVE"; + break; + case U'\u216C': + nm = U"ROMAN NUMERAL FIFTY"; + break; + case U'\u216D': + nm = U"ROMAN NUMERAL ONE HUNDRED"; + break; + case U'\u216E': + nm = U"ROMAN NUMERAL FIVE HUNDRED"; + break; + case U'\u216F': + nm = U"ROMAN NUMERAL ONE THOUSAND"; + break; + case U'\u2170': + nm = U"SMALL ROMAN NUMERAL ONE"; + break; + case U'\u2171': + nm = U"SMALL ROMAN NUMERAL TWO"; + break; + case U'\u2172': + nm = U"SMALL ROMAN NUMERAL THREE"; + break; + case U'\u2173': 
+ nm = U"SMALL ROMAN NUMERAL FOUR"; + break; + case U'\u2174': + nm = U"SMALL ROMAN NUMERAL FIVE"; + break; + case U'\u2175': + nm = U"SMALL ROMAN NUMERAL SIX"; + break; + case U'\u2176': + nm = U"SMALL ROMAN NUMERAL SEVEN"; + break; + case U'\u2177': + nm = U"SMALL ROMAN NUMERAL EIGHT"; + break; + case U'\u2178': + nm = U"SMALL ROMAN NUMERAL NINE"; + break; + case U'\u2179': + nm = U"SMALL ROMAN NUMERAL TEN"; + break; + case U'\u217A': + nm = U"SMALL ROMAN NUMERAL ELEVEN"; + break; + case U'\u217B': + nm = U"SMALL ROMAN NUMERAL TWELVE"; + break; + case U'\u217C': + nm = U"SMALL ROMAN NUMERAL FIFTY"; + break; + case U'\u217D': + nm = U"SMALL ROMAN NUMERAL ONE HUNDRED"; + break; + case U'\u217E': + nm = U"SMALL ROMAN NUMERAL FIVE HUNDRED"; + break; + case U'\u217F': + nm = U"SMALL ROMAN NUMERAL ONE THOUSAND"; + break; + case U'\u2180': + nm = U"ROMAN NUMERAL ONE THOUSAND C D"; + break; + case U'\u2181': + nm = U"ROMAN NUMERAL FIVE THOUSAND"; + break; + case U'\u2182': + nm = U"ROMAN NUMERAL TEN THOUSAND"; + break; + case U'\u2183': + nm = U"ROMAN NUMERAL REVERSED ONE HUNDRED"; + break; + case U'\u2184': + nm = U"LATIN SMALL LETTER REVERSED C"; + break; + case U'\u2185': + nm = U"ROMAN NUMERAL SIX LATE FORM"; + break; + case U'\u2186': + nm = U"ROMAN NUMERAL FIFTY EARLY FORM"; + break; + case U'\u2187': + nm = U"ROMAN NUMERAL FIFTY THOUSAND"; + break; + case U'\u2188': + nm = U"ROMAN NUMERAL ONE HUNDRED THOUSAND"; + break; + case U'\u2189': + nm = U"VULGAR FRACTION ZERO THIRDS"; + break; + case U'\u218A': + nm = U"TURNED DIGIT TWO"; + break; + case U'\u218B': + nm = U"TURNED DIGIT THREE"; + break; + /* MISCELLANEOUS SYMBOLS: */ + case U'\u26B9': + nm = U"SEXTILE"; + break; + /* DINGBATS: */ + case U'\u271D': + nm = U"LATIN CROSS"; + break; + case U'\u2721': + nm = U"STAR OF DAVID"; + break; + /* SUPPLEMENTAL PUNCTUATION: */ + case U'\u2E3B': + nm = U"THREE-EM DASH"; + break; + /* ARABIC PRESENTATION FORMS-A: */ + case U'\uFDFD': + nm = U"ARABIC LIGATURE BISMILLAH 
AL-RAHMAN AR-RAHEEM"; + break; + /* ANCIENT SYMBOLS: */ + case U'\U00010190': + nm = U"ROMAN SEXTANS SIGN"; + break; + case U'\U00010191': + nm = U"ROMAN UNCIA SIGN"; + break; + case U'\U00010192': + nm = U"ROMAN SEMUNCIA SIGN"; + break; + case U'\U00010193': + nm = U"ROMAN SEXTULA SIGN"; + break; + case U'\U00010194': + nm = U"ROMAN DIMIDIA SEXTULA SIGN"; + break; + case U'\U00010195': + nm = U"ROMAN SILIQUA SIGN"; + break; + case U'\U00010196': + nm = U"ROMAN DENARIUS SIGN"; + break; + case U'\U00010197': + nm = U"ROMAN QUINARIUS SIGN"; + break; + case U'\U00010198': + nm = U"ROMAN SESTERTIUS SIGN"; + break; + case U'\U00010199': + nm = U"ROMAN DUPONDIUS SIGN"; + break; + case U'\U0001019A': + nm = U"ROMAN AS SIGN"; + break; + case U'\U0001019B': + nm = U"ROMAN CENTURIAL SIGN"; + break; + case U'\U0001019C': + nm = U"ASCIA SIGN"; + break; + /* BRAHMI: */ + case U'\U00011066': + nm = U"BRAHMI DIGIT ZERO"; + break; + case U'\U00011067': + nm = U"BRAHMI DIGIT ONE"; + break; + case U'\U00011068': + nm = U"BRAHMI DIGIT TWO"; + break; + case U'\U00011069': + nm = U"BRAHMI DIGIT THREE"; + break; + case U'\U0001106A': + nm = U"BRAHMI DIGIT FOUR"; + break; + case U'\U0001106B': + nm = U"BRAHMI DIGIT FIVE"; + break; + case U'\U0001106C': + nm = U"BRAHMI DIGIT SIX"; + break; + case U'\U0001106D': + nm = U"BRAHMI DIGIT SEVEN"; + break; + case U'\U0001106E': + nm = U"BRAHMI DIGIT EIGHT"; + break; + case U'\U0001106F': + nm = U"BRAHMI DIGIT NINE"; + break; + /* CUNEIFORM: */ + case U'\U00012031': + nm = U"CUNEIFORM SIGN AN PLUS NAGA SQUARED"; + break; + /* CUNEIFORM NUMBERS AND PUNCTUATION: */ + case U'\U0001242B': + nm = U"CUNEIFORM NUMERIC SIGN NINE SHAR2"; + break; + /* EGYPTIAN HIEROGLYPHS: */ + case U'\U000130B8': + nm = U"EGYPTIAN HIEROGLYPH D052"; + break; + /* COUNTING ROD NUMERALS: */ + case U'\U0001D372': + nm = U"IDEOGRAPHIC TALLY MARK ONE"; + break; + case U'\U0001D373': + nm = U"IDEOGRAPHIC TALLY MARK TWO"; + break; + case U'\U0001D374': + nm = U"IDEOGRAPHIC TALLY 
MARK THREE"; + break; + case U'\U0001D375': + nm = U"IDEOGRAPHIC TALLY MARK FOUR"; + break; + case U'\U0001D376': + nm = U"IDEOGRAPHIC TALLY MARK FIVE"; + break; + case U'\U0001D377': + nm = U"TALLY MARK ONE"; + break; + case U'\U0001D378': + nm = U"TALLY MARK FIVE"; + break; + /* ENCLOSED ALPHANUMERIC SUPPLEMENT: */ + case U'\U0001F10D': + nm = U"CIRCLED ZERO WITH SLASH"; + break; + case U'\U0001F10E': + nm = U"CIRCLED ANTICKLOCKWISE ARROW"; + break; + case U'\U0001F10F': + nm = U"CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH"; + break; + case U'\U0001F12F': + nm = U"COPYLEFT SYMBOL"; + break; + case U'\U0001F16D': + nm = U"CIRCLED CC"; + break; + case U'\U0001F16E': + nm = U"CIRCLED C WITH OVERLAID BACKSLASH"; + break; + case U'\U0001F16F': + nm = U"CIRCLED HUMAN FIGURE"; + break; + /* EMOTICONS: */ + case U'\U0001F600': + nm = U"GRINNING FACE"; + break; + case U'\U0001F601': + nm = U"GRINNING FACE WITH SMIRKING EYES"; + break; + case U'\U0001F602': + nm = U"FACE WITH TEARS OF JOY"; + break; + case U'\U0001F603': + nm = U"SMILING FACE WITH OPEN MOUTH"; + break; + case U'\U0001F604': + nm = U"SMILING FACE WITH OPEN MOUTH AND SMILING EYES"; + break; + case U'\U0001F605': + nm = U"SMILING FACE WITH OPEN MOUTH AND COULD SWEAT"; + break; + case U'\U0001F606': + nm = U"SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES"; + break; + case U'\U0001F607': + nm = U"SMILING FACE WITH HALO"; + break; + case U'\U0001F608': + nm = U"SMILING FACE WITH HORNS"; + break; + case U'\U0001F609': + nm = U"WINKING FACE"; + break; + case U'\U0001F60A': + nm = U"SMILING FACE WITH SMILING EYES"; + break; + case U'\U0001F60B': + nm = U"FACE SAVOURING DELICIOUS FOOD"; + break; + case U'\U0001F60C': + nm = U"RELIEVED FACE"; + break; + case U'\U0001F60D': + nm = U"SMILLING FACE HEART-SHAPED EYES"; + break; + case U'\U0001F60E': + nm = U"SMILLING FACE WITH SUNGLASSES"; + break; + case U'\U0001F60F': + nm = U"SMIRKING FACE"; + break; + case U'\U0001F610': + nm = U"NEUTRAL FACE"; + break; + case 
U'\U0001F611': + nm = U"EXPRESSIONLESS FACE"; + break; + case U'\U0001F612': + nm = U"UNAMUSED FACE"; + break; + case U'\U0001F613': + nm = U"FACE WITH COLD SWEAT"; + break; + case U'\U0001F614': + nm = U"PENSIVE FACE"; + break; + case U'\U0001F615': + nm = U"CONFUSED FACE"; + break; + case U'\U0001F616': + nm = U"CONFOUNDED FACE"; + break; + case U'\U0001F617': + nm = U"KISSING FACE"; + break; + case U'\U0001F618': + nm = U"FACE THROWING A KISS"; + break; + case U'\U0001F619': + nm = U"KISSING FACE WITH SMILLING EYES"; + break; + case U'\U0001F61A': + nm = U"KISSING FACE WITH CLOSED EYES"; + break; + case U'\U0001F61B': + nm = U"FACE WITH STUCK-OUT TONGUE"; + break; + case U'\U0001F61C': + nm = U"FACE WITH STUCK-OUT TONGUE AND WINKING EYE"; + break; + case U'\U0001F61D': + nm = U"FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES"; + break; + case U'\U0001F61E': + nm = U"DISSAPOINTED FACE"; + break; + case U'\U0001F61F': + nm = U"WORRIED FACE"; + break; + case U'\U0001F620': + nm = U"ANGRY FACE"; + break; + case U'\U0001F621': + nm = U"POUTING FACE"; + break; + case U'\U0001F622': + nm = U"CRYING FACE"; + break; + case U'\U0001F623': + nm = U"PERSEVERING FACE"; + break; + case U'\U0001F624': + nm = U"FACE WITH LOOK OF TRIUMPH"; + break; + case U'\U0001F625': + nm = U"DISSAPOINTED BUT RELIEVED FACE"; + break; + case U'\U0001F626': + nm = U"FROWNING FACE WITH OPEN MOUTH"; + break; + case U'\U0001F627': + nm = U"ANGUISHED FACE"; + break; + case U'\U0001F628': + nm = U"FEARFUL FACE"; + break; + case U'\U0001F629': + nm = U"WEARY FACE"; + break; + case U'\U0001F62A': + nm = U"SLEEPY FACE"; + break; + case U'\U0001F62B': + nm = U"TIRED FACE"; + break; + case U'\U0001F62C': + nm = U"GRIMACING FACE"; + break; + case U'\U0001F62D': + nm = U"LOUDLY CRYING FACE"; + break; + case U'\U0001F62E': + nm = U"FACE WITH OPEN MOUTH"; + break; + case U'\U0001F62F': + nm = U"HUSHED FACE"; + break; + case U'\U0001F630': + nm = U"FACE WITH OPEN MOUTH AND COLD SWEAT"; + break; + case 
U'\U0001F631': + nm = U"FACE SCREAMING IN FEAR"; + break; + case U'\U0001F632': + nm = U"ASTONISHED FACE"; + break; + case U'\U0001F633': + nm = U"FLUSHED FACE"; + break; + case U'\U0001F634': + nm = U"SLEEPING FACE"; + break; + case U'\U0001F635': + nm = U"DIZZY FACE"; + break; + case U'\U0001F636': + nm = U"FACE WITHOUT MOUTH"; + break; + case U'\U0001F637': + nm = U"FACE WITH MEDICAL MASK"; + break; + case U'\U0001F641': + nm = U"SLIGHTLY FROWNING FACE"; + break; + case U'\U0001F642': + nm = U"SLIGHTLY SMILING FACE"; + break; + case U'\U0001F643': + nm = U"UPSIDE-DOWN FACE"; + break; + case U'\U0001F644': + nm = U"FACE WITH ROLLING EYES"; + break; + /* CJK UNIFIED IDEOGRAPHS EXTENSION G: */ + case U'\U0003106C': + nm = U"CJK UNIFIED IDEOGRAPH-3106C"; + break; + } + { + struct u8c_strcp_tuple const tuple = u8c_strcp(nm); + ret.nm = tuple.str; + ret.nmsz = tuple.strsz; + } + return ret; +} diff --git a/src/u8c/u32.h.d/u32alloc.c b/src/u8c/str.h.d/stralloc.c index b64a1ee..f9addcd 100644 --- a/src/u8c/u32.h.d/u32alloc.c +++ b/src/u8c/str.h.d/stralloc.c @@ -16,14 +16,18 @@ # include <stdbool.h> # include <stdlib.h> # include <u8c/err.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <uchar.h> -bool u8c_u32alloc(char32_t * * const _u32,size_t const _sz) { +struct u8c_stralloc_tuple u8c_stralloc(size_t const _sz) { + struct u8c_stralloc_tuple ret = { + .stat = false, + }; char32_t * arr = NULL; if((arr = calloc(sizeof *arr,_sz)) == NULL) { - u8c_seterr(U"u8c_u32alloc: Unable to allocate resources (not enough memory?).",u8c_errtyp_badalloc); - return false; + u8c_seterr(u8c_errtyp_badalloc,U"u8c_stralloc: Unable to allocate resources (not enough memory?)."); + ret.stat = true; + return ret; } - *_u32 = arr; - return false; + ret.str = arr; + return ret; } diff --git a/src/u8c/u32.h.d/u32cat.c b/src/u8c/str.h.d/strcat.c index 600e0dc..5e5f693 100644 --- a/src/u8c/u32.h.d/u32cat.c +++ b/src/u8c/str.h.d/strcat.c @@ -13,29 +13,27 @@ If not, see 
<https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stdlib.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <uchar.h> -bool u8c_u32cat(size_t * const _sz,char32_t const * * const _out,char32_t const * const _lstr,char32_t const * const _rstr) { - assert(_out != NULL); - assert(_lstr != NULL); - assert(_rstr != NULL); - size_t sz = SIZE_C(0x0); - size_t lsz = SIZE_C(0x0); - size_t rsz = SIZE_C(0x0); - u8c_u32sz(&lsz,_lstr); - u8c_u32sz(&rsz,_rstr); - sz = lsz + rsz; - if(_sz != NULL) { - *_sz = sz; - } +struct u8c_strcat_tuple u8c_strcat(char32_t const * const restrict _lstr,char32_t const * const restrict _rstr) { + struct u8c_strcat_tuple ret = { + .stat = false, + }; + size_t lsz = u8c_strsz(_lstr).sz; + size_t rsz = u8c_strsz(_rstr).sz; + ret.strsz = lsz + rsz; char32_t * out = NULL; - if(u8c_u32alloc(&out,sz + SIZE_C(0x1))) { - return true; + { + struct u8c_stralloc_tuple const tuple = u8c_stralloc(ret.strsz + SIZE_C(0x1)); + if(tuple.stat) { + ret.stat = true; + return ret; + } + out = tuple.str; } for(register size_t n = SIZE_C(0x0);n < lsz;n += SIZE_C(0x1)) { out[n] = _lstr[n]; @@ -43,7 +41,6 @@ bool u8c_u32cat(size_t * const _sz,char32_t const * * const _out,char32_t const for(register size_t n = SIZE_C(0x0);n < rsz;n += SIZE_C(0x1)) { out[n + lsz] = _rstr[n]; } - u8c_u32free(_out); - *_out = out; - return false; + ret.str = out; + return ret; } diff --git a/src/u8c/u32.h.d/u32cmp.c b/src/u8c/str.h.d/strcmp.c index 8a6617d..31654d0 100644 --- a/src/u8c/u32.h.d/u32cmp.c +++ b/src/u8c/str.h.d/strcmp.c @@ -13,33 +13,33 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> -bool u8c_u32cmp(uint_least8_t * const _res,char32_t const * const _lstr,char32_t const * const _rstr) { - assert(_res != NULL); - assert(_lstr != NULL); - assert(_rstr != NULL); +# include <u8c/str.h> +struct u8c_strcmp_tuple u8c_strcmp(char32_t const * const restrict _lstr,char32_t const * const restrict _rstr) { + struct u8c_strcmp_tuple ret = { + .stat = false, + }; for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { register char32_t const lchr = _lstr[n]; register char32_t const rchr = _rstr[n]; if(lchr != rchr) { if(lchr < rchr) { - *_res = UINT8_C(0x0); - return false; + ret.res = UINT8_C(0x0); + return ret; } - *_res = UINT8_C(0x2); - return false; + ret.res = UINT8_C(0x2); + return ret; } - if(lchr == UINT32_C(0x0)) { - *_res = UINT8_C(0x1); - return false; + if(lchr == U'\x0') { + ret.res = UINT8_C(0x1); + return ret; } } - u8c_seterr(U"u8c_u32cmp: Unterminated input.",u8c_errtyp_untermin); - return true; + u8c_seterr(u8c_errtyp_untermin,U"u8c_strcmp: Unterminated input."); + ret.stat = true; + return ret; } diff --git a/src/u8c/u32.h.d/u32cp.c b/src/u8c/str.h.d/strcp.c index 95a9b35..1343bf1 100644 --- a/src/u8c/u32.h.d/u32cp.c +++ b/src/u8c/str.h.d/strcp.c @@ -13,30 +13,28 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stdlib.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> -bool u8c_u32cp(size_t * const _sz,char32_t const * * const _out,char32_t const * const _in) { - assert(_out != NULL); - assert(_in != NULL); - if(*_out != NULL) { - u8c_u32free(&*_out); - } - size_t insz = SIZE_C(0x0); - u8c_u32sz(&insz,_in); - if(_sz != NULL) { - *_sz = insz; - } +# include <u8c/str.h> +struct u8c_strcp_tuple u8c_strcp(char32_t const * const restrict _in) { + struct u8c_strcp_tuple ret = { + .stat = false, + }; + ret.strsz = u8c_strsz(_in).sz; uint_least32_t * out = NULL; - if(u8c_u32alloc(&out,insz + SIZE_C(0x1))) { - return false; + { + struct u8c_stralloc_tuple const tuple = u8c_stralloc(ret.strsz + SIZE_C(0x1)); + if(tuple.stat) { + ret.stat = true; + return ret; + } + out = tuple.str; } - for(register size_t n = SIZE_C(0x0);n < insz;n += SIZE_C(0x1)) { + for(register size_t n = SIZE_C(0x0);n < ret.strsz;n += SIZE_C(0x1)) { out[n] = _in[n]; } - *_out = out; - return false; + ret.str = out; + return ret; } diff --git a/src/u8c/u32.h.d/u32fndchr.c b/src/u8c/str.h.d/strfndchr.c index 228c553..93bb77c 100644 --- a/src/u8c/u32.h.d/u32fndchr.c +++ b/src/u8c/str.h.d/strfndchr.c @@ -13,32 +13,33 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> -bool u8c_u32fndchr(size_t * const _pos,char32_t const * const _in,char32_t const _chr) { - assert(_pos != NULL); - assert(_in != NULL); +# include <u8c/str.h> +struct u8c_strfndchr_tuple u8c_strfndchr(char32_t const * const restrict _in,char32_t const _chr) { + struct u8c_strfndchr_tuple ret = { + .stat = false, + }; for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { register uint_least32_t const tmp = _in[n]; if(tmp == U'\x0') { if(_chr == U'\x0') { - *_pos = n; - return false; + ret.pos = n; + return ret; } - *_pos = SIZE_C(-0x1); - return true; + ret.pos = SIZE_C(-0x1); + return ret; } if(tmp == _chr) { - *_pos = n; - return false; + ret.pos = n; + return ret; } } - u8c_seterr(U"u8c_u32fndchr: Unterminated input.",u8c_errtyp_badio); - *_pos = SIZE_C(-0x1); - return true; + u8c_seterr(u8c_errtyp_untermin,U"u8c_strfndchr: Unterminated input."); + ret.pos = SIZE_C(-0x1); + ret.stat = true; + return ret; } diff --git a/src/u8c/u32.h.d/u32fndpat.c b/src/u8c/str.h.d/strfndpat.c index 5a1b5d2..1091238 100644 --- a/src/u8c/u32.h.d/u32fndpat.c +++ b/src/u8c/str.h.d/strfndpat.c @@ -13,35 +13,31 @@ If not, see <https://www.gnu.org/licenses/>. 
*/ -# include <assert.h> # include <stdbool.h> # include <stddef.h> # include <stdint.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> -bool u8c_u32fndpat(size_t * const _pos,char32_t const * const _in,char32_t const * const _pat) { - assert(_pos != NULL); - assert(_in != NULL); - size_t insz = SIZE_C(0x0); - size_t patsz = SIZE_C(0x0); - u8c_u32sz(&insz,_in); - u8c_u32sz(&patsz,_pat); +# include <u8c/str.h> +struct u8c_strfndpat_tuple u8c_strfndpat(char32_t const * const restrict _in,char32_t const * const restrict _pat) { + struct u8c_strfndpat_tuple ret = { + .stat = false, + }; + size_t insz = u8c_strsz(_in).sz; + size_t patsz = u8c_strsz(_pat).sz; if(insz == SIZE_C(0x1) || insz < patsz) { - *_pos = SIZE_C(-0x1); - return false; + ret.pos = SIZE_C(-0x1); + return ret; } for(register size_t n = SIZE_C(0x0);n < insz - patsz;n += SIZE_C(0x1)) { - char32_t const * str = NULL; - u8c_u32substr(&str,n,patsz - SIZE_C(0x1),_in); - uint_least8_t cmpres = UINT8_C(0x0); - u8c_u32cmp(&cmpres,str,_pat); - u8c_u32free(&str); + char32_t const * str = u8c_strsubstr(n,patsz - SIZE_C(0x1),_in).str; + uint_least8_t const cmpres = u8c_strcmp(str,_pat).res; + u8c_strfree(str); if(cmpres == UINT8_C(0x1)) { - *_pos = n; - return false; + ret.pos = n; + return ret; } } - *_pos = SIZE_C(-0x1); - return false; + ret.pos = SIZE_C(-0x1); + return ret; } diff --git a/src/u8c/u32.h.d/u32free.c b/src/u8c/str.h.d/strfree.c index a0b120b..bf6d477 100644 --- a/src/u8c/u32.h.d/u32free.c +++ b/src/u8c/str.h.d/strfree.c @@ -16,9 +16,11 @@ # include <stdbool.h> # include <stdint.h> # include <stdlib.h> -# include <u8c/u32.h> -bool u8c_u32free(char32_t const * * const _u32) { - free((char32_t *)*_u32); /* This cast does indeed discard a const-qualifier, but it is not undefined behaviour, as the array must have been allocated by calloc or malloc, meaning it's original type is not const-qualified. 
*/ - *_u32 = NULL; - return false; +# include <u8c/str.h> +struct u8c_strfree_tuple u8c_strfree(char32_t const * const restrict _str) { + struct u8c_strfree_tuple ret = { + .stat = false, + }; + free((char32_t *)_str); /* This cast does indeed discard a const-qualifier, but it is not undefined behaviour, as the array must have been allocated by calloc or malloc, meaning it's original type is not const-qualified. */ + return ret; } diff --git a/src/u8c/str.h.d/strins.c b/src/u8c/str.h.d/strins.c new file mode 100644 index 0000000..89173ae --- /dev/null +++ b/src/u8c/str.h.d/strins.c @@ -0,0 +1,38 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. 
+*/ +# include <stdbool.h> +# include <stddef.h> +# include <u8c/SIZE_C.h> +# include <u8c/str.h> +# include <uchar.h> +struct u8c_strins_tuple u8c_strins(size_t const _pos,char32_t const * const restrict _str0,char32_t const * const restrict _str1) { + struct u8c_strins_tuple ret = { + .stat = false, + }; + char32_t const * lstr = u8c_strsubstr(SIZE_C(0x0),_pos - SIZE_C(0x1),_str0).str; + char32_t const * rstr = u8c_strsubstr(_pos,SIZE_C(0x0),_str0).str; + ret.strsz = SIZE_C(0x0); + char32_t const * out = NULL; + { + char32_t const * tmp = u8c_strcat(lstr,_str1).str; + u8c_strfree(lstr); + out = u8c_strcat(tmp,rstr).str; + u8c_strfree(rstr); + u8c_strfree(tmp); + } + ret.str = out; + return ret; +} diff --git a/src/u8c/u32.h.d/u32substr.c b/src/u8c/str.h.d/strsubstr.c index 855d062..b9daac5 100644 --- a/src/u8c/u32.h.d/u32substr.c +++ b/src/u8c/str.h.d/strsubstr.c @@ -13,33 +13,36 @@ If not, see <https://www.gnu.org/licenses/>. */ -# include <assert.h> # include <stdbool.h> # include <stdlib.h> # include <u8c/SIZE_C.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <uchar.h> -bool u8c_u32substr(char32_t const * * const _out,size_t const _start,size_t const _len,char32_t const * const _in) { - assert(_out != NULL); - assert(_in != NULL); - u8c_u32free(_out); - size_t insz = SIZE_C(0x0); - u8c_u32sz(&insz,_in); +struct u8c_strsubstr_tuple u8c_strsubstr(size_t const _start,size_t const _len,char32_t const * const restrict _in) { + struct u8c_strsubstr_tuple ret = { + .stat = false, + }; + size_t insz = u8c_strsz(_in).sz; size_t len = _len; if(_len == SIZE_C(0x0)) { len = insz - _start; } if(insz < _start + len) { - return true; + return ret; } size_t const outsz = len + SIZE_C(0x2); char32_t * out = NULL; - if(u8c_u32alloc(&out,outsz)) { - return true; + { + struct u8c_stralloc_tuple const tuple = u8c_stralloc(outsz); + if(tuple.stat) { + ret.stat = true; + return ret; + } + out = tuple.str; } for(register size_t n = SIZE_C(0x0);n <= len;n += SIZE_C(0x1)) { 
out[n] = _in[n + _start]; } - *_out = out; - return false; + ret.str = out; + return ret; } diff --git a/src/u8c/str.h.d/strsz.c b/src/u8c/str.h.d/strsz.c new file mode 100644 index 0000000..f1b348a --- /dev/null +++ b/src/u8c/str.h.d/strsz.c @@ -0,0 +1,35 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. +*/ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <u8c/SIZE_C.h> +# include <u8c/str.h> +# include <uchar.h> +struct u8c_strsz_tuple u8c_strsz(char32_t const * const restrict _in) { + struct u8c_strsz_tuple ret = { + .stat = false, + }; + { + struct u8c_strfndchr_tuple const tuple = u8c_strfndchr(_in,UINT8_C(0x0)); + if(tuple.stat) { + ret.stat = true; + return ret; + } + ret.sz = tuple.pos; + } + return ret; +} diff --git a/src/u8c/u16.h.d/u16alloc.c b/src/u8c/u16.h.d/u16alloc.c index 3906017..ce20ecb 100644 --- a/src/u8c/u16.h.d/u16alloc.c +++ b/src/u8c/u16.h.d/u16alloc.c @@ -18,12 +18,16 @@ # include <u8c/err.h> # include <u8c/u16.h> # include <uchar.h> -bool u8c_u16alloc(char16_t * * const _u16,size_t const _sz) { +struct u8c_u16alloc_tuple u8c_u16alloc(size_t const _sz) { + struct u8c_u16alloc_tuple ret = { + .stat = false, + }; char16_t * arr = NULL; if((arr = calloc(sizeof *arr,_sz)) == NULL) { - u8c_seterr(U"u8c_u16alloc: Unable to allocate resources (not enough memory?).",u8c_errtyp_badalloc); 
- return false; + u8c_seterr(u8c_errtyp_badalloc,U"u8c_u16alloc: Unable to allocate resources (not enough memory?)."); + ret.stat = true; + return ret; } - *_u16 = arr; - return false; + ret.u16 = arr; + return ret; } diff --git a/src/u8c/u16.h.d/u16free.c b/src/u8c/u16.h.d/u16free.c index d447562..43e7503 100644 --- a/src/u8c/u16.h.d/u16free.c +++ b/src/u8c/u16.h.d/u16free.c @@ -17,8 +17,10 @@ # include <stdint.h> # include <stdlib.h> # include <u8c/u16.h> -bool u8c_u16free(char16_t const * * const _u16) { - free((char16_t *)*_u16); - *_u16 = NULL; - return false; +struct u8c_u16free_tuple u8c_u16free(char16_t const * const restrict _u16) { + struct u8c_u16free_tuple ret = { + .stat = false, + }; + free((char16_t *)_u16); + return ret; } diff --git a/src/u8c/u32.h.d/u32ins.c b/src/u8c/u32.h.d/u32ins.c deleted file mode 100644 index 7fccb7c..0000000 --- a/src/u8c/u32.h.d/u32ins.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of u8c. - - u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with u8c. - - If not, see <https://www.gnu.org/licenses/>. 
-*/ -# include <assert.h> -# include <stdbool.h> -# include <stddef.h> -# include <u8c/SIZE_C.h> -# include <u8c/u32.h> -# include <uchar.h> -bool u8c_u32ins(size_t * const _sz,char32_t const * * const _out,size_t const _pos,char32_t const * const _str0,char32_t const * const _str1) { - assert(_out != NULL); - assert(_str0 != NULL); - assert(_str1 != NULL); - char32_t const * lstr = NULL; - char32_t const * rstr = NULL; - u8c_u32substr(&lstr,SIZE_C(0x0),_pos - SIZE_C(0x1),_str0); - u8c_u32substr(&rstr,_pos,SIZE_C(0x0),_str0); - size_t sz = SIZE_C(0x0); - char32_t const * out = NULL; - { - char32_t const * tmp = NULL; - u8c_u32cat(NULL,&tmp,lstr,_str1); - u8c_u32free(&lstr); - u8c_u32cat(NULL,&out,tmp,rstr); - u8c_u32free(&rstr); - u8c_u32free(&tmp); - } - if(_sz != NULL) { - *_sz = sz; - } - *_out = out; - return false; -} diff --git a/src/u8c/u8.h.d/u8alloc.c b/src/u8c/u8.h.d/u8alloc.c index ba02bc8..ba28243 100644 --- a/src/u8c/u8.h.d/u8alloc.c +++ b/src/u8c/u8.h.d/u8alloc.c @@ -17,12 +17,16 @@ # include <stdlib.h> # include <u8c/err.h> # include <u8c/u8.h> -bool u8c_u8alloc(unsigned char * * const _u8,size_t const _sz) { +struct u8c_u8alloc_tuple u8c_u8alloc(size_t const _sz) { + struct u8c_u8alloc_tuple ret = { + .stat = false, + }; unsigned char * arr = NULL; if((arr = calloc(sizeof *arr,_sz)) == NULL) { - u8c_seterr(U"u8c_u8alloc: Unable to allocate resources (not enough memory?).",u8c_errtyp_badalloc); - return false; + u8c_seterr(u8c_errtyp_badalloc,U"u8c_u8alloc: Unable to allocate resources (not enough memory?)."); + ret.stat = true; + return ret; } - *_u8 = arr; - return false; + ret.u8 = arr; + return ret; } diff --git a/src/u8c/u8.h.d/u8dec.c b/src/u8c/u8.h.d/u8dec.c index 365c81a..4cba14f 100644 --- a/src/u8c/u8.h.d/u8dec.c +++ b/src/u8c/u8.h.d/u8dec.c @@ -19,23 +19,24 @@ # include <stdint.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> +# include <u8c/str.h> # include <u8c/u8.h> # include <uchar.h> -bool u8c_u8dec(size_t * 
const _sz,char32_t const * * const _out,unsigned char const * const _in) { - assert(_out != NULL); - assert(_in != NULL); - register size_t insz = SIZE_C(0x0); - register size_t outsz = SIZE_C(0x1); - for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;outsz += SIZE_C(0x1)) { /* First pass: get size of input array and determine size of output array. */ +struct u8c_u8dec_tuple u8c_u8dec(unsigned char const * const restrict _in) { + struct u8c_u8dec_tuple ret = { + .stat = false, + }; + register size_t insz = SIZE_C(0x0); + for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;ret.strsz += SIZE_C(0x1)) { /* First pass: get size of input array and determine size of output array. */ register unsigned char const tmp = _in[n]; if(tmp == UINT8_C(0x0)) { /* Null-terminator: end of string has been reached. */ insz = n; goto nottoobig; } if(tmp >= UINT8_C(0b11111000)) { /* Too big. */ - u8c_seterr(U"u8c_u8dec: Character out of range (too big).",u8c_errtyp_u8oor); - return true; + u8c_seterr(u8c_errtyp_u8oor,U"u8c_u8dec: Character out of range (too big)."); + ret.stat = true; + return ret; } if(tmp >= UINT8_C(0b11110000)) { /* Four byte. */ n += SIZE_C(0x4); @@ -53,15 +54,18 @@ bool u8c_u8dec(size_t * const _sz,char32_t const * * const _out,unsigned char co n += SIZE_C(0x1); } /* Input is not null-terminated. */ - u8c_seterr(U"u8c_u8dec: Unterminated input.",u8c_errtyp_untermin); - return true; + u8c_seterr(u8c_errtyp_untermin,U"u8c_u8dec: Unterminated input."); + ret.stat = true; + return ret; nottoobig:; - if(_sz != NULL) { - *_sz = outsz; - } uint_least32_t * out = NULL; - if(u8c_u32alloc(&out,outsz + SIZE_C(0x1))) { - return false; + { + struct u8c_stralloc_tuple const tuple = u8c_stralloc(ret.strsz + SIZE_C(0x1)); + if(tuple.stat) { + ret.stat = true; + return ret; + } + out = tuple.str; } for(register size_t n = SIZE_C(0x0),outn = SIZE_C(0x0);n < insz;outn += SIZE_C(0x1)) { /* Second pass: decode UTF-8. */ if(_in[n] >= UINT8_C(0b11110000)) { /* Four bytes. 
*/ @@ -99,7 +103,6 @@ nottoobig:; n += SIZE_C(0x1); continue; } - u8c_u32free(_out); - *_out = out; - return false; + ret.str = out; + return ret; } diff --git a/src/u8c/u8.h.d/u8enc.c b/src/u8c/u8.h.d/u8enc.c index f3f3570..2ac0007 100644 --- a/src/u8c/u8.h.d/u8enc.c +++ b/src/u8c/u8.h.d/u8enc.c @@ -19,48 +19,53 @@ # include <stdint.h> # include <u8c/SIZE_C.h> # include <u8c/err.h> -# include <u8c/u32.h> +# include <u8c/main.h> +# include <u8c/str.h> # include <u8c/u8.h> # include <uchar.h> -bool u8c_u8enc(size_t * const _sz,unsigned char const * * const _out,char32_t const * const _in) { - assert(_out != NULL); - assert(_in != NULL); - size_t insz = SIZE_C(0x0); /* Size of input array (bytes). */ - size_t outsz = SIZE_C(0x0); /* Size of output array /bytes). */ +struct u8c_u8enc_tuple u8c_u8enc(char32_t const * const restrict _in) { + struct u8c_u8enc_tuple ret = { + .stat = false, + }; + size_t insz = SIZE_C(0x0); /* Size of input array (bytes). */ for(register size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { /* First pass: get size of input array, and determine size of output array. */ register char32_t const tmp = _in[n]; - if(tmp > u8c_u32max) { /* Codepoint out of range. */ - u8c_seterr(U"u8c_u8enc: Codepoint out of range (too big).",u8c_errtyp_u32oor); - return true; + if(tmp > u8c_unimax) { /* Codepoint out of range. */ + u8c_seterr(u8c_errtyp_stroor,U"u8c_u8enc: Codepoint out of range (too big)."); + ret.stat = true; + return ret; } if(tmp >= UINT32_C(0x10000)) { /* 4 bytes. */ - outsz += SIZE_C(0x4); + ret.u8sz += SIZE_C(0x4); continue; } if(tmp >= UINT32_C(0x800)) { /* 3 bytes. */ - outsz += SIZE_C(0x3); + ret.u8sz += SIZE_C(0x3); continue; } if(tmp >= UINT32_C(0x80)) { /* 2 bytes. */ - outsz += SIZE_C(0x2); + ret.u8sz += SIZE_C(0x2); continue; } /* 1 byte. 
*/ - outsz += SIZE_C(0x1); + ret.u8sz += SIZE_C(0x1); if(tmp == UINT32_C(0x0)) { insz = n + SIZE_C(0x1); goto nottoobig; } } - u8c_seterr(U"u8c_u8enc: Unterminated input.",u8c_errtyp_untermin); - return true; + u8c_seterr(u8c_errtyp_untermin,U"u8c_u8enc: Unterminated input."); + ret.stat = true; + return ret; nottoobig:; - if(_sz != NULL) { - *_sz = outsz; - } unsigned char * out = NULL; - if(u8c_u8alloc(&out,outsz + SIZE_C(0x1))) { - return true; + { + struct u8c_u8alloc_tuple const tuple = u8c_u8alloc(ret.u8sz + SIZE_C(0x1)); + if(tuple.stat) { + ret.stat = true; + return ret; + } + out = tuple.u8; } for(register size_t n = SIZE_C(0x0), outn = SIZE_C(0x0);n < insz;n += SIZE_C(0x1),outn += SIZE_C(0x1)) { /* Second pass: encode each codepoint into UTF-8. */ register char32_t const tmp = _in[n]; @@ -91,7 +96,6 @@ nottoobig:; /* One byte. */ out[outn] = (uint_least8_t)tmp; } - u8c_u8free(_out); - *_out = out; - return false; + ret.u8 = out; + return ret; } diff --git a/src/u8c/u8.h.d/u8free.c b/src/u8c/u8.h.d/u8free.c index af5a6bd..a0b61a8 100644 --- a/src/u8c/u8.h.d/u8free.c +++ b/src/u8c/u8.h.d/u8free.c @@ -17,8 +17,10 @@ # include <stdint.h> # include <stdlib.h> # include <u8c/u8.h> -bool u8c_u8free(unsigned char const * * const _u8) { - free((unsigned char *)*_u8); - *_u8 = NULL; - return false; +struct u8c_u8free_tuple u8c_u8free(unsigned char const * const restrict _u8) { + struct u8c_u8free_tuple ret = { + .stat = false, + }; + free((unsigned char *)_u8); + return ret; } |