diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/u8c/free.c | 22 | ||||
-rw-r--r-- | src/u8c/print.c | 96 | ||||
-rw-r--r-- | src/u8c/u8dec.c | 95 | ||||
-rw-r--r-- | src/u8c/u8enc.c | 82 |
4 files changed, 295 insertions, 0 deletions
diff --git a/src/u8c/free.c b/src/u8c/free.c new file mode 100644 index 0000000..f58f667 --- /dev/null +++ b/src/u8c/free.c @@ -0,0 +1,22 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. +*/ +# include <u8c/free.h> +# include <stdint.h> +# include <stdlib.h> +uint_least8_t u8c_free(void * ptr) { + free(ptr); + return 0x0; +} diff --git a/src/u8c/print.c b/src/u8c/print.c new file mode 100644 index 0000000..741d25a --- /dev/null +++ b/src/u8c/print.c @@ -0,0 +1,96 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. + + u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with u8c. + + If not, see <https://www.gnu.org/licenses/>. +*/ +# include <u8c/fmttyp.h> +# include <u8c/u8enc.h> +# include <stdarg.h> +# include <stdint.h> +# include <stdio.h> +# include <stdlib.h> +uint_least8_t u8c_print(uint_least32_t * msg,...) 
{ + va_list args; + va_start(args,msg); + for(size_t n = (size_t){0x0};;n += (size_t){0x1}) { + if(msg[n] == (uint_least32_t){0x0}) { + fwrite(&(uint_least8_t){0xA},(size_t){0x1},(size_t){0x1},stdout); + break; + } + if(msg[n] == (uint_least32_t){0xFFFD}) { + enum u8c_fmttyp fmttyp = va_arg(args,enum u8c_fmttyp); + switch(fmttyp) { + case u8c_fmttyp_chr: + { + uint_least32_t const chr = va_arg(args,uint_least32_t); + if(chr == (uint_least32_t){0x0}) { + continue; + } + size_t strsz = (size_t){0x0}; + uint_least8_t * str = NULL; + u8c_u8enc((uint_least32_t[]){chr,0x0},&strsz,&str); + fwrite(str,(size_t){0x1},strsz - (size_t){0x1},stdout); + free(str); + break; + } + case u8c_fmttyp_int: + { + int_least64_t n = va_arg(args,int_least64_t); + if(n < 0x0) { + size_t chrsz = (size_t){0x0}; + uint_least8_t * chr = NULL; + u8c_u8enc((uint_least32_t[]){0x2212,0x0},&chrsz,&chr); + fwrite(chr,(size_t){0x1},chrsz - (size_t){0x1},stdout); + free(chr); + } + for(;n != 0x0;n /= (int_least64_t){0xB}) { + + } + break; + } + case u8c_fmttyp_str: + { + size_t strsz = (size_t){0x0}; + uint_least8_t * str = NULL; + u8c_u8enc(va_arg(args,uint_least32_t *),&strsz,&str); + fwrite(str,(size_t){0x1},strsz - (size_t){0x1},stdout); + free((void *)str); + break; + } + case u8c_fmttyp_uint: + { + break; + } + } + continue; + } + size_t chrsz = (size_t){0x0}; + uint_least8_t * chr = NULL; + u8c_u8enc((uint_least32_t[]){msg[n],0x0,0x0},&chrsz,&chr); + fwrite(chr,(size_t){0x1},chrsz - (size_t){0x1},stdout); + free((void *)chr); + } + va_end(args); + return 0x0; +} +/* + ####################### + # # + # ## ## ### # # ### # + # # # # # # ### # # + # ## ## # ### # # + # # # # # ### # # + # # # # ### # # # # + # # + ####################### +*/ diff --git a/src/u8c/u8dec.c b/src/u8c/u8dec.c new file mode 100644 index 0000000..60f4c3d --- /dev/null +++ b/src/u8c/u8dec.c @@ -0,0 +1,95 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of u8c. 
/*
	Helper for u8c_u8dec (from src/u8c/u8dec.c).

	Returns how many bytes the UTF-8 sequence introduced by lead occupies (1 to 4), or 0 if lead is 0xF7 or above and therefore rejected as out of range.

	Bytes below 0xC0, including stray continuation bytes 0x80 to 0xBF, count as one-byte sequences.
*/
static size_t u8c_u8dec_seqlen(uint_least8_t lead) {
	if(lead >= 0xF7) {
		return 0x0;
	}
	if(lead >= 0xF0) {
		return 0x4;
	}
	if(lead >= 0xE0) {
		return 0x3;
	}
	if(lead >= 0xC0) {
		return 0x2;
	}
	return 0x1;
}

/*
	u8c_u8dec (from src/u8c/u8dec.c)

	Decodes the null-terminated UTF-8 byte array utf into a newly allocated, null-terminated array of codepoints.

	utf:      input bytes, terminated by 0x0.
	codepssz: if not null, receives the number of elements in the output array, including its terminator.
	codeps:   receives the output array, allocated with malloc.

	Returns 0x0 on success. Returns 0x1 if utf or codeps is null, if a lead byte is 0xF7 or above, if a multi-byte sequence is followed by something that is not a continuation byte (which includes a sequence cut short by the terminator), or if allocation fails.
	On failure nothing is left allocated, *codeps is null and *codepssz (if given) is 0x0.

	Stray continuation bytes (0x80 to 0xBF) in lead position are passed through as one codepoint of the same value.
*/
uint_least8_t u8c_u8dec(uint_least8_t * utf,size_t * codepssz,uint_least32_t * * codeps) {
	if(codeps == NULL) {
		return 0x1;
	}
	*codeps = NULL;
	if(codepssz != NULL) {
		*codepssz = 0x0;
	}
	if(utf == NULL) {
		return 0x1;
	}
	// First pass: validate the input and count the codepoints, so that no error can occur once memory has been allocated.
	size_t outsz = 0x1; // Start at one to leave room for the null-terminator.
	for(size_t n = 0x0;utf[n] != 0x0;) {
		size_t const len = u8c_u8dec_seqlen(utf[n]);
		if(len == 0x0) {
			u8c_dbgprint(U"Out of range (in character decoding: byte too big).");
			return 0x1;
		}
		// The terminator is not a continuation byte, so this check also stops a truncated sequence from reading past the end of the input.
		for(size_t k = 0x1;k < len;k += 0x1) {
			if((utf[n + k] & 0xC0) != 0x80) {
				u8c_dbgprint(U"Malformed sequence (in character decoding: invalid continuation byte).");
				return 0x1;
			}
		}
		n     += len;
		outsz += 0x1;
	}
	if(outsz > SIZE_MAX / sizeof **codeps) { // The byte count would overflow size_t.
		return 0x1;
	}
	uint_least32_t * const out = malloc(outsz * sizeof *out);
	if(out == NULL) {
		return 0x1;
	}
	// Second pass: decode. The input is known to be well-formed at this point.
	size_t outn = 0x0;
	for(size_t n = 0x0;utf[n] != 0x0;outn += 0x1) {
		uint_least8_t const lead  = utf[n];
		size_t const        len   = u8c_u8dec_seqlen(lead);
		uint_least32_t      codep = lead; // One byte: the value is the codepoint.
		if(len == 0x4) {
			codep = lead & 0x07;
		}
		else if(len == 0x3) {
			codep = lead & 0x0F;
		}
		else if(len == 0x2) {
			codep = lead & 0x1F;
		}
		for(size_t k = 0x1;k < len;k += 0x1) { // Each continuation byte contributes its six low bits.
			codep = (codep << 0x6) | (uint_least32_t)(utf[n + k] & 0x3F);
		}
		out[outn] = codep;
		n        += len;
	}
	out[outn] = 0x0; // Null-terminator of the output array.
	*codeps   = out;
	if(codepssz != NULL) {
		*codepssz = outsz;
	}
	return 0x0;
}
/*
	Helper for u8c_u8enc (from src/u8c/u8enc.c).

	Returns the number of UTF-8 bytes needed for codep (1 to 4), or 0 if codep is 0x110000 or above and thus out of range.
*/
static size_t u8c_u8enc_len(uint_least32_t codep) {
	if(codep >= 0x110000) {
		return 0x0;
	}
	if(codep >= 0x10000) {
		return 0x4;
	}
	if(codep >= 0x800) {
		return 0x3;
	}
	if(codep >= 0x80) {
		return 0x2;
	}
	return 0x1;
}

/*
	u8c_u8enc (from src/u8c/u8enc.c)

	Encodes the null-terminated codepoint array codeps into a newly allocated, null-terminated UTF-8 byte array.

	codeps: input codepoints, terminated by U+0000.
	utfsz:  if not null, receives the size of the output array in bytes, including its terminator.
	utf:    receives the output array, allocated with malloc.

	Returns 0x0 on success. Returns 0x1 if codeps or utf is null, if a codepoint is 0x110000 or above, or if allocation fails.
	On failure nothing is left allocated, *utf is null and *utfsz (if given) is 0x0.
*/
uint_least8_t u8c_u8enc(uint_least32_t * codeps,size_t * utfsz,uint_least8_t * * utf) {
	if(utf == NULL) {
		return 0x1;
	}
	// Give the outputs defined values up front, so that a caller that ignores the return value never sees stale data.
	*utf = NULL;
	if(utfsz != NULL) {
		*utfsz = 0x0;
	}
	if(codeps == NULL) {
		return 0x1;
	}
	// First pass: validate every codepoint and add up the output size.
	size_t outsz = 0x1; // Start at one to leave room for the null-terminator.
	for(size_t n = 0x0;codeps[n] != 0x0;n += 0x1) {
		size_t const len = u8c_u8enc_len(codeps[n]);
		if(len == 0x0) { // Codepoint out of range.
			return 0x1;
		}
		outsz += len;
	}
	uint_least8_t * const out = malloc(outsz * sizeof *out);
	if(out == NULL) {
		return 0x1;
	}
	// Second pass: encode each codepoint.
	size_t outn = 0x0;
	for(size_t n = 0x0;codeps[n] != 0x0;n += 0x1) {
		uint_least32_t const codep = codeps[n];
		switch(u8c_u8enc_len(codep)) {
		case 0x4:
			out[outn]       = (uint_least8_t)(0xF0 | (codep >> 0x12));
			out[outn + 0x1] = (uint_least8_t)(0x80 | ((codep >> 0xC) & 0x3F));
			out[outn + 0x2] = (uint_least8_t)(0x80 | ((codep >> 0x6) & 0x3F));
			out[outn + 0x3] = (uint_least8_t)(0x80 | (codep & 0x3F));
			outn           += 0x4;
			break;
		case 0x3:
			out[outn]       = (uint_least8_t)(0xE0 | (codep >> 0xC));
			out[outn + 0x1] = (uint_least8_t)(0x80 | ((codep >> 0x6) & 0x3F));
			out[outn + 0x2] = (uint_least8_t)(0x80 | (codep & 0x3F));
			outn           += 0x3;
			break;
		case 0x2:
			out[outn]       = (uint_least8_t)(0xC0 | (codep >> 0x6));
			out[outn + 0x1] = (uint_least8_t)(0x80 | (codep & 0x3F));
			outn           += 0x2;
			break;
		default: // One byte: the codepoint is the byte.
			out[outn] = (uint_least8_t)codep;
			outn     += 0x1;
			break;
		}
	}
	out[outn] = 0x0; // Null-terminator of the output array.
	*utf      = out;
	if(utfsz != NULL) {
		*utfsz = outsz;
	}
	return 0x0;
}