diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/luma/print.c | 44 | ||||
-rw-r--r-- | src/luma/utf8dec.c | 105 | ||||
-rw-r--r-- | src/luma/utf8enc.c | 82 | ||||
-rw-r--r-- | src/main.c | 50 |
4 files changed, 0 insertions, 281 deletions
diff --git a/src/luma/print.c b/src/luma/print.c deleted file mode 100644 index a085e8f..0000000 --- a/src/luma/print.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of luma. - - luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with luma. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <luma/utf8enc.h> -# include <stdarg.h> -# include <stdint.h> -# include <stdio.h> -# include <stdlib.h> -void luma_print(uint32_t * msg,...) { - va_list args; - va_start(args,msg); - for(size_t n = (size_t){0x0};;n += (size_t){0x1}) { - if(msg[n] == (uint32_t){0x0}) { - fwrite(&(uint8_t){0xA},0x1,0x1,stdout); - break; - } - if(msg[n] == (uint32_t){0xFFFD}) { - size_t chrsz = (size_t){0x0}; - uint8_t * chr = NULL; - luma_utf8enc((uint32_t[]){va_arg(args,uint32_t),0x0},&chr,&chrsz); - fwrite(chr,0x1,chrsz - (size_t){0x1},stdout); - free((void *)chr); - continue; - } - size_t chrsz = (size_t){0x0}; - uint8_t * chr = NULL; - luma_utf8enc((uint32_t[]){msg[n],0x0,0x0},&chr,&chrsz); - fwrite(chr,0x1,chrsz - (size_t){0x1},stdout); - free((void *)chr); - } - va_end(args); -} diff --git a/src/luma/utf8dec.c b/src/luma/utf8dec.c deleted file mode 100644 index e6c302e..0000000 --- a/src/luma/utf8dec.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of luma. - - luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with luma. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <luma/utf8dec.h> -# include <stdint.h> -# include <stdio.h> -# include <stdlib.h> -int luma_utf8dec(uint8_t * utf,uint32_t * * codeps,size_t * outszptr) { - size_t sz = (size_t){0x0}; - size_t outsz = (size_t){0x0}; - for(size_t n = (size_t){0x0};;n += (size_t){0x1}) { // First pass: get size of input array and determine size of output array. - if(utf[n] == (uint8_t){0x0}) { // Null-terminator. - sz = n; - break; - } - if(utf[n] >= (uint8_t){0xF0}) { // Four byte. - outsz += (size_t){0x4}; - n += (size_t){0x3}; - continue; - } - if(utf[n] >= (uint8_t){0xE0}) { // Three bytes. - outsz += (size_t){0x3}; - n += (size_t){0x2}; - continue; - } - if(utf[n] >= (uint8_t){0xC0}) { // Two bytes. - outsz += (size_t){0x2}; - n += (size_t){0x1}; - continue; - } - if(utf[n] >= (uint8_t){0x80}) { // One byte. - outsz += (size_t){0x1}; - continue; - } - // Out of range. - return 0x1; - } - outsz += (size_t){0x1}; // Reserve space for null-terminator. - if(outszptr != NULL) { - *outszptr = outsz; - } - *codeps = malloc(outsz); - (*codeps)[outsz - (size_t){0x1}] = (uint32_t){0x0}; // Create null-terminator on output array. - for(size_t n = (size_t){0x0}, outn = (size_t){0x0};n < sz;n += (size_t){0x1},outn += (size_t){0x1}) { // Second pass: decode UTF-8. - uint8_t chr = utf[n]; - if(chr >= (uint8_t){0xF7}) { // Out of range. - return 0x1; - } - if(chr >= (uint8_t){0xF0}) { // Four byte. - uint32_t codep = (uint32_t){(chr ^ 0xF0) << 0x12}; - n += (size_t){0x1}; - chr = utf[n]; - codep += (uint32_t){(chr ^ 0x80) << 0xC}; - n += (size_t){0x1}; - chr = utf[n]; - codep += (uint32_t){(chr ^ 0x80) << 0x6}; - n += (size_t){0x1}; - chr = utf[n]; - codep += (uint32_t){(chr ^ 0x80)}; - (*codeps)[outn] = codep; - continue; - } - if(chr >= (uint8_t){0xE0}) { // Three bytes. - uint32_t codep = (uint32_t){(chr ^ 0xE0) << 0xC}; - n += (size_t){0x1}; - chr = utf[n]; - codep += (uint32_t){(chr ^ 0x80) << 0x6}; - n += (size_t){0x1}; - chr = utf[n]; - codep += (uint32_t){(chr ^ 0x80)}; - n += (size_t){0x1}; - (*codeps)[outn] = codep; - continue; - } - if(chr >= (uint8_t){0xC0}) { // Two bytes. - uint32_t codep = (uint32_t){(chr ^ 0xC0) << 0x6}; - n += (size_t){0x1}; - chr = utf[n]; - codep += (uint32_t){(chr ^ 0x80)}; - n += (size_t){0x1}; - (*codeps)[outn] = codep; - continue; - } - if(chr > (uint8_t){0x7F}) { // One byte. - uint32_t codep = (uint32_t){chr}; - (*codeps)[outn] = codep; - continue; - } - // Out of range. - return 0x1; - } - return 0x0; -} diff --git a/src/luma/utf8enc.c b/src/luma/utf8enc.c deleted file mode 100644 index 3315026..0000000 --- a/src/luma/utf8enc.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of luma. - - luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with luma. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <luma/utf8enc.h> -# include <stdint.h> -# include <stdio.h> -# include <stdlib.h> -int luma_utf8enc(uint32_t * codeps,uint8_t * * utf,size_t * outszptr) { - size_t sz = (size_t){0x0}; // Size of input array (bytes). - size_t outsz = (size_t){0x0}; // Size of output array /bytes). - for(size_t n = (size_t){0x0};;n += (size_t){0x1}) { // First pass: get size of input array, and determine size of output array. - uint32_t codep = codeps[n]; // Current Unicode codepoint. - if(codep == (uint32_t){0x0}) { // U+0000 is Null. - sz = n; - break; - } - if(codep >= (uint32_t){0x110000}) { // Codepoint out of range. - return 0x1; - } - if(codep >= (uint32_t){0x10000}) { // 4 bytes. - outsz += (size_t){0x4}; - continue; - } - if(codep >= (uint32_t){0x800}) { // 3 bytes. - outsz += (size_t){0x3}; - continue; - } - if(codep >= (uint32_t){0x80}) { // 2 bytes. - outsz += (size_t){0x2}; - continue; - } - // 1 byte. - outsz += (size_t){0x1}; - } - outsz += (size_t){0x1}; // Add space for null-terminator. - if(outszptr != NULL) { - *outszptr = outsz; - } - *utf = malloc(outsz); // Allocate space for output array. - (*utf)[outsz - (size_t){0x1}] = (uint8_t){0x0}; // Create null-terminator on output array. - for(size_t n = (size_t){0x0}, outn = (size_t){0x0};n < sz;n += (size_t){0x1},outn += (size_t){0x1}) { // Second pass: encode each codepoint into UTF-8. - if(codeps[n] >= 0x10000) { // Four bytes. - (*utf)[outn] = (uint8_t){0xF0 + (codeps[n] >> 0x12)}; - outn += (size_t){0x1}; - (*utf)[outn] = (uint8_t){0x80 + ((codeps[n] >> 0xC) & 0x3F)}; - outn += (size_t){0x1}; - (*utf)[outn] = (uint8_t){0x80 + ((codeps[n] >> 0x6) & 0x3F)}; - outn += (size_t){0x1}; - (*utf)[outn] = (uint8_t){0x80 + (codeps[n] & 0x3F)}; - continue; - } - if(codeps[n] >= 0x800) { // Three bytes. - (*utf)[outn] = (uint8_t){0xE0 + (codeps[n] >> 0xC)}; - outn += (size_t){0x1}; - (*utf)[outn] = (uint8_t){0x80 + ((codeps[n] >> 0x6) & 0x3F)}; - outn += (size_t){0x1}; - (*utf)[outn] = (uint8_t){0x80 + (codeps[n] & 0x3F)}; - continue; - } - if(codeps[n] >= 0x80) { // Two bytes. - (*utf)[outn] = (uint8_t){0xC0 + (codeps[n] >> 0x6)}; - outn += (size_t){0x1}; - (*utf)[outn] = (uint8_t){0x80 + (codeps[n] & 0x3F)}; - continue; - } - // One byte. - (*utf)[outn] = codeps[n]; - } - return 0x0; -} diff --git a/src/main.c b/src/main.c deleted file mode 100644 index 969bdcd..0000000 --- a/src/main.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - Copyright 2021 Gabriel Jensen - - This file is part of luma. - - luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - - luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License along with luma. - - If not, see <https://www.gnu.org/licenses/>. -*/ -# include <locale.h> -# include <luma/arch.h> -# include <luma/print.h> -# include <luma/utf8dec.h> -# include <luma/utf8enc.h> -# include <stdint.h> -# include <stdio.h> -# include <stdlib.h> -int main(void) { - setlocale(LC_ALL,"en_GB.UTF-8"); - enum luma_arch code[] = { - luma_arch_lab, - luma_arch_hello, - }; - for(size_t i = (size_t){0x0};i < sizeof code / sizeof code[0x0];++i) { - printf("Got code %d.\n",code[i]); - } - { - uint8_t * msg = NULL; - luma_utf8enc((uint32_t[]){0x00A2,0x2C,0x939,0x2C,0x10348,0x2C,0x20AC,0x2C,0x218A,0x2C,0x1F44B,0x0},&msg,NULL); - printf("Array: %s\n",msg); - free((void *){msg}); - } - { - uint32_t * codeps = NULL; - uint8_t * utf = NULL; - luma_utf8enc((uint32_t[]){0x1F44B,0x0},&utf,NULL); - luma_utf8dec(utf,&codeps,NULL); - free((void *)utf); - printf("It is %u.\n",codeps[0x0]); - free((void *)codeps); - } - luma_print((uint32_t[]){0x48,0x65,0x6C,0x6C,0x6F,0x20,0xFFFD,0x65,0x72,0x65,0x21,0x0},(uint32_t){0xF0}); - exit(EXIT_SUCCESS); -} |