diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/luma/print.c | 24 | ||||
-rw-r--r-- | src/luma/utf8dec.c | 108 | ||||
-rw-r--r-- | src/luma/utf8enc.c | 64 | ||||
-rw-r--r-- | src/main.c | 36 |
4 files changed, 152 insertions, 80 deletions
diff --git a/src/luma/print.c b/src/luma/print.c index 8d8ac1d..a085e8f 100644 --- a/src/luma/print.c +++ b/src/luma/print.c @@ -1,7 +1,23 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of luma. + + luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with luma. + + If not, see <https://www.gnu.org/licenses/>. +*/ # include <luma/utf8enc.h> # include <stdarg.h> # include <stdint.h> # include <stdio.h> +# include <stdlib.h> void luma_print(uint32_t * msg,...) { va_list args; va_start(args,msg); @@ -12,13 +28,17 @@ void luma_print(uint32_t * msg,...) { } if(msg[n] == (uint32_t){0xFFFD}) { size_t chrsz = (size_t){0x0}; - uint8_t const * chr = luma_utf8enc((uint32_t[]){va_arg(args,uint32_t),0x0},&chrsz); + uint8_t * chr = NULL; + luma_utf8enc((uint32_t[]){va_arg(args,uint32_t),0x0},&chr,&chrsz); fwrite(chr,0x1,chrsz - (size_t){0x1},stdout); + free((void *)chr); continue; } size_t chrsz = (size_t){0x0}; - uint8_t const * chr = luma_utf8enc((uint32_t[]){msg[n],0x0,0x0},&chrsz); + uint8_t * chr = NULL; + luma_utf8enc((uint32_t[]){msg[n],0x0,0x0},&chr,&chrsz); fwrite(chr,0x1,chrsz - (size_t){0x1},stdout); + free((void *)chr); } va_end(args); } diff --git a/src/luma/utf8dec.c b/src/luma/utf8dec.c index f6e29be..e6c302e 100644 --- a/src/luma/utf8dec.c +++ b/src/luma/utf8dec.c @@ -1,91 +1,105 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of luma. + + luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with luma. + + If not, see <https://www.gnu.org/licenses/>. +*/ # include <luma/utf8dec.h> # include <stdint.h> # include <stdio.h> # include <stdlib.h> -uint32_t * luma_utf8dec(uint8_t const * str,size_t * outszptr) { +int luma_utf8dec(uint8_t * utf,uint32_t * * codeps,size_t * outszptr) { size_t sz = (size_t){0x0}; size_t outsz = (size_t){0x0}; for(size_t n = (size_t){0x0};;n += (size_t){0x1}) { // First pass: get size of input array and determine size of output array. - uint8_t const utf = str[n]; - if(utf == (uint8_t){0x0}) { // Null-terminator. + if(utf[n] == (uint8_t){0x0}) { // Null-terminator. sz = n; break; } - if(utf >= (uint8_t){0xF0}) { // Four byte. + if(utf[n] >= (uint8_t){0xF0}) { // Four byte. outsz += (size_t){0x4}; n += (size_t){0x3}; continue; } - if(utf >= (uint8_t){0xE0}) { // Three bytes. + if(utf[n] >= (uint8_t){0xE0}) { // Three bytes. outsz += (size_t){0x3}; n += (size_t){0x2}; continue; } - if(utf >= (uint8_t){0xC0}) { // Two bytes. + if(utf[n] >= (uint8_t){0xC0}) { // Two bytes. outsz += (size_t){0x2}; n += (size_t){0x1}; continue; } - if(utf >= (uint8_t){0x80}) { // One byte. + if(utf[n] >= (uint8_t){0x80}) { // One byte. outsz += (size_t){0x1}; continue; } // Out of range. - return NULL; + return 0x1; } outsz += (size_t){0x1}; // Reserve space for null-terminator. if(outszptr != NULL) { *outszptr = outsz; } - uint32_t * codeps = malloc(outsz); - codeps[outsz - (size_t){0x1}] = (uint32_t){0x0}; // Create null-terminator on output array. + *codeps = malloc(outsz); + (*codeps)[outsz - (size_t){0x1}] = (uint32_t){0x0}; // Create null-terminator on output array. for(size_t n = (size_t){0x0}, outn = (size_t){0x0};n < sz;n += (size_t){0x1},outn += (size_t){0x1}) { // Second pass: decode UTF-8. - uint8_t utf = str[n]; - if(utf >= (uint8_t){0xF7}) { // Out of range. - return NULL; + uint8_t chr = utf[n]; + if(chr >= (uint8_t){0xF7}) { // Out of range. + return 0x1; } - if(utf >= (uint8_t){0xF0}) { // Four byte. - uint32_t codep = (uint32_t){(utf ^ 0xF0) << 0x12}; - n += (size_t){0x1}; - utf = str[n]; - codep += (uint32_t){(utf ^ 0x80) << 0xC}; - n += (size_t){0x1}; - utf = str[n]; - codep += (uint32_t){(utf ^ 0x80) << 0x6}; - n += (size_t){0x1}; - utf = str[n]; - codep += (uint32_t){(utf ^ 0x80)}; - codeps[outn] = codep; + if(chr >= (uint8_t){0xF0}) { // Four byte. + uint32_t codep = (uint32_t){(chr ^ 0xF0) << 0x12}; + n += (size_t){0x1}; + chr = utf[n]; + codep += (uint32_t){(chr ^ 0x80) << 0xC}; + n += (size_t){0x1}; + chr = utf[n]; + codep += (uint32_t){(chr ^ 0x80) << 0x6}; + n += (size_t){0x1}; + chr = utf[n]; + codep += (uint32_t){(chr ^ 0x80)}; + (*codeps)[outn] = codep; continue; } - if(utf >= (uint8_t){0xE0}) { // Three bytes. - uint32_t codep = (uint32_t){(utf ^ 0xE0) << 0xC}; - n += (size_t){0x1}; - utf = str[n]; - codep += (uint32_t){(utf ^ 0x80) << 0x6}; - n += (size_t){0x1}; - utf = str[n]; - codep += (uint32_t){(utf ^ 0x80)}; - n += (size_t){0x1}; - codeps[outn] = codep; + if(chr >= (uint8_t){0xE0}) { // Three bytes. + uint32_t codep = (uint32_t){(chr ^ 0xE0) << 0xC}; + n += (size_t){0x1}; + chr = utf[n]; + codep += (uint32_t){(chr ^ 0x80) << 0x6}; + n += (size_t){0x1}; + chr = utf[n]; + codep += (uint32_t){(chr ^ 0x80)}; + n += (size_t){0x1}; + (*codeps)[outn] = codep; continue; } - if(utf >= (uint8_t){0xC0}) { // Two bytes. - uint32_t codep = (uint32_t){(utf ^ 0xC0) << 0x6}; - n += (size_t){0x1}; - utf = str[n]; - codep += (uint32_t){(utf ^ 0x80)}; - n += (size_t){0x1}; - codeps[outn] = codep; + if(chr >= (uint8_t){0xC0}) { // Two bytes. + uint32_t codep = (uint32_t){(chr ^ 0xC0) << 0x6}; + n += (size_t){0x1}; + chr = utf[n]; + codep += (uint32_t){(chr ^ 0x80)}; + n += (size_t){0x1}; + (*codeps)[outn] = codep; continue; } - if(utf > (uint8_t){0x7F}) { // One byte. - uint32_t codep = (uint32_t){utf}; - codeps[outn] = codep; + if(chr > (uint8_t){0x7F}) { // One byte. + uint32_t codep = (uint32_t){chr}; + (*codeps)[outn] = codep; continue; } // Out of range. - return NULL; + return 0x1; } - return codeps; + return 0x0; } diff --git a/src/luma/utf8enc.c b/src/luma/utf8enc.c index 296b56f..3315026 100644 --- a/src/luma/utf8enc.c +++ b/src/luma/utf8enc.c @@ -1,8 +1,23 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of luma. + + luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with luma. + + If not, see <https://www.gnu.org/licenses/>. +*/ # include <luma/utf8enc.h> # include <stdint.h> # include <stdio.h> # include <stdlib.h> -uint8_t const * luma_utf8enc(uint32_t * codeps,size_t * outszptr) { +int luma_utf8enc(uint32_t * codeps,uint8_t * * utf,size_t * outszptr) { size_t sz = (size_t){0x0}; // Size of input array (bytes). size_t outsz = (size_t){0x0}; // Size of output array /bytes). for(size_t n = (size_t){0x0};;n += (size_t){0x1}) { // First pass: get size of input array, and determine size of output array. @@ -12,7 +27,7 @@ uint8_t const * luma_utf8enc(uint32_t * codeps,size_t * outszptr) { break; } if(codep >= (uint32_t){0x110000}) { // Codepoint out of range. - return NULL; + return 0x1; } if(codep >= (uint32_t){0x10000}) { // 4 bytes. outsz += (size_t){0x4}; @@ -33,36 +48,35 @@ uint8_t const * luma_utf8enc(uint32_t * codeps,size_t * outszptr) { if(outszptr != NULL) { *outszptr = outsz; } - uint8_t * str = malloc(outsz); // Allocate space for output array. - str[outsz - (size_t){0x1}] = (uint8_t){0x0}; // Create null-terminator on output array. + *utf = malloc(outsz); // Allocate space for output array. + (*utf)[outsz - (size_t){0x1}] = (uint8_t){0x0}; // Create null-terminator on output array. for(size_t n = (size_t){0x0}, outn = (size_t){0x0};n < sz;n += (size_t){0x1},outn += (size_t){0x1}) { // Second pass: encode each codepoint into UTF-8. - uint32_t codep = codeps[n]; // Current Unicode codepoint. - if(codep >= 0x10000) { // Four bytes. - str[outn] = (uint8_t){0xF0 + (codep >> 0x12)}; - outn += (size_t){0x1}; - str[outn] = (uint8_t){0x80 + ((codep >> 0xC) & 0x3F)}; - outn += (size_t){0x1}; - str[outn] = (uint8_t){0x80 + ((codep >> 0x6) & 0x3F)}; - outn += (size_t){0x1}; - str[outn] = (uint8_t){0x80 + (codep & 0x3F)}; + if(codeps[n] >= 0x10000) { // Four bytes. + (*utf)[outn] = (uint8_t){0xF0 + (codeps[n] >> 0x12)}; + outn += (size_t){0x1}; + (*utf)[outn] = (uint8_t){0x80 + ((codeps[n] >> 0xC) & 0x3F)}; + outn += (size_t){0x1}; + (*utf)[outn] = (uint8_t){0x80 + ((codeps[n] >> 0x6) & 0x3F)}; + outn += (size_t){0x1}; + (*utf)[outn] = (uint8_t){0x80 + (codeps[n] & 0x3F)}; continue; } - if(codep >= 0x800) { // Three bytes. - str[outn] = (uint8_t){0xE0 + (codep >> 0xC)}; - outn += (size_t){0x1}; - str[outn] = (uint8_t){0x80 + ((codep >> 0x6) & 0x3F)}; - outn += (size_t){0x1}; - str[outn] = (uint8_t){0x80 + (codep & 0x3F)}; + if(codeps[n] >= 0x800) { // Three bytes. + (*utf)[outn] = (uint8_t){0xE0 + (codeps[n] >> 0xC)}; + outn += (size_t){0x1}; + (*utf)[outn] = (uint8_t){0x80 + ((codeps[n] >> 0x6) & 0x3F)}; + outn += (size_t){0x1}; + (*utf)[outn] = (uint8_t){0x80 + (codeps[n] & 0x3F)}; continue; } - if(codep >= 0x80) { // Two bytes. - str[outn] = (uint8_t){0xC0 + (codep >> 0x6)}; - outn += (size_t){0x1}; - str[outn] = (uint8_t){0x80 + (codep & 0x3F)}; + if(codeps[n] >= 0x80) { // Two bytes. + (*utf)[outn] = (uint8_t){0xC0 + (codeps[n] >> 0x6)}; + outn += (size_t){0x1}; + (*utf)[outn] = (uint8_t){0x80 + (codeps[n] & 0x3F)}; continue; } // One byte. - str[outn] = codep; + (*utf)[outn] = codeps[n]; } - return (uint8_t const *){str}; + return 0x0; } @@ -1,3 +1,18 @@ +/* + Copyright 2021 Gabriel Jensen + + This file is part of luma. + + luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with luma. + + If not, see <https://www.gnu.org/licenses/>. +*/ # include <locale.h> # include <luma/arch.h> # include <luma/print.h> @@ -15,12 +30,21 @@ int main(void) { for(size_t i = (size_t){0x0};i < sizeof code / sizeof code[0x0];++i) { printf("Got code %d.\n",code[i]); } - uint8_t const * msg = luma_utf8enc((uint32_t[]){0x00A2,0x2C,0x939,0x2C,0x10348,0x2C,0x20AC,0x2C,0x218A,0x2C,0x1F44B,0x0},NULL); - printf("Array: %s\n",msg); - free((void *)msg); - uint32_t * codeps = luma_utf8dec(luma_utf8enc((uint32_t[]){0x1F44B,0x0},NULL),NULL); - printf("It is %u.\n",codeps[0x0]); + { + uint8_t * msg = NULL; + luma_utf8enc((uint32_t[]){0x00A2,0x2C,0x939,0x2C,0x10348,0x2C,0x20AC,0x2C,0x218A,0x2C,0x1F44B,0x0},&msg,NULL); + printf("Array: %s\n",msg); + free((void *){msg}); + } + { + uint32_t * codeps = NULL; + uint8_t * utf = NULL; + luma_utf8enc((uint32_t[]){0x1F44B,0x0},&utf,NULL); + luma_utf8dec(utf,&codeps,NULL); + free((void *)utf); + printf("It is %u.\n",codeps[0x0]); + free((void *)codeps); + } luma_print((uint32_t[]){0x48,0x65,0x6C,0x6C,0x6F,0x20,0xFFFD,0x65,0x72,0x65,0x21,0x0},(uint32_t){0xF0}); - free((void *)codeps); exit(EXIT_SUCCESS); } |