summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/u8c/free.c22
-rw-r--r--src/u8c/print.c96
-rw-r--r--src/u8c/u8dec.c95
-rw-r--r--src/u8c/u8enc.c82
4 files changed, 295 insertions, 0 deletions
diff --git a/src/u8c/free.c b/src/u8c/free.c
new file mode 100644
index 0000000..f58f667
--- /dev/null
+++ b/src/u8c/free.c
@@ -0,0 +1,22 @@
+/*
+ Copyright 2021 Gabriel Jensen
+
+ This file is part of u8c.
+
+ u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+ u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ See the GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License along with u8c.
+
+ If not, see <https://www.gnu.org/licenses/>.
+*/
+# include <u8c/free.h>
+# include <stdint.h>
+# include <stdlib.h>
+/*
+    Hands the block referenced by ptr to free() and reports success.
+    ptr: pointer to release; passed to free() unchanged.
+    Returns: always 0x0.
+*/
+uint_least8_t u8c_free(void * ptr) {
+    uint_least8_t const status = (uint_least8_t){0x0}; // There is no failure path.
+    free(ptr);
+    return status;
+}
diff --git a/src/u8c/print.c b/src/u8c/print.c
new file mode 100644
index 0000000..741d25a
--- /dev/null
+++ b/src/u8c/print.c
@@ -0,0 +1,96 @@
+/*
+ Copyright 2021 Gabriel Jensen
+
+ This file is part of u8c.
+
+ u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+ u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ See the GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License along with u8c.
+
+ If not, see <https://www.gnu.org/licenses/>.
+*/
+# include <u8c/fmttyp.h>
+# include <u8c/u8enc.h>
+# include <stdarg.h>
+# include <stdint.h>
+# include <stdio.h>
+# include <stdlib.h>
+/*
+    Encodes the null-terminated codepoint array codeps with u8c_u8enc and writes the resulting bytes (without the terminator) to stdout.
+    Returns: 0x0 on success; 0x1 if codeps is NULL, if u8c_u8enc reports an error or yields no buffer, or if fwrite writes fewer bytes than requested.
+*/
+static uint_least8_t u8c_print_putcodeps(uint_least32_t * codeps) {
+    if(codeps == NULL) {
+        return 0x1;
+    }
+    size_t          utfsz = (size_t){0x0};
+    uint_least8_t * utf   = NULL;
+    if(u8c_u8enc(codeps,&utfsz,&utf) != 0x0 || utf == NULL || utfsz == (size_t){0x0}) {
+        free(utf); // utf is still NULL unless the encoder allocated it; free(NULL) is a no-op.
+        return 0x1;
+    }
+    size_t const len     = utfsz - (size_t){0x1}; // Do not write the null-terminator.
+    size_t const written = fwrite(utf,(size_t){0x1},len,stdout);
+    free(utf);
+    return written == len ? 0x0 : 0x1;
+}
+/*
+    Writes the null-terminated codepoint string msg to stdout as UTF-8, followed by a line feed (0xA).
+
+    Every U+FFFD in msg is a placeholder: an enum u8c_fmttyp is read from the variadic arguments and selects what is read and written next:
+        u8c_fmttyp_chr:  reads a uint_least32_t codepoint and writes it (U+0000 writes nothing).
+        u8c_fmttyp_int:  reads an int_least64_t and writes U+2212 if it is negative.
+        u8c_fmttyp_str:  reads a uint_least32_t * (null-terminated codepoints) and writes it.
+        u8c_fmttyp_uint: reads nothing and writes nothing.
+
+    Returns: 0x0 on success; 0x1 as soon as a piece can not be encoded or written, in which case the remainder of msg and the line feed are not written.
+*/
+uint_least8_t u8c_print(uint_least32_t * msg,...) {
+    uint_least8_t status = 0x0;
+    va_list       args;
+    va_start(args,msg);
+    for(size_t n = (size_t){0x0};status == 0x0;n += (size_t){0x1}) {
+        if(msg[n] == (uint_least32_t){0x0}) { // End of message.
+            if(fputc(0xA,stdout) == EOF) {
+                status = 0x1;
+            }
+            break;
+        }
+        if(msg[n] != (uint_least32_t){0xFFFD}) { // Ordinary codepoint.
+            status = u8c_print_putcodeps((uint_least32_t[]){msg[n],0x0});
+            continue;
+        }
+        switch(va_arg(args,enum u8c_fmttyp)) {
+        case u8c_fmttyp_chr:
+            {
+                uint_least32_t const chr = va_arg(args,uint_least32_t);
+                if(chr != (uint_least32_t){0x0}) {
+                    status = u8c_print_putcodeps((uint_least32_t[]){chr,0x0});
+                }
+                break;
+            }
+        case u8c_fmttyp_int:
+            {
+                int_least64_t const val = va_arg(args,int_least64_t);
+                if(val < 0x0) {
+                    status = u8c_print_putcodeps((uint_least32_t[]){0x2212,0x0}); // U+2212 MINUS SIGN
+                }
+                // NOTE(review): only the sign is written. The previous revision had an empty loop here (dividing by 0xB) that produced no output; the digits still need to be implemented once the intended base is confirmed.
+                break;
+            }
+        case u8c_fmttyp_str:
+            status = u8c_print_putcodeps(va_arg(args,uint_least32_t *));
+            break;
+        case u8c_fmttyp_uint:
+            // NOTE(review): not implemented. No argument is consumed, so a value passed for this placeholder would be read by the next one; confirm intended behaviour.
+            break;
+        default:
+            break;
+        }
+    }
+    va_end(args); // Reached on every path, including errors.
+    return status;
+}
+/*
+ #######################
+ # #
+ # ## ## ### # # ### #
+ # # # # # # ### # #
+ # ## ## # ### # #
+ # # # # # ### # #
+ # # # # ### # # # #
+ # #
+ #######################
+*/
diff --git a/src/u8c/u8dec.c b/src/u8c/u8dec.c
new file mode 100644
index 0000000..60f4c3d
--- /dev/null
+++ b/src/u8c/u8dec.c
@@ -0,0 +1,95 @@
+/*
+ Copyright 2021 Gabriel Jensen
+
+ This file is part of u8c.
+
+ u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+ u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ See the GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License along with u8c.
+
+ If not, see <https://www.gnu.org/licenses/>.
+*/
+# include <u8c/dbgprint.h>
+# include <u8c/u8dec.h>
+# include <stdint.h>
+# include <stdio.h>
+# include <stdlib.h>
+/*
+    Decodes the single UTF-8 sequence starting at utf[0] into *codep.
+
+    Bytes are read one at a time and reading stops at the first byte that is not a continuation byte (10xxxxxx), so a truncated sequence never reads beyond the null-terminator of the input.
+    Bytes 0x80-0xBF in lead position are passed through as a single codepoint of the same value (unchanged from the previous behaviour).
+    NOTE(review): overlong forms and surrogates (U+D800-U+DFFF) are not rejected.
+
+    Returns: the number of bytes consumed (0x1 to 0x4), or 0x0 on malformed or out-of-range input (a message is sent to u8c_dbgprint).
+*/
+static size_t u8c_u8dec_one(uint_least8_t const * utf,uint_least32_t * codep) {
+    uint_least8_t const lead = utf[0x0];
+    size_t              len  = (size_t){0x1};
+    uint_least32_t      val  = (uint_least32_t)lead;
+    if(lead >= (uint_least8_t){0xF5}) { // These lead bytes can only encode values above U+10FFFF.
+        u8c_dbgprint(U"Out of range (in character decoding: byte too big).");
+        return (size_t){0x0};
+    }
+    if(lead >= (uint_least8_t){0xF0}) { // Four bytes.
+        len = (size_t){0x4};
+        val = (uint_least32_t)(lead & 0x7);
+    }
+    else if(lead >= (uint_least8_t){0xE0}) { // Three bytes.
+        len = (size_t){0x3};
+        val = (uint_least32_t)(lead & 0xF);
+    }
+    else if(lead >= (uint_least8_t){0xC0}) { // Two bytes.
+        len = (size_t){0x2};
+        val = (uint_least32_t)(lead & 0x1F);
+    }
+    for(size_t i = (size_t){0x1};i < len;i += (size_t){0x1}) {
+        uint_least8_t const cont = utf[i];
+        if((cont & 0xC0) != 0x80) { // Also catches the null-terminator in a truncated sequence.
+            u8c_dbgprint(U"Malformed sequence (in character decoding: expected continuation byte).");
+            return (size_t){0x0};
+        }
+        val = (val << 0x6) | (uint_least32_t)(cont & 0x3F);
+    }
+    if(val >= (uint_least32_t){0x110000}) { // Same limit as u8c_u8enc.
+        u8c_dbgprint(U"Out of range (in character decoding: codepoint too big).");
+        return (size_t){0x0};
+    }
+    *codep = val;
+    return len;
+}
+/*
+    Decodes the null-terminated UTF-8 string utf into a newly allocated, null-terminated array of Unicode codepoints.
+
+    utf:      null-terminated UTF-8 input.
+    codepssz: if not NULL, receives the number of elements in the output array, including the null-terminator.
+    codeps:   receives the output array, allocated with malloc; the caller releases it with free().
+
+    Returns: 0x0 on success. On failure 0x1 is returned, *codeps is NULL, *codepssz (if given) is 0x0, and nothing is left allocated.
+*/
+uint_least8_t u8c_u8dec(uint_least8_t * utf,size_t * codepssz,uint_least32_t * * codeps) {
+    if(codeps == NULL) {
+        return 0x1;
+    }
+    *codeps = NULL;
+    if(codepssz != NULL) {
+        *codepssz = (size_t){0x0};
+    }
+    if(utf == NULL) {
+        return 0x1;
+    }
+    size_t outsz = (size_t){0x1}; // Room for the null-terminator.
+    for(size_t n = (size_t){0x0};utf[n] != (uint_least8_t){0x0};) { // First pass: validate the input and count the codepoints.
+        uint_least32_t scratch = (uint_least32_t){0x0};
+        size_t const   len     = u8c_u8dec_one(&utf[n],&scratch);
+        if(len == (size_t){0x0}) {
+            return 0x1;
+        }
+        n     += len;
+        outsz += (size_t){0x1};
+    }
+    if(outsz > SIZE_MAX / sizeof (uint_least32_t)) { // The byte count below would overflow.
+        return 0x1;
+    }
+    uint_least32_t * const out = malloc(outsz * sizeof *out); // Size in bytes, not in elements.
+    if(out == NULL) {
+        return 0x1;
+    }
+    size_t outn = (size_t){0x0};
+    for(size_t n = (size_t){0x0};utf[n] != (uint_least8_t){0x0};outn += (size_t){0x1}) { // Second pass: decode into the output array.
+        size_t const len = u8c_u8dec_one(&utf[n],&out[outn]);
+        if(len == (size_t){0x0}) { // Can not happen after the first pass; kept so the loop always terminates.
+            free(out);
+            return 0x1;
+        }
+        n += len;
+    }
+    out[outn] = (uint_least32_t){0x0}; // Null-terminator.
+    *codeps = out;
+    if(codepssz != NULL) {
+        *codepssz = outsz;
+    }
+    return 0x0;
+}
diff --git a/src/u8c/u8enc.c b/src/u8c/u8enc.c
new file mode 100644
index 0000000..3ea6cdc
--- /dev/null
+++ b/src/u8c/u8enc.c
@@ -0,0 +1,82 @@
+/*
+ Copyright 2021 Gabriel Jensen
+
+ This file is part of u8c.
+
+ u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+ u8c is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ See the GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License along with u8c.
+
+ If not, see <https://www.gnu.org/licenses/>.
+*/
+# include <u8c/u8enc.h>
+# include <stdint.h>
+# include <stdio.h>
+# include <stdlib.h>
+/*
+    Returns the number of UTF-8 bytes needed to encode codep (0x1 to 0x4), or 0x0 if codep is above U+10FFFF.
+*/
+static size_t u8c_u8enc_len(uint_least32_t codep) {
+    if(codep >= (uint_least32_t){0x110000}) { // Codepoint out of range.
+        return (size_t){0x0};
+    }
+    if(codep >= (uint_least32_t){0x10000}) {
+        return (size_t){0x4};
+    }
+    if(codep >= (uint_least32_t){0x800}) {
+        return (size_t){0x3};
+    }
+    if(codep >= (uint_least32_t){0x80}) {
+        return (size_t){0x2};
+    }
+    return (size_t){0x1};
+}
+/*
+    Encodes the null-terminated codepoint array codeps into a newly allocated, null-terminated UTF-8 string.
+
+    codeps: null-terminated array of Unicode codepoints (U+0000 ends the input).
+    utfsz:  if not NULL, receives the size of the output in bytes, including the null-terminator.
+    utf:    receives the output buffer, allocated with malloc; the caller releases it with free().
+
+    Returns: 0x0 on success. On failure (NULL argument, codepoint at or above 0x110000, or allocation failure) 0x1 is returned, *utf is NULL and *utfsz (if given) is 0x0.
+    NOTE(review): surrogates (U+D800-U+DFFF) are encoded as ordinary three-byte sequences, as before.
+*/
+uint_least8_t u8c_u8enc(uint_least32_t * codeps,size_t * utfsz,uint_least8_t * * utf) {
+    if(utf == NULL) {
+        return 0x1;
+    }
+    *utf = NULL; // Defined outputs on every failure path.
+    if(utfsz != NULL) {
+        *utfsz = (size_t){0x0};
+    }
+    if(codeps == NULL) {
+        return 0x1;
+    }
+    size_t outsz = (size_t){0x1}; // Size of output array (bytes); starts with room for the null-terminator.
+    for(size_t n = (size_t){0x0};codeps[n] != (uint_least32_t){0x0};n += (size_t){0x1}) { // First pass: validate and determine size of output array.
+        size_t const len = u8c_u8enc_len(codeps[n]);
+        if(len == (size_t){0x0}) {
+            return 0x1;
+        }
+        outsz += len;
+    }
+    uint_least8_t * const out = malloc(outsz * sizeof *out);
+    if(out == NULL) { // Allocation failed; nothing to clean up.
+        return 0x1;
+    }
+    uint_least8_t * cur = out; // Next byte to write.
+    for(size_t n = (size_t){0x0};codeps[n] != (uint_least32_t){0x0};n += (size_t){0x1}) { // Second pass: encode each codepoint into UTF-8.
+        uint_least32_t const codep = codeps[n];
+        switch(u8c_u8enc_len(codep)) {
+        case 0x4:
+            *cur++ = (uint_least8_t)(0xF0 | (codep >> 0x12));
+            *cur++ = (uint_least8_t)(0x80 | ((codep >> 0xC) & 0x3F));
+            *cur++ = (uint_least8_t)(0x80 | ((codep >> 0x6) & 0x3F));
+            *cur++ = (uint_least8_t)(0x80 | (codep & 0x3F));
+            break;
+        case 0x3:
+            *cur++ = (uint_least8_t)(0xE0 | (codep >> 0xC));
+            *cur++ = (uint_least8_t)(0x80 | ((codep >> 0x6) & 0x3F));
+            *cur++ = (uint_least8_t)(0x80 | (codep & 0x3F));
+            break;
+        case 0x2:
+            *cur++ = (uint_least8_t)(0xC0 | (codep >> 0x6));
+            *cur++ = (uint_least8_t)(0x80 | (codep & 0x3F));
+            break;
+        default: // One byte.
+            *cur++ = (uint_least8_t)codep;
+            break;
+        }
+    }
+    *cur = (uint_least8_t){0x0}; // Null-terminator.
+    *utf = out;
+    if(utfsz != NULL) {
+        *utfsz = outsz;
+    }
+    return 0x0;
+}