4 files changed, 295 insertions, 0 deletions
diff --git a/src/u8c/free.c b/src/u8c/free.c
new file mode 100644
index 0000000..f58f667
--- /dev/null
+++ b/src/u8c/free.c
@@ -0,0 +1,22 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or	(at your option) any later version.
+
+	u8c is distributed in the hope that it will be useful,	but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the	GNU Affero General Public License for more details.
+
+	You should have received a copy of the GNU Affero General Public License along with u8c.
+
+	If not, see <https://www.gnu.org/licenses/>.
+*/
+# include <u8c/free.h>
+# include <stdint.h>
+# include <stdlib.h>
+uint_least8_t u8c_free(void * ptr) { // Release ptr with free(); a null ptr is accepted and does nothing.
+	free(ptr);
+	return (uint_least8_t){0x0}; // There is no failure path: the result is always zero.
+}
diff --git a/src/u8c/print.c b/src/u8c/print.c
new file mode 100644
index 0000000..741d25a
--- /dev/null
+++ b/src/u8c/print.c
@@ -0,0 +1,96 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or	(at your option) any later version.
+
+	u8c is distributed in the hope that it will be useful,	but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the	GNU Affero General Public License for more details.
+
+	You should have received a copy of the GNU Affero General Public License along with u8c.
+
+	If not, see <https://www.gnu.org/licenses/>.
+*/
+# include <u8c/fmttyp.h>
+# include <u8c/u8enc.h>
+# include <stdarg.h>
+# include <stdint.h>
+# include <stdio.h>
+# include <stdlib.h>
+// Encode the null-terminated codepoint array cps as UTF-8 and write it to stdout; returns 0x0 on success, 0x1 if encoding fails.
+static uint_least8_t u8c_print_put(uint_least32_t * cps) {
+	size_t          utfsz = (size_t){0x0};
+	uint_least8_t * utf   = NULL;
+	if(u8c_u8enc(cps,&utfsz,&utf) != 0x0 || utf == NULL) { // utfsz and utf may be unset here: using them would underflow the length.
+		return 0x1;
+	}
+	fwrite(utf,(size_t){0x1},utfsz - (size_t){0x1},stdout); // utfsz includes the null-terminator, which is not written.
+	free(utf);
+	return 0x0;
+}
+// Write val to stdout in decimal, preceded by U+2212 MINUS SIGN if neg is nonzero.
+static uint_least8_t u8c_print_dec(uint_least64_t val,int neg) {
+	uint_least32_t digs[0x20] = {0x0}; // Filled back to front; the last element stays zero as the terminator.
+	size_t         pos        = (size_t){0x1F};
+	do {
+		pos       -= (size_t){0x1};
+		digs[pos] =  (uint_least32_t)(0x30 + val % 0xA); // U+0030 is DIGIT ZERO.
+		val       /= 0xA;
+	} while(val != 0x0 && pos > (size_t){0x1}); // Always keep one slot free for the sign.
+	if(neg) {
+		pos       -= (size_t){0x1};
+		digs[pos] =  (uint_least32_t){0x2212};
+	}
+	return u8c_print_put(&digs[pos]);
+}
+// Print the null-terminated UTF-32 string msg to stdout as UTF-8, followed by a line feed.
+// Every U+FFFD in msg is a placeholder: it consumes an enum u8c_fmttyp argument and then one value of the
+// matching kind (chr: uint_least32_t, int: int_least64_t, str: uint_least32_t *, uint: uint_least64_t).
+// Returns 0x0 on success; on the first encoding failure or unknown type printing stops and 0x1 is returned.
+uint_least8_t u8c_print(uint_least32_t * msg,...) {
+	uint_least8_t err = 0x0;
+	va_list       args;
+	va_start(args,msg);
+	for(size_t n = (size_t){0x0};err == 0x0 && msg[n] != (uint_least32_t){0x0};n += (size_t){0x1}) {
+		if(msg[n] != (uint_least32_t){0xFFFD}) { // Ordinary codepoint: print it unchanged.
+			err = u8c_print_put((uint_least32_t[]){msg[n],0x0});
+			continue;
+		}
+		switch(va_arg(args,enum u8c_fmttyp)) {
+		case u8c_fmttyp_chr: // A null character encodes to an empty string, so it prints nothing.
+			err = u8c_print_put((uint_least32_t[]){va_arg(args,uint_least32_t),0x0});
+			break;
+		case u8c_fmttyp_int:
+			{
+				int_least64_t const val = va_arg(args,int_least64_t);
+				err = u8c_print_dec(val < 0x0 ? -(uint_least64_t)val : (uint_least64_t)val,val < 0x0); // Unsigned negation is defined even for the minimum value.
+				break;
+			}
+		case u8c_fmttyp_str:
+			err = u8c_print_put(va_arg(args,uint_least32_t *));
+			break;
+		case u8c_fmttyp_uint: // Assumed to be passed as uint_least64_t, mirroring the signed case.
+			err = u8c_print_dec(va_arg(args,uint_least64_t),0x0);
+			break;
+		default: // Unknown type: the size of its argument is unknown, so parsing cannot go on.
+			err = 0x1;
+			break;
+		}
+	}
+	fwrite(&(uint_least8_t){0xA},(size_t){0x1},(size_t){0x1},stdout); // Terminating line feed.
+	va_end(args);
+	return err;
+}
+/*
+	#######################
+	#                     #
+	# ##  ##  ### # # ### #
+	# # # # #  #  ###  #  #
+	# ##  ##   #  ###  #  #
+	# #   # #  #  ###  #  #
+	# #   # # ### # #  #  #
+	#                     #
+	#######################
+*/
diff --git a/src/u8c/u8dec.c b/src/u8c/u8dec.c
new file mode 100644
index 0000000..60f4c3d
--- /dev/null
+++ b/src/u8c/u8dec.c
@@ -0,0 +1,95 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or	(at your option) any later version.
+
+	u8c is distributed in the hope that it will be useful,	but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the	GNU Affero General Public License for more details.
+
+	You should have received a copy of the GNU Affero General Public License along with u8c.
+
+	If not, see <https://www.gnu.org/licenses/>.
+*/
+# include <u8c/dbgprint.h>
+# include <u8c/u8dec.h>
+# include <stdint.h>
+# include <stdio.h>
+# include <stdlib.h>
+// Length in bytes of the UTF-8 sequence introduced by lead, or 0x0 if lead is 0xF7 or above.
+// Anything below 0xC0 - stray continuation bytes (0x80-0xBF) included - counts as a one-byte sequence.
+// NOTE(review): RFC 3629 excludes lead bytes 0xF5-0xFF; the 0xF7 cut-off is kept so the accepted input does not change.
+static size_t u8c_u8dec_len(uint_least8_t lead) {
+	if(lead >= (uint_least8_t){0xF7}) {
+		return (size_t){0x0};
+	}
+	if(lead >= (uint_least8_t){0xF0}) {
+		return (size_t){0x4};
+	}
+	if(lead >= (uint_least8_t){0xE0}) {
+		return (size_t){0x3};
+	}
+	if(lead >= (uint_least8_t){0xC0}) {
+		return (size_t){0x2};
+	}
+	return (size_t){0x1};
+}
+
+// Decode the null-terminated UTF-8 byte array utf into a null-terminated array of codepoints.
+//
+// utf:      input bytes, terminated by 0x00.
+// codepssz: if not NULL, receives the number of elements in the output array, terminator included.
+// codeps:   receives a newly allocated array holding the result; the caller releases it with free().
+//
+// Returns 0x0 on success. Returns 0x1, leaving *codepssz and *codeps untouched, if a lead byte is out
+// of range, if a multi-byte sequence is cut short or lacks a continuation byte, or if the array
+// cannot be allocated. All input is checked before anything is allocated, so failure leaks nothing.
+// Bytes 0x80-0xBF found in lead position are passed through as single codepoints.
+// Overlong forms, surrogates, and codepoints above U+10FFFF are not rejected.
+uint_least8_t u8c_u8dec(uint_least8_t * utf,size_t * codepssz,uint_least32_t * * codeps) {
+	// First pass: validate the input and count the codepoints.
+	size_t outsz = (size_t){0x1}; // Elements in the output; starts at one for the null-terminator.
+	for(size_t n = (size_t){0x0};utf[n] != (uint_least8_t){0x0};outsz += (size_t){0x1}) {
+		size_t const len = u8c_u8dec_len(utf[n]);
+		if(len == (size_t){0x0}) {
+			u8c_dbgprint(U"Out of range (in character decoding: byte too big).");
+			return 0x1;
+		}
+		for(size_t k = (size_t){0x1};k < len;k += (size_t){0x1}) {
+			if((utf[n + k] & 0xC0) != 0x80) { // Also true for the terminator, so the scan never passes the end of utf.
+				u8c_dbgprint(U"Invalid sequence (in character decoding: continuation byte expected).");
+				return 0x1;
+			}
+		}
+		n += len; // Step over the whole sequence, no more and no less.
+	}
+
+	// The array is sized in elements rather than bytes; calloc also guards the multiplication against overflow.
+	uint_least32_t * const out = calloc(outsz,sizeof *out);
+	if(out == NULL) {
+		return 0x1;
+	}
+
+	// Second pass: decode. The input was already validated by the first pass, so no checks are repeated.
+	// leadmask selects the payload bits of a lead byte, indexed by sequence length.
+	static uint_least8_t const leadmask[] = {0x0,0xFF,0x1F,0xF,0x7};
+	size_t                     outn       = (size_t){0x0};
+	for(size_t n = (size_t){0x0};utf[n] != (uint_least8_t){0x0};outn += (size_t){0x1}) {
+		size_t const   len   = u8c_u8dec_len(utf[n]);
+		uint_least32_t codep = (uint_least32_t)(utf[n] & leadmask[len]);
+		for(size_t k = (size_t){0x1};k < len;k += (size_t){0x1}) {
+			codep = (codep << 0x6) | (uint_least32_t)(utf[n + k] & 0x3F); // Each continuation byte contributes six bits.
+		}
+		out[outn] =  codep;
+		n         += len;
+	}
+	out[outn] = (uint_least32_t){0x0}; // Null-terminator.
+
+	if(codepssz != NULL) {
+		*codepssz = outsz;
+	}
+	*codeps = out;
+	return 0x0;
+}
diff --git a/src/u8c/u8enc.c b/src/u8c/u8enc.c
new file mode 100644
index 0000000..3ea6cdc
--- /dev/null
+++ b/src/u8c/u8enc.c
@@ -0,0 +1,82 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of u8c.
+
+	u8c is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or	(at your option) any later version.
+
+	u8c is distributed in the hope that it will be useful,	but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the	GNU Affero General Public License for more details.
+
+	You should have received a copy of the GNU Affero General Public License along with u8c.
+
+	If not, see <https://www.gnu.org/licenses/>.
+*/
+# include <u8c/u8enc.h>
+# include <stdint.h>
+# include <stdio.h>
+# include <stdlib.h>
+// Number of bytes UTF-8 needs for codep, or 0x0 if codep lies beyond U+10FFFF and cannot be encoded.
+static size_t u8c_u8enc_len(uint_least32_t codep) {
+	if(codep >= (uint_least32_t){0x110000}) {
+		return (size_t){0x0};
+	}
+	if(codep >= (uint_least32_t){0x10000}) {
+		return (size_t){0x4};
+	}
+	if(codep >= (uint_least32_t){0x800}) {
+		return (size_t){0x3};
+	}
+	return codep >= (uint_least32_t){0x80} ? (size_t){0x2} : (size_t){0x1};
+}
+// Encode the null-terminated codepoint array codeps as a null-terminated UTF-8 byte array.
+// codeps: input, terminated by U+0000.
+// utfsz:  if not NULL, receives the size of the output in bytes, terminator included.
+// utf:    receives a malloc'd buffer holding the result; the caller releases it with free().
+// Returns 0x0 on success. Returns 0x1, leaving *utfsz and *utf untouched, if a codepoint is out of
+// range or the buffer cannot be allocated. Surrogates (U+D800-U+DFFF) are not rejected.
+uint_least8_t u8c_u8enc(uint_least32_t * codeps,size_t * utfsz,uint_least8_t * * utf) {
+	size_t outsz = (size_t){0x1}; // Bytes in the output; starts at one for the null-terminator.
+	for(size_t n = (size_t){0x0};codeps[n] != (uint_least32_t){0x0};n += (size_t){0x1}) { // First pass: validate and measure.
+		size_t const len = u8c_u8enc_len(codeps[n]);
+		if(len == (size_t){0x0}) { // Codepoint out of range.
+			return 0x1;
+		}
+		outsz += len;
+	}
+	uint_least8_t * const out = malloc(outsz * sizeof *out);
+	if(out == NULL) { // Allocation can fail; report it instead of writing through a null pointer.
+		return 0x1;
+	}
+	uint_least8_t * pos = out; // Next byte to be written.
+	for(size_t n = (size_t){0x0};codeps[n] != (uint_least32_t){0x0};n += (size_t){0x1}) { // Second pass: encode each codepoint.
+		uint_least32_t const codep = codeps[n];
+		switch(u8c_u8enc_len(codep)) {
+		case 0x4:
+			*pos++ = (uint_least8_t)(0xF0 | (codep >> 0x12));
+			*pos++ = (uint_least8_t)(0x80 | ((codep >> 0xC) & 0x3F));
+			*pos++ = (uint_least8_t)(0x80 | ((codep >> 0x6) & 0x3F));
+			*pos++ = (uint_least8_t)(0x80 | (codep & 0x3F));
+			break;
+		case 0x3:
+			*pos++ = (uint_least8_t)(0xE0 | (codep >> 0xC));
+			*pos++ = (uint_least8_t)(0x80 | ((codep >> 0x6) & 0x3F));
+			*pos++ = (uint_least8_t)(0x80 | (codep & 0x3F));
+			break;
+		case 0x2:
+			*pos++ = (uint_least8_t)(0xC0 | (codep >> 0x6));
+			*pos++ = (uint_least8_t)(0x80 | (codep & 0x3F));
+			break;
+		default: // One byte: the codepoint is its own encoding.
+			*pos++ = (uint_least8_t)codep;
+			break;
+		}
+	}
+	*pos = (uint_least8_t){0x0}; // Null-terminator.
+	if(utfsz != NULL) {
+		*utfsz = outsz;
+	}
+	*utf = out;
+	return 0x0;
+}