4 files changed, 152 insertions, 80 deletions
diff --git a/src/luma/print.c b/src/luma/print.c
index 8d8ac1d..a085e8f 100644
--- a/src/luma/print.c
+++ b/src/luma/print.c
@@ -1,7 +1,23 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of luma.
+
+	luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+	luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the GNU Affero General Public License for more details.
+
+	You should have received a copy of the GNU Affero General Public License along with luma.
+
+	If not, see <https://www.gnu.org/licenses/>.
+*/
 # include <luma/utf8enc.h>
 # include <stdarg.h>
 # include <stdint.h>
 # include <stdio.h>
+# include <stdlib.h>
 void luma_print(uint32_t * msg,...) {
 	va_list args;
 	va_start(args,msg);
@@ -12,13 +28,17 @@ void luma_print(uint32_t * msg,...) {
 		}
 		if(msg[n] == (uint32_t){0xFFFD}) {
 			size_t          chrsz = (size_t){0x0};
-			uint8_t const * chr   = luma_utf8enc((uint32_t[]){va_arg(args,uint32_t),0x0},&chrsz);
+			uint8_t * chr   = NULL;
+			luma_utf8enc((uint32_t[]){va_arg(args,uint32_t),0x0},&chr,&chrsz);
 			fwrite(chr,0x1,chrsz - (size_t){0x1},stdout);
+			free((void *)chr);
 			continue;
 		}
 		size_t          chrsz = (size_t){0x0};
-		uint8_t const * chr   = luma_utf8enc((uint32_t[]){msg[n],0x0,0x0},&chrsz);
+		uint8_t * chr   = NULL;
+		luma_utf8enc((uint32_t[]){msg[n],0x0,0x0},&chr,&chrsz);
 		fwrite(chr,0x1,chrsz - (size_t){0x1},stdout);
+		free((void *)chr);
 	}
 	va_end(args);
 }
diff --git a/src/luma/utf8dec.c b/src/luma/utf8dec.c
index f6e29be..e6c302e 100644
--- a/src/luma/utf8dec.c
+++ b/src/luma/utf8dec.c
@@ -1,91 +1,105 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of luma.
+
+	luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+	luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the GNU Affero General Public License for more details.
+
+	You should have received a copy of the GNU Affero General Public License along with luma.
+
+	If not, see <https://www.gnu.org/licenses/>.
+*/
 # include <luma/utf8dec.h>
 # include <stdint.h>
 # include <stdio.h>
 # include <stdlib.h>
-uint32_t * luma_utf8dec(uint8_t const * str,size_t * outszptr) {
+int luma_utf8dec(uint8_t * utf,uint32_t * * codeps,size_t * outszptr) { // Decodes the null-terminated byte string utf as UTF-8 into a malloc'd array stored in *codeps; if outszptr is non-NULL, *outszptr receives the computed output size. Returns 0x0 on success and 0x1 on error.
 	size_t sz    = (size_t){0x0};
 	size_t outsz = (size_t){0x0};
 	for(size_t n = (size_t){0x0};;n += (size_t){0x1}) { // First pass: get size of input array and determine size of output array.
-		uint8_t const utf = str[n];
-		if(utf == (uint8_t){0x0}) { // Null-terminator.
+		if(utf[n] == (uint8_t){0x0}) { // Null-terminator: record the input length and stop.
 			sz = n;
 			break;
 		}
-		if(utf >= (uint8_t){0xF0}) { // Four byte.
+		if(utf[n] >= (uint8_t){0xF0}) { // Four bytes.
 			outsz += (size_t){0x4};
 			n     += (size_t){0x3};
 			continue;
 		}
-		if(utf >= (uint8_t){0xE0}) { // Three bytes.
+		if(utf[n] >= (uint8_t){0xE0}) { // Three bytes.
 			outsz += (size_t){0x3};
 			n     += (size_t){0x2};
 			continue;
 		}
-		if(utf >= (uint8_t){0xC0}) { // Two bytes.
+		if(utf[n] >= (uint8_t){0xC0}) { // Two bytes.
 			outsz += (size_t){0x2};
 			n     += (size_t){0x1};
 			continue;
 		}
-		if(utf >= (uint8_t){0x80}) { // One byte.
+		if(utf[n] >= (uint8_t){0x80}) { // Bytes 0x80-0xBF count as one output unit. NOTE(review): bytes below 0x80 fall through to the error return below; confirm that rejecting them is intended.
 			outsz += (size_t){0x1};
 			continue;
 		}
 		// Out of range.
-		return NULL;
+		return 0x1;
 	}
 	outsz += (size_t){0x1}; // Reserve space for null-terminator.
 	if(outszptr != NULL) {
 		*outszptr = outsz;
 	}
-	uint32_t * codeps             = malloc(outsz);
-	codeps[outsz - (size_t){0x1}] = (uint32_t){0x0}; // Create null-terminator on output array.
+	*codeps                          = malloc(outsz); // NOTE(review): requests outsz bytes although the array is indexed as uint32_t, and the result is not checked for NULL; confirm the intended size.
+	(*codeps)[outsz - (size_t){0x1}] = (uint32_t){0x0}; // Create null-terminator on output array.
 	for(size_t n = (size_t){0x0}, outn = (size_t){0x0};n < sz;n += (size_t){0x1},outn += (size_t){0x1}) { // Second pass: decode UTF-8.
-		uint8_t utf = str[n];
-		if(utf >= (uint8_t){0xF7}) { // Out of range.
-			return NULL;
+		uint8_t chr = utf[n]; // Current input byte.
+		if(chr >= (uint8_t){0xF7}) { // Out of range.
+			return 0x1; // NOTE(review): the array already stored in *codeps is not freed on this path.
 		}
-		if(utf >= (uint8_t){0xF0}) { // Four byte.
-			uint32_t codep =  (uint32_t){(utf ^ 0xF0) << 0x12};
-			n              += (size_t){0x1};
-			utf            =  str[n];
-			codep          += (uint32_t){(utf ^ 0x80) << 0xC};
-			n              += (size_t){0x1};
-			utf            =  str[n];
-			codep          += (uint32_t){(utf ^ 0x80) << 0x6};
-			n              += (size_t){0x1};
-			utf            =  str[n];
-			codep          += (uint32_t){(utf ^ 0x80)};
-			codeps[outn]   =  codep;
+		if(chr >= (uint8_t){0xF0}) { // Four bytes.
+			uint32_t codep  =  (uint32_t){(chr ^ 0xF0) << 0x12}; // Bits taken from the first byte.
+			n               += (size_t){0x1};
+			chr             =  utf[n];
+			codep           += (uint32_t){(chr ^ 0x80) << 0xC}; // Bits taken from the second byte.
+			n               += (size_t){0x1};
+			chr             =  utf[n];
+			codep           += (uint32_t){(chr ^ 0x80) << 0x6}; // Bits taken from the third byte.
+			n               += (size_t){0x1};
+			chr             =  utf[n];
+			codep           += (uint32_t){(chr ^ 0x80)}; // Bits taken from the fourth byte.
+			(*codeps)[outn] =  codep;
 			continue;
 		}
-		if(utf >= (uint8_t){0xE0}) { // Three bytes.
-			uint32_t codep =  (uint32_t){(utf ^ 0xE0) << 0xC};
-			n              += (size_t){0x1};
-			utf            =  str[n];
-			codep          += (uint32_t){(utf ^ 0x80) << 0x6};
-			n              += (size_t){0x1};
-			utf            =  str[n];
-			codep          += (uint32_t){(utf ^ 0x80)};
-			n              += (size_t){0x1};
-			codeps[outn]   =  codep;
+		if(chr >= (uint8_t){0xE0}) { // Three bytes.
+			uint32_t codep  =  (uint32_t){(chr ^ 0xE0) << 0xC}; // Bits taken from the first byte.
+			n               += (size_t){0x1};
+			chr             =  utf[n];
+			codep           += (uint32_t){(chr ^ 0x80) << 0x6}; // Bits taken from the second byte.
+			n               += (size_t){0x1};
+			chr             =  utf[n];
+			codep           += (uint32_t){(chr ^ 0x80)}; // Bits taken from the third byte.
+			n               += (size_t){0x1}; // NOTE(review): third increment for a three-byte sequence, on top of the loop's own; the four-byte branch advances only three times. Confirm this does not skip a byte.
+			(*codeps)[outn] =  codep;
 			continue;
 		}
-		if(utf >= (uint8_t){0xC0}) { // Two bytes.
-			uint32_t codep =  (uint32_t){(utf ^ 0xC0) << 0x6};
-			n              += (size_t){0x1};
-			utf            =  str[n];
-			codep          += (uint32_t){(utf ^ 0x80)};
-			n              += (size_t){0x1};
-			codeps[outn]   =  codep;
+		if(chr >= (uint8_t){0xC0}) { // Two bytes.
+			uint32_t codep  =  (uint32_t){(chr ^ 0xC0) << 0x6}; // Bits taken from the first byte.
+			n               += (size_t){0x1};
+			chr             =  utf[n];
+			codep           += (uint32_t){(chr ^ 0x80)}; // Bits taken from the second byte.
+			n               += (size_t){0x1}; // NOTE(review): second increment for a two-byte sequence, on top of the loop's own; confirm this does not skip a byte.
+			(*codeps)[outn] =  codep;
 			continue;
 		}
-		if(utf > (uint8_t){0x7F}) { // One byte.
-			uint32_t codep = (uint32_t){utf};
-			codeps[outn]   = codep;
+		if(chr > (uint8_t){0x7F}) { // One byte: values 0x80-0xBF are copied unchanged.
+			uint32_t codep  = (uint32_t){chr};
+			(*codeps)[outn] = codep;
 			continue;
 		}
 		// Out of range.
-		return NULL;
+		return 0x1;
 	}
-	return codeps;
+	return 0x0; // Success.
 }
diff --git a/src/luma/utf8enc.c b/src/luma/utf8enc.c
index 296b56f..3315026 100644
--- a/src/luma/utf8enc.c
+++ b/src/luma/utf8enc.c
@@ -1,8 +1,23 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of luma.
+
+	luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+	luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the GNU Affero General Public License for more details.
+
+	You should have received a copy of the GNU Affero General Public License along with luma.
+
+	If not, see <https://www.gnu.org/licenses/>.
+*/
 # include <luma/utf8enc.h>
 # include <stdint.h>
 # include <stdio.h>
 # include <stdlib.h>
-uint8_t const * luma_utf8enc(uint32_t * codeps,size_t * outszptr) {
+int luma_utf8enc(uint32_t * codeps,uint8_t * * utf,size_t * outszptr) {
 	size_t sz    = (size_t){0x0}; // Size of input array (bytes).
 	size_t outsz = (size_t){0x0}; // Size of output array /bytes).
 	for(size_t n = (size_t){0x0};;n += (size_t){0x1}) { // First pass: get size of input array, and determine size of output array.
@@ -12,7 +27,7 @@ uint8_t const * luma_utf8enc(uint32_t * codeps,size_t * outszptr) {
 			break;
 		}
 		if(codep >= (uint32_t){0x110000}) { // Codepoint out of range.
-			return NULL;
+			return 0x1;
 		}
 		if(codep >= (uint32_t){0x10000}) { // 4 bytes.
 			outsz += (size_t){0x4};
@@ -33,36 +48,35 @@ uint8_t const * luma_utf8enc(uint32_t * codeps,size_t * outszptr) {
 	if(outszptr != NULL) {
 		*outszptr = outsz;
 	}
-	uint8_t * str              = malloc(outsz); // Allocate space for output array.
-	str[outsz - (size_t){0x1}] = (uint8_t){0x0}; // Create null-terminator on output array.
+	*utf                        = malloc(outsz); // Allocate space for output array.
+	(*utf)[outsz - (size_t){0x1}] = (uint8_t){0x0}; // Create null-terminator on output array.
 	for(size_t n = (size_t){0x0}, outn = (size_t){0x0};n < sz;n += (size_t){0x1},outn += (size_t){0x1}) { // Second pass: encode each codepoint into UTF-8.
-		uint32_t codep = codeps[n]; // Current Unicode codepoint.
-		if(codep >= 0x10000) { // Four bytes.
-			str[outn] = (uint8_t){0xF0 + (codep >> 0x12)};
-			outn      += (size_t){0x1};
-			str[outn] =  (uint8_t){0x80 + ((codep >> 0xC) & 0x3F)};
-			outn      += (size_t){0x1};
-			str[outn] =  (uint8_t){0x80 + ((codep >> 0x6) & 0x3F)};
-			outn      += (size_t){0x1};
-			str[outn] =  (uint8_t){0x80 + (codep & 0x3F)};
+		if(codeps[n] >= 0x10000) { // Four bytes.
+			(*utf)[outn] = (uint8_t){0xF0 + (codeps[n] >> 0x12)};
+			outn         += (size_t){0x1};
+			(*utf)[outn] =  (uint8_t){0x80 + ((codeps[n] >> 0xC) & 0x3F)};
+			outn         += (size_t){0x1};
+			(*utf)[outn] =  (uint8_t){0x80 + ((codeps[n] >> 0x6) & 0x3F)};
+			outn         += (size_t){0x1};
+			(*utf)[outn] =  (uint8_t){0x80 + (codeps[n] & 0x3F)};
 			continue;
 		}
-		if(codep >= 0x800) { // Three bytes.
-			str[outn] =  (uint8_t){0xE0 + (codep >> 0xC)};
-			outn      += (size_t){0x1};
-			str[outn] =  (uint8_t){0x80 + ((codep >> 0x6) & 0x3F)};
-			outn      += (size_t){0x1};
-			str[outn] =  (uint8_t){0x80 + (codep & 0x3F)};
+		if(codeps[n] >= 0x800) { // Three bytes.
+			(*utf)[outn] =  (uint8_t){0xE0 + (codeps[n] >> 0xC)};
+			outn         += (size_t){0x1};
+			(*utf)[outn] =  (uint8_t){0x80 + ((codeps[n] >> 0x6) & 0x3F)};
+			outn         += (size_t){0x1};
+			(*utf)[outn] =  (uint8_t){0x80 + (codeps[n] & 0x3F)};
 			continue;
 		}
-		if(codep >= 0x80) { // Two bytes.
-			str[outn] =  (uint8_t){0xC0 + (codep >> 0x6)};
-			outn      += (size_t){0x1};
-			str[outn] =  (uint8_t){0x80 + (codep & 0x3F)};
+		if(codeps[n] >= 0x80) { // Two bytes.
+			(*utf)[outn] =  (uint8_t){0xC0 + (codeps[n] >> 0x6)};
+			outn         += (size_t){0x1};
+			(*utf)[outn] =  (uint8_t){0x80 + (codeps[n] & 0x3F)};
 			continue;
 		}
 		// One byte.
-		str[outn] =  codep;
+		(*utf)[outn] = codeps[n];
 	}
-	return (uint8_t const *){str};
+	return 0x0;
 }
diff --git a/src/main.c b/src/main.c
index e5c7ef6..969bdcd 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,3 +1,18 @@
+/*
+	Copyright 2021 Gabriel Jensen
+
+	This file is part of luma.
+
+	luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+	luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the GNU Affero General Public License for more details.
+
+	You should have received a copy of the GNU Affero General Public License along with luma.
+
+	If not, see <https://www.gnu.org/licenses/>.
+*/
 # include <locale.h>
 # include <luma/arch.h>
 # include <luma/print.h>
@@ -15,12 +30,21 @@ int main(void) {
 	for(size_t i = (size_t){0x0};i < sizeof code / sizeof code[0x0];++i) {
 		printf("Got code %d.\n",code[i]);
 	}
-	uint8_t const * msg = luma_utf8enc((uint32_t[]){0x00A2,0x2C,0x939,0x2C,0x10348,0x2C,0x20AC,0x2C,0x218A,0x2C,0x1F44B,0x0},NULL);
-	printf("Array: %s\n",msg);
-	free((void *)msg);
-	uint32_t * codeps = luma_utf8dec(luma_utf8enc((uint32_t[]){0x1F44B,0x0},NULL),NULL);
-	printf("It is %u.\n",codeps[0x0]);
+	{
+		uint8_t * msg = NULL;
+		luma_utf8enc((uint32_t[]){0x00A2,0x2C,0x939,0x2C,0x10348,0x2C,0x20AC,0x2C,0x218A,0x2C,0x1F44B,0x0},&msg,NULL);
+		printf("Array: %s\n",msg);
+		free((void *){msg});
+	}
+	{
+		uint32_t * codeps = NULL;
+		uint8_t *  utf    = NULL;
+		luma_utf8enc((uint32_t[]){0x1F44B,0x0},&utf,NULL);
+		luma_utf8dec(utf,&codeps,NULL);
+		free((void *)utf);
+		printf("It is %u.\n",codeps[0x0]);
+		free((void *)codeps);
+	}
 	luma_print((uint32_t[]){0x48,0x65,0x6C,0x6C,0x6F,0x20,0xFFFD,0x65,0x72,0x65,0x21,0x0},(uint32_t){0xF0});
-	free((void *)codeps);
 	exit(EXIT_SUCCESS);
 }