summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/luma/print.c44
-rw-r--r--src/luma/utf8dec.c105
-rw-r--r--src/luma/utf8enc.c82
-rw-r--r--src/main.c50
4 files changed, 0 insertions, 281 deletions
diff --git a/src/luma/print.c b/src/luma/print.c
deleted file mode 100644
index a085e8f..0000000
--- a/src/luma/print.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- Copyright 2021 Gabriel Jensen
-
- This file is part of luma.
-
- luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
-
- luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
- See the GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License along with luma.
-
- If not, see <https://www.gnu.org/licenses/>.
-*/
-# include <luma/utf8enc.h>
-# include <stdarg.h>
-# include <stdint.h>
-# include <stdio.h>
-# include <stdlib.h>
/*
	Writes the null-terminated UTF-32 string msg to stdout as UTF-8, followed by a single line feed (0x0A).

	Every U+FFFD in msg acts as a placeholder: it is replaced by the next variadic argument, which is read as a uint32_t codepoint.
	The caller must supply one such argument per placeholder.

	A codepoint that luma_utf8enc refuses to encode (it returns non-zero or yields no buffer) is skipped rather than written; previously this case passed a null pointer and a wrapped-around length to fwrite.
*/
void luma_print(uint32_t * msg,...) {
	va_list args;
	va_start(args,msg);
	for(size_t n = 0x0;msg[n] != 0x0;++n) {
		uint32_t codep = msg[n];
		if(codep == 0xFFFD) { // Placeholder: take the codepoint from the argument list instead.
			codep = va_arg(args,uint32_t);
		}
		uint8_t * chr   = NULL;
		size_t    chrsz = 0x0;
		if(luma_utf8enc((uint32_t[]){codep,0x0},&chr,&chrsz) != 0x0 || chr == NULL || chrsz == 0x0) {
			free(chr); // free(NULL) is a no-op.
			continue;
		}
		fwrite(chr,0x1,chrsz - 0x1,stdout); // chrsz includes the null-terminator, which is not printed.
		free(chr);
	}
	fputc(0xA,stdout);
	va_end(args);
}
diff --git a/src/luma/utf8dec.c b/src/luma/utf8dec.c
deleted file mode 100644
index e6c302e..0000000
--- a/src/luma/utf8dec.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- Copyright 2021 Gabriel Jensen
-
- This file is part of luma.
-
- luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
-
- luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
- See the GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License along with luma.
-
- If not, see <https://www.gnu.org/licenses/>.
-*/
-# include <luma/utf8dec.h>
-# include <stdint.h>
-# include <stdio.h>
-# include <stdlib.h>
/*
	Returns the length in bytes (1 to 4) of the UTF-8 sequence introduced by lead, or 0 if lead cannot start a sequence (continuation bytes 0x80-0xBF and bytes 0xF8-0xFF).
*/
static size_t luma_utf8dec_seqlen(uint8_t lead) {
	if(lead < 0x80) { // 0xxxxxxx
		return 0x1;
	}
	if(lead < 0xC0) { // 10xxxxxx is a continuation byte, never a lead byte.
		return 0x0;
	}
	if(lead < 0xE0) { // 110xxxxx
		return 0x2;
	}
	if(lead < 0xF0) { // 1110xxxx
		return 0x3;
	}
	if(lead < 0xF8) { // 11110xxx
		return 0x4;
	}
	return 0x0;
}

/*
	Decodes the single UTF-8 sequence that starts at utf.

	On success stores the codepoint in *codep and the number of bytes consumed in *len, and returns 0.
	Returns 1 (leaving *codep and *len untouched) if the sequence is malformed: invalid lead byte, missing or invalid continuation byte, overlong form, or a value above U+10FFFF.
	Surrogate values (U+D800-U+DFFF) are not rejected.
*/
static int luma_utf8dec_one(uint8_t const * utf,uint32_t * codep,size_t * len) {
	static uint8_t const  leadmask[] = {0x0,0x7F,0x1F,0xF,0x7};       // Payload bits of the lead byte, indexed by sequence length.
	static uint32_t const minval[]   = {0x0,0x0,0x80,0x800,0x10000}; // Smallest codepoint that needs that many bytes.
	size_t const seqlen = luma_utf8dec_seqlen(utf[0x0]);
	if(seqlen == 0x0) {
		return 0x1;
	}
	uint32_t val = (uint32_t)(utf[0x0] & leadmask[seqlen]);
	for(size_t n = 0x1;n < seqlen;++n) {
		// A null-terminator fails this test too, so a truncated sequence never makes us read past the end of the string.
		if((utf[n] & 0xC0) != 0x80) {
			return 0x1;
		}
		val = (val << 0x6) | (uint32_t)(utf[n] & 0x3F);
	}
	if(val < minval[seqlen] || val > 0x10FFFF) {
		return 0x1;
	}
	*codep = val;
	*len   = seqlen;
	return 0x0;
}

/*
	Decodes the null-terminated UTF-8 string utf into a newly allocated, null-terminated array of Unicode codepoints.

	utf:      Input string.
	codeps:   Receives the malloc'd output array on success; the caller owns it and must free() it.
	outszptr: If not NULL, receives the number of elements in the output array, including the terminating 0.

	Returns 0 on success.
	Returns 1 if the input is not well-formed UTF-8 or if memory could not be allocated; in that case *codeps and *outszptr are left untouched and nothing is allocated.
*/
int luma_utf8dec(uint8_t * utf,uint32_t * * codeps,size_t * outszptr) {
	size_t count = 0x0; // Number of codepoints in the input.
	for(size_t n = 0x0;utf[n] != 0x0;) { // First pass: validate the input and count its codepoints.
		uint32_t codep = 0x0;
		size_t   len   = 0x0;
		if(luma_utf8dec_one(&utf[n],&codep,&len) != 0x0) {
			return 0x1;
		}
		n += len;
		++count;
	}
	size_t const outsz = count + 0x1; // Reserve space for the null-terminator.
	if(outsz > SIZE_MAX / sizeof (uint32_t)) { // The byte size would overflow size_t.
		return 0x1;
	}
	uint32_t * const out = malloc(outsz * sizeof *out);
	if(out == NULL) {
		return 0x1;
	}
	for(size_t n = 0x0,outn = 0x0;outn < count;++outn) { // Second pass: decode.
		size_t len = 0x0;
		luma_utf8dec_one(&utf[n],&out[outn],&len); // Cannot fail: every sequence was validated in the first pass.
		n += len;
	}
	out[count] = 0x0;
	*codeps = out;
	if(outszptr != NULL) {
		*outszptr = outsz;
	}
	return 0x0;
}
diff --git a/src/luma/utf8enc.c b/src/luma/utf8enc.c
deleted file mode 100644
index 3315026..0000000
--- a/src/luma/utf8enc.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- Copyright 2021 Gabriel Jensen
-
- This file is part of luma.
-
- luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
-
- luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
- See the GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License along with luma.
-
- If not, see <https://www.gnu.org/licenses/>.
-*/
-# include <luma/utf8enc.h>
-# include <stdint.h>
-# include <stdio.h>
-# include <stdlib.h>
/*
	Returns the number of bytes (1 to 4) needed to encode codep in UTF-8, or 0 if codep lies above U+10FFFF and therefore cannot be encoded.
*/
static size_t luma_utf8enc_seqlen(uint32_t codep) {
	if(codep < 0x80) {
		return 0x1;
	}
	if(codep < 0x800) {
		return 0x2;
	}
	if(codep < 0x10000) {
		return 0x3;
	}
	if(codep < 0x110000) {
		return 0x4;
	}
	return 0x0;
}

/*
	Encodes the null-terminated array of Unicode codepoints codeps into a newly allocated, null-terminated UTF-8 string.

	codeps:   Input array, terminated by a 0 element.
	utf:      Receives the malloc'd output string on success; the caller owns it and must free() it.
	outszptr: If not NULL, receives the size of the output in bytes, including the null-terminator.

	Returns 0 on success.
	Returns 1 if any codepoint is above U+10FFFF or if memory could not be allocated; in that case *utf and *outszptr are left untouched and nothing is allocated.
	Surrogate values (U+D800-U+DFFF) are not rejected; they are encoded as three bytes.
*/
int luma_utf8enc(uint32_t * codeps,uint8_t * * utf,size_t * outszptr) {
	size_t outsz = 0x1; // Size of the output in bytes; starts at one for the null-terminator.
	for(size_t n = 0x0;codeps[n] != 0x0;++n) { // First pass: validate the input and determine the output size.
		size_t const len = luma_utf8enc_seqlen(codeps[n]);
		if(len == 0x0) { // Codepoint out of range.
			return 0x1;
		}
		outsz += len;
	}
	uint8_t * const out = malloc(outsz);
	if(out == NULL) {
		return 0x1;
	}
	size_t outn = 0x0;
	for(size_t n = 0x0;codeps[n] != 0x0;++n) { // Second pass: encode each codepoint.
		uint32_t const codep = codeps[n];
		switch(luma_utf8enc_seqlen(codep)) {
		case 0x1:
			out[outn++] = (uint8_t)codep;
			break;
		case 0x2:
			out[outn++] = (uint8_t)(0xC0 | (codep >> 0x6));
			out[outn++] = (uint8_t)(0x80 | (codep & 0x3F));
			break;
		case 0x3:
			out[outn++] = (uint8_t)(0xE0 | (codep >> 0xC));
			out[outn++] = (uint8_t)(0x80 | ((codep >> 0x6) & 0x3F));
			out[outn++] = (uint8_t)(0x80 | (codep & 0x3F));
			break;
		default: // Four bytes; zero was ruled out by the first pass.
			out[outn++] = (uint8_t)(0xF0 | (codep >> 0x12));
			out[outn++] = (uint8_t)(0x80 | ((codep >> 0xC) & 0x3F));
			out[outn++] = (uint8_t)(0x80 | ((codep >> 0x6) & 0x3F));
			out[outn++] = (uint8_t)(0x80 | (codep & 0x3F));
			break;
		}
	}
	out[outn] = 0x0; // outn == outsz - 1 here.
	*utf = out;
	if(outszptr != NULL) {
		*outszptr = outsz;
	}
	return 0x0;
}
diff --git a/src/main.c b/src/main.c
deleted file mode 100644
index 969bdcd..0000000
--- a/src/main.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- Copyright 2021 Gabriel Jensen
-
- This file is part of luma.
-
- luma is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
-
- luma is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
- See the GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License along with luma.
-
- If not, see <https://www.gnu.org/licenses/>.
-*/
-# include <locale.h>
-# include <luma/arch.h>
-# include <luma/print.h>
-# include <luma/utf8dec.h>
-# include <luma/utf8enc.h>
-# include <stdint.h>
-# include <stdio.h>
-# include <stdlib.h>
-int main(void) {
- setlocale(LC_ALL,"en_GB.UTF-8");
- enum luma_arch code[] = {
- luma_arch_lab,
- luma_arch_hello,
- };
- for(size_t i = (size_t){0x0};i < sizeof code / sizeof code[0x0];++i) {
- printf("Got code %d.\n",code[i]);
- }
- {
- uint8_t * msg = NULL;
- luma_utf8enc((uint32_t[]){0x00A2,0x2C,0x939,0x2C,0x10348,0x2C,0x20AC,0x2C,0x218A,0x2C,0x1F44B,0x0},&msg,NULL);
- printf("Array: %s\n",msg);
- free((void *){msg});
- }
- {
- uint32_t * codeps = NULL;
- uint8_t * utf = NULL;
- luma_utf8enc((uint32_t[]){0x1F44B,0x0},&utf,NULL);
- luma_utf8dec(utf,&codeps,NULL);
- free((void *)utf);
- printf("It is %u.\n",codeps[0x0]);
- free((void *)codeps);
- }
- luma_print((uint32_t[]){0x48,0x65,0x6C,0x6C,0x6F,0x20,0xFFFD,0x65,0x72,0x65,0x21,0x0},(uint32_t){0xF0});
- exit(EXIT_SUCCESS);
-}