summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- PKGBUILD 2
-rw-r--r-- changelog.md 5
-rw-r--r-- src/luma/utf8enc.c 18
-rw-r--r-- src/main.c 4
4 files changed, 21 insertions, 8 deletions
diff --git a/PKGBUILD b/PKGBUILD
index 1b15366..6092983 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -1,6 +1,6 @@
# Maintainer: Gabriel Jensen
pkgname=luma
-pkgver=19.0.0
+pkgver=20.0.0
pkgrel=1
pkgdesc="luma programming language - runtime environment"
arch=("any")
diff --git a/changelog.md b/changelog.md
index cb530ae..c5c9875 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,3 +1,8 @@
+# 18
+
+* Complete UTF-8 encoder.
+* Fix #1.
+
# 17
* Reformat changelog to Markdown.
diff --git a/src/luma/utf8enc.c b/src/luma/utf8enc.c
index cd1edff..5012f5c 100644
--- a/src/luma/utf8enc.c
+++ b/src/luma/utf8enc.c
@@ -15,7 +15,7 @@ uint8_t const * luma_utf8enc(uint32_t * codeps) {
return NULL;
}
if(codep > 0xFFFF) { // 4 bytes.
- outsz += (size_t){0x2};
+ outsz += (size_t){0x4};
continue;
}
if(codep > 0x7FF) { // 3 bytes.
@@ -38,19 +38,29 @@ uint8_t const * luma_utf8enc(uint32_t * codeps) {
for(size_t n = (size_t){0x0};n < sz;n += (size_t){0x1}) {
uint32_t codep = codeps[n]; // Current Unicode codepoint.
if(codep > 0xFFFF) {
- outstr[outn] = (uint8_t){0x3F};
+ outstr[outn] = (uint8_t){0xF0 + (codep >> 0x12)};
+ outn += (size_t){0x1};
+ outstr[outn] = (uint8_t){0x80 + ((codep >> 0xC) & 0x3F)};
+ outn += (size_t){0x1};
+ outstr[outn] = (uint8_t){0x80 + ((codep >> 0x6) & 0x3F)};
+ outn += (size_t){0x1};
+ outstr[outn] = (uint8_t){0x80 + ((codep >> 0x0) & 0x3F)};
outn += (size_t){0x1};
continue;
}
if(codep > 0x7FF) {
- outstr[outn] = (uint8_t){0x3F};
+ outstr[outn] = (uint8_t){0xE0 + (codep >> 0xC)};
+ outn += (size_t){0x1};
+ outstr[outn] = (uint8_t){0x80 + ((codep >> 0x6) & 0x3F)};
+ outn += (size_t){0x1};
+ outstr[outn] = (uint8_t){0x80 + ((codep >> 0x0) & 0x3F)};
outn += (size_t){0x1};
continue;
}
if(codep > 0x7F) {
outstr[outn] = (uint8_t){0xC0 + (codep >> 0x6)};
outn += (size_t){0x1};
- outstr[outn] = (uint8_t){0x80 + ((uint8_t){codep << 0x2} >> 0x2)};
+ outstr[outn] = (uint8_t){0x80 + ((codep >> 0x0) & 0x3F)};
outn += (size_t){0x1};
continue;
}
diff --git a/src/main.c b/src/main.c
index 8f55eec..971a861 100644
--- a/src/main.c
+++ b/src/main.c
@@ -14,9 +14,7 @@ int main(void) {
for(size_t i = (size_t){0x0};i < sizeof code / sizeof code[0x0];++i) {
printf("Got code %d.\n",code[i]);
}
- uint8_t const * msg = luma_utf8enc((uint32_t[]){0x00A2,0x2C,0x20Ac,0x2C,0x218A,0x2C,0x1F44B,0x0});
- printf("%u\n",msg[0x0]);
- printf("%u\n",msg[0x1]);
+ uint8_t const * msg = luma_utf8enc((uint32_t[]){0x00A2,0x2C,0x939,0x2C,0x10348,0x2C,0x20Ac,0x2C,0x218A,0x2C,0x1F44B,0x0});
printf("%s\n",msg);
//uint32_t * utf = luma_utf8dec(msg);
free((void *)msg);