diff options
-rw-r--r-- | PKGBUILD | 2 | ||||
-rw-r--r-- | changelog.md | 5 | ||||
-rw-r--r-- | src/luma/utf8enc.c | 18 | ||||
-rw-r--r-- | src/main.c | 4 |
4 files changed, 21 insertions, 8 deletions
@@ -1,6 +1,6 @@ # Maintainer: Gabriel Jensen pkgname=luma -pkgver=19.0.0 +pkgver=20.0.0 pkgrel=1 pkgdesc="luma programming language - runtime environment" arch=("any") diff --git a/changelog.md b/changelog.md index cb530ae..c5c9875 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,8 @@ +# 18 + +* Complete UTF-8 encoder. +* Fix #1. + # 17 * Reformat changelog to Markdown. diff --git a/src/luma/utf8enc.c b/src/luma/utf8enc.c index cd1edff..5012f5c 100644 --- a/src/luma/utf8enc.c +++ b/src/luma/utf8enc.c @@ -15,7 +15,7 @@ uint8_t const * luma_utf8enc(uint32_t * codeps) { return NULL; } if(codep > 0xFFFF) { // 4 bytes. - outsz += (size_t){0x2}; + outsz += (size_t){0x4}; continue; } if(codep > 0x7FF) { // 3 bytes. @@ -38,19 +38,29 @@ uint8_t const * luma_utf8enc(uint32_t * codeps) { for(size_t n = (size_t){0x0};n < sz;n += (size_t){0x1}) { uint32_t codep = codeps[n]; // Current Unicode codepoint. if(codep > 0xFFFF) { - outstr[outn] = (uint8_t){0x3F}; + outstr[outn] = (uint8_t){0xF0 + (codep >> 0x12)}; + outn += (size_t){0x1}; + outstr[outn] = (uint8_t){0x80 + ((codep >> 0xC) & 0x3F)}; + outn += (size_t){0x1}; + outstr[outn] = (uint8_t){0x80 + ((codep >> 0x6) & 0x3F)}; + outn += (size_t){0x1}; + outstr[outn] = (uint8_t){0x80 + ((codep >> 0x0) & 0x3F)}; outn += (size_t){0x1}; continue; } if(codep > 0x7FF) { - outstr[outn] = (uint8_t){0x3F}; + outstr[outn] = (uint8_t){0xE0 + (codep >> 0xC)}; + outn += (size_t){0x1}; + outstr[outn] = (uint8_t){0x80 + ((codep >> 0x6) & 0x3F)}; + outn += (size_t){0x1}; + outstr[outn] = (uint8_t){0x80 + ((codep >> 0x0) & 0x3F)}; outn += (size_t){0x1}; continue; } if(codep > 0x7F) { outstr[outn] = (uint8_t){0xC0 + (codep >> 0x6)}; outn += (size_t){0x1}; - outstr[outn] = (uint8_t){0x80 + ((uint8_t){codep << 0x2} >> 0x2)}; + outstr[outn] = (uint8_t){0x80 + ((codep >> 0x0) & 0x3F)}; outn += (size_t){0x1}; continue; } @@ -14,9 +14,7 @@ int main(void) { for(size_t i = (size_t){0x0};i < sizeof code / sizeof code[0x0];++i) { printf("Got code %d.\n",code[i]); } - uint8_t const * msg = luma_utf8enc((uint32_t[]){0x00A2,0x2C,0x20Ac,0x2C,0x218A,0x2C,0x1F44B,0x0}); - printf("%u\n",msg[0x0]); - printf("%u\n",msg[0x1]); + uint8_t const * msg = luma_utf8enc((uint32_t[]){0x00A2,0x2C,0x939,0x2C,0x10348,0x2C,0x20Ac,0x2C,0x218A,0x2C,0x1F44B,0x0}); printf("%s\n",msg); //uint32_t * utf = luma_utf8dec(msg); free((void *)msg); |