diff options
Diffstat (limited to 'zap/src/mem/utf8dec.c')
-rw-r--r-- | zap/src/mem/utf8dec.c | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/zap/src/mem/utf8dec.c b/zap/src/mem/utf8dec.c new file mode 100644 index 0000000..32ebd00 --- /dev/null +++ b/zap/src/mem/utf8dec.c @@ -0,0 +1,51 @@ +/* + Copyright 2022 Gabriel Jensen. + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. + If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. +*/ + +#include <zap/priv.h> + +#include <zap/mem.h> + +void zap_utf8dec(zap_chr8 const * const _in,zap_chr20 * const _out) { + zap_chr8 const * in = _in; + zap_chr20 * out = _out; + for (;;++out) { + zap_chr8 const oct = *in; + if (oct >= 0xF0u) { /* Four octets. */ + zap_chr20 chr = ((zap_chr20)oct ^ 0xF0u) << 0x12u; + ++in; + chr += ((zap_chr20)*in ^ 0x80u) << 0xCu; + ++in; + chr += ((zap_chr20)*in ^ 0x80u) << 0x6u; + ++in; + chr += (zap_chr20)*in ^ 0x80u; + ++in; + *out = chr; + continue; + } + if (oct >= 0xE0u) { /* Three octets. */ + zap_chr20 chr = ((zap_chr20)oct ^ 0xE0u) << 0xCu; + ++in; + chr += ((zap_chr20)*in ^ 0x80u) << 0x6u; + ++in; + chr += (zap_chr20)*in ^ 0x80u; + ++in; + *out = chr; + continue; + } + if (oct >= 0xC0u) { /* Two octets. */ + zap_chr20 chr = ((zap_chr20)oct ^ 0xC0u) << 0x6u; + ++in; + chr += (zap_chr20)*in ^ 0x80u; + ++in; + *out = chr; + continue; + } + /* One octet. */ + *out = oct; + ++in; + sus_unlikely (oct == 0x0u) {break;} + } +} |