summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- README.md 6
-rw-r--r-- changelog.md 8
-rw-r--r-- include/u8c/ver.h 2
-rw-r--r-- src/u8c/end.c 2
-rw-r--r-- src/u8c/u8dec.c 45
-rw-r--r-- src/u8c/u8enc.c 12
-rw-r--r-- u8c.svg 23
7 files changed, 64 insertions, 34 deletions
diff --git a/README.md b/README.md
index 4d21fb6..94da579 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
# u8c
-[*u8c*](https://mandelbrot.dk/delta/u8c) is a free and open-source C-based library for transforming Unicode codepoints, as well as encoding them into UTF-8, even on implementations that use a different 32 bit encoding.
+[*u8c*](https://mandelbrot.dk/delta/u8c) is a free, open-source, and portable C-based library for transforming Unicode codepoints, as well as encoding them into UTF-8, even on implementations that use a different 32-bit encoding (although it is unclear what other encoding such an implementation would use).
## Installing
-*u8c* can be installed either via the `install` target in the provided Makefile, or using the PKGBUILD found [here](https://mandelbrot.dk/pkgbuild/delta/u8c).
+*u8c* can be installed, either via the `install` target in the provided Makefile, or using the PKGBUILD found [here](https://mandelbrot.dk/pkgbuild/delta/u8c).
-Using the PKGBUILD is as simple as `git clone https://mandelbrot.dk/pkgbuild/delta/u8c.git && cd u8c && makepkg --clean --install --syncdeps`.
+Using the PKGBUILD is as simple as `git clone https://mandelbrot.dk/pkgbuild/delta/u8c.git && cd u8c && makepkg --clean --install --syncdeps` (on Arch-based distributions).
## Copyright & License
diff --git a/changelog.md b/changelog.md
index f31e1d6..9607c40 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,3 +1,9 @@
+# 5
+
+* Add logo (`u8c.svg`).
+* Fix UTF-8 decoder.
+* Update README.
+
# 4
* Add link to PKGBUILD in README.
@@ -28,6 +34,8 @@
* Add test-program (run via `make runtest`).
* Add program to make human-readable UTF-32 strings machine-readable.
* Turn `u8c_ver` into a compile-time macro.
+* Enable more warnings when compiling.
+* Add assertions.
# 1
diff --git a/include/u8c/ver.h b/include/u8c/ver.h
index d8e6062..a626a0c 100644
--- a/include/u8c/ver.h
+++ b/include/u8c/ver.h
@@ -16,5 +16,5 @@
/* Version */
# if !defined(u8c_ver)
# include <stdint.h>
-# define u8c_ver (UINT64_C(0x4))
+# define u8c_ver (UINT64_C(0x5))
# endif
diff --git a/src/u8c/end.c b/src/u8c/end.c
index b4e79f2..60ed58d 100644
--- a/src/u8c/end.c
+++ b/src/u8c/end.c
@@ -24,7 +24,7 @@
# include <threads.h>
# endif
uint_least8_t u8c_end(void) {
- if(u8c_stat > UINT8_C(0x0)) {
+ if(u8c_stat) {
return UINT8_C(0x0);
}
# if defined(u8c_bethrdsafe)
diff --git a/src/u8c/u8dec.c b/src/u8c/u8dec.c
index d7ddb05..fa530ee 100644
--- a/src/u8c/u8dec.c
+++ b/src/u8c/u8dec.c
@@ -23,11 +23,10 @@
uint_least8_t u8c_u8dec(size_t * _outsz,uint_least32_t * * _out,uint_least8_t * _in) {
assert(_in != NULL);
size_t insz = SIZE_C(0x0);
- size_t outsz = SIZE_C(0x0);
- for(size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { /* First pass: get size of input array and determine size of output array. */
- outsz += SIZE_C(0x1);
+ size_t outsz = SIZE_C(0x1);
+ for(size_t n = SIZE_C(0x0);n <= SIZE_MAX;outsz += SIZE_C(0x1)) { /* First pass: get size of input array and determine size of output array. */
if(_in[n] == UINT8_C(0x0)) { /* Null-terminator: end of string has been reached. */
- insz = n;
+ insz = n + SIZE_C(0x1);
goto nottoobig;
}
if(_in[n] >= UINT8_C(0xF8)) { /* Too big. */
@@ -35,29 +34,29 @@ uint_least8_t u8c_u8dec(size_t * _outsz,uint_least32_t * * _out,uint_least8_t *
return UINT8_C(0x1);
}
if(_in[n] >= UINT8_C(0xF0)) { /* Four byte. */
- n += SIZE_C(0x4);
+ n += SIZE_C(0x4);
continue;
}
if(_in[n] >= UINT8_C(0xE0)) { /* Three bytes. */
- n += SIZE_C(0x3);
+ n += SIZE_C(0x3);
continue;
}
if(_in[n] >= UINT8_C(0xC0)) { /* Two bytes. */
- n += SIZE_C(0x2);
+ n += SIZE_C(0x2);
continue;
}
/* One byte. */
n += SIZE_C(0x1);
}
+ /* Input is not null-terminated. */
u8c_seterr((uint_least32_t[]){UINT32_C(0x75),UINT32_C(0x38),UINT32_C(0x63),UINT32_C(0x5F),UINT32_C(0x75),UINT32_C(0x38),UINT32_C(0x64),UINT32_C(0x65),UINT32_C(0x63),UINT32_C(0x3A),UINT32_C(0x20),UINT32_C(0x55),UINT32_C(0x6E),UINT32_C(0x74),UINT32_C(0x65),UINT32_C(0x72),UINT32_C(0x6D),UINT32_C(0x69),UINT32_C(0x6E),UINT32_C(0x61),UINT32_C(0x74),UINT32_C(0x65),UINT32_C(0x64),UINT32_C(0x20),UINT32_C(0x69),UINT32_C(0x6E),UINT32_C(0x70),UINT32_C(0x75),UINT32_C(0x74),UINT32_C(0x2E),UINT32_C(0x0),}); /* u8c_u8dec: Unterminated input. */
return UINT8_C(0x1);
nottoobig:;
if(_outsz != NULL) {
*_outsz = outsz;
}
- *_out = calloc(sizeof(uint_least8_t),outsz);
- (*_out)[outsz - SIZE_C(0x1)] = (uint_least32_t){0x0}; /* Create null-terminator on output array. */
- for(size_t n = SIZE_C(0x0), outn = SIZE_C(0x0);n < insz;n += SIZE_C(0x1),outn += SIZE_C(0x1)) { /* Second pass: decode UTF-8. */
+ *_out = calloc(sizeof(uint_least32_t),outsz);
+ for(size_t n = SIZE_C(0x0),outn = SIZE_C(0x0);n < insz;outn += SIZE_C(0x1)) { /* Second pass: decode UTF-8. */
if(_in[n] >= UINT8_C(0xF0)) { /* Four byte. */
uint_least32_t codep = (_in[n] ^ UINT32_C(0xF0)) << UINT32_C(0x12);
n += SIZE_C(0x1);
@@ -66,29 +65,31 @@ nottoobig:;
codep += (_in[n] ^ UINT32_C(0x80)) << UINT32_C(0x6);
n += SIZE_C(0x1);
codep += (uint_least32_t)(_in[n]) ^ SIZE_C(0x80);
+ n += SIZE_C(0x1);
(*_out)[outn] = codep;
continue;
}
if(_in[n] >= UINT8_C(0xE0)) { /* Three bytes. */
- uint_least32_t codep = (_in[n] ^ UINT32_C(0xE0)) << UINT32_C(0xC);
- n += SIZE_C(0x1);
- codep += (_in[n] ^ UINT32_C(0x80)) << UINT32_C(0x6);
- n += SIZE_C(0x1);
- codep += _in[n] ^ UINT32_C(0x80);
- n += SIZE_C(0x1);
- (*_out)[outn] = codep;
+ uint_least32_t codep = (_in[n] ^ UINT32_C(0xE0)) << UINT32_C(0xC);
+ n += SIZE_C(0x1);
+ codep += (_in[n] ^ UINT32_C(0x80)) << UINT32_C(0x6);
+ n += SIZE_C(0x1);
+ codep += _in[n] ^ UINT32_C(0x80);
+ n += SIZE_C(0x1);
+ (*_out)[outn] = codep;
continue;
}
if(_in[n] >= UINT8_C(0xC0)) { /* Two bytes. */
- uint_least32_t codep = (_in[n] ^ UINT32_C(0xC0)) << UINT32_C(0x6);
- n += SIZE_C(0x1);
- codep += _in[n] ^ UINT32_C(0x80);
- n += SIZE_C(0x1);
+ uint_least32_t codep = (_in[n] ^ UINT32_C(0xC0)) << UINT32_C(0x6);
+ n += SIZE_C(0x1);
+ codep += _in[n] ^ UINT32_C(0x80);
+ n += SIZE_C(0x1);
(*_out)[outn] = codep;
continue;
}
/* One byte. */
- (*_out)[outn] = (uint_least32_t)(_in[n]);
+ (*_out)[outn] = (uint_least32_t)(_in[n]);
+ n += SIZE_C(0x1);
continue;
}
return UINT8_C(0x0);
diff --git a/src/u8c/u8enc.c b/src/u8c/u8enc.c
index f37e9cf..5ebd479 100644
--- a/src/u8c/u8enc.c
+++ b/src/u8c/u8enc.c
@@ -24,10 +24,6 @@ uint_least8_t u8c_u8enc(size_t * _sz,uint_least8_t * * _out,uint_least32_t * _in
size_t insz = SIZE_C(0x0); /* Size of input array (bytes). */
size_t outsz = SIZE_C(0x0); /* Size of output array /bytes). */
for(size_t n = SIZE_C(0x0);n <= SIZE_MAX;n += SIZE_C(0x1)) { /* First pass: get size of input array, and determine size of output array. */
- if(_in[n] == UINT32_C(0x0)) { /* U+0000 is Null. */
- insz = n;
- goto nottoobig;
- }
if(_in[n] >= UINT32_C(0x110000)) { /* Codepoint out of range. */
u8c_seterr((uint_least32_t[]){UINT32_C(0x75),UINT32_C(0x38),UINT32_C(0x63),UINT32_C(0x5F),UINT32_C(0x75),UINT32_C(0x38),UINT32_C(0x65),UINT32_C(0x6E),UINT32_C(0x63),UINT32_C(0x3A),UINT32_C(0x20),UINT32_C(0x43),UINT32_C(0x6F),UINT32_C(0x64),UINT32_C(0x65),UINT32_C(0x70),UINT32_C(0x6F),UINT32_C(0x69),UINT32_C(0x6E),UINT32_C(0x74),UINT32_C(0x20),UINT32_C(0x6F),UINT32_C(0x75),UINT32_C(0x74),UINT32_C(0x20),UINT32_C(0x6F),UINT32_C(0x66),UINT32_C(0x20),UINT32_C(0x72),UINT32_C(0x61),UINT32_C(0x6E),UINT32_C(0x67),UINT32_C(0x65),UINT32_C(0x20),UINT32_C(0x28),UINT32_C(0x74),UINT32_C(0x6F),UINT32_C(0x6F),UINT32_C(0x20),UINT32_C(0x62),UINT32_C(0x69),UINT32_C(0x67),UINT32_C(0x29),UINT32_C(0x2E),UINT32_C(0x0),}); /* u8c_u8enc: Codepoint out of range (too big). */
return UINT8_C(0x1);
@@ -46,16 +42,18 @@ uint_least8_t u8c_u8enc(size_t * _sz,uint_least8_t * * _out,uint_least32_t * _in
}
/* 1 byte. */
outsz += SIZE_C(0x1);
+ if(_in[n] == UINT32_C(0x0)) {
+ insz = n + SIZE_C(0x1);
+ goto nottoobig;
+ }
}
u8c_seterr((uint_least32_t[]){UINT32_C(0x75),UINT32_C(0x38),UINT32_C(0x63),UINT32_C(0x5F),UINT32_C(0x75),UINT32_C(0x38),UINT32_C(0x65),UINT32_C(0x6E),UINT32_C(0x63),UINT32_C(0x3A),UINT32_C(0x20),UINT32_C(0x55),UINT32_C(0x6E),UINT32_C(0x74),UINT32_C(0x65),UINT32_C(0x72),UINT32_C(0x6D),UINT32_C(0x69),UINT32_C(0x6E),UINT32_C(0x61),UINT32_C(0x74),UINT32_C(0x65),UINT32_C(0x64),UINT32_C(0x20),UINT32_C(0x69),UINT32_C(0x6E),UINT32_C(0x70),UINT32_C(0x75),UINT32_C(0x74),UINT32_C(0x2E),UINT32_C(0x0),}); /* u8c_u8enc: Unterminated input. */
return UINT8_C(0x1);
nottoobig:;
- outsz += SIZE_C(0x1); /* Reserve space for null-terminator. */
if(_sz != NULL) {
*_sz = outsz;
}
- *_out = calloc(sizeof(uint_least8_t),outsz); /* Allocate space for output array. */
- (*_out)[outsz - SIZE_C(0x1)] = UINT8_C(0x0); /* Create null-terminator on output array. */
+ *_out = calloc(sizeof(uint_least8_t),outsz); /* Allocate space for output array. */
for(size_t n = SIZE_C(0x0), outn = SIZE_C(0x0);n < insz;n += SIZE_C(0x1),outn += SIZE_C(0x1)) { /* Second pass: encode each codepoint into UTF-8. */
if(_in[n] >= UINT32_C(0x10000)) { // Four bytes.
(*_out)[outn] = UINT8_C(0xF0) + (uint_least8_t)(_in[n] >> UINT32_C(0x12));
diff --git a/u8c.svg b/u8c.svg
new file mode 100644
index 0000000..b885474
--- /dev/null
+++ b/u8c.svg
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg height="168" version="1.1" width="216" xmlns="http://www.w3.org/2000/svg">
+ <rect fill="#F8F8F1" height="152" rx="8" ry="8" width="200" x="8" y="8" />
+ <rect fill="#444747" height="72" rx="4" ry="4" width="192" x="12" y="84" />
+ <rect fill="#444747" height="16" rx="4" ry="4" width="16" x="16" y="16" />
+ <rect fill="#444747" height="16" rx="4" ry="4" width="16" x="16" y="40" />
+ <rect fill="#444747" height="16" rx="4" ry="4" width="16" x="16" y="64" />
+ <rect fill="#444747" height="16" rx="4" ry="4" width="16" x="88" y="16" />
+ <rect fill="#444747" height="16" rx="4" ry="4" width="16" x="64" y="40" />
+ <rect fill="#444747" height="16" rx="4" ry="4" width="16" x="112" y="16" />
+ <rect fill="#444747" height="16" rx="4" ry="4" width="16" x="112" y="40" />
+ <rect fill="#E13D3D" height="16" rx="4" ry="4" width="16" x="16" y="88" />
+ <rect fill="#E13D3D" height="16" rx="4" ry="4" width="16" x="16" y="136" />
+ <rect fill="#E1A93D" height="16" rx="4" ry="4" width="16" x="40" y="136" />
+ <rect fill="#A9E13D" height="16" rx="4" ry="4" width="16" x="88" y="88" />
+ <rect fill="#E1E13D" height="16" rx="4" ry="4" width="16" x="64" y="136" />
+ <rect fill="#A9E13D" height="16" rx="4" ry="4" width="16" x="88" y="136" />
+ <rect fill="#3DE13D" height="16" rx="4" ry="4" width="16" x="112" y="88" />
+ <rect fill="#3DE1A9" height="16" rx="4" ry="4" width="16" x="136" y="88" />
+ <rect fill="#3DE13D" height="16" rx="4" ry="4" width="16" x="112" y="112" />
+ <rect fill="#3DE1E1" height="16" rx="4" ry="4" width="16" x="160" y="88" />
+ <rect fill="#3DA9E1" height="16" rx="4" ry="4" width="16" x="184" y="88" />
+</svg>