1
Fork 0

UTF-8 validation: Add missing if conditional for short input

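We need to check that `len` is large enough before entering the fast skip loop: when the input is shorter than two words, the loop bound `len - 2 * usize::BYTES` underflows.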
This commit is contained in:
Ulrik Sverdrup 2016-01-12 23:04:46 +01:00
parent 11e3de39d9
commit cadcd70775

View file

@ -1158,24 +1158,27 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
offset += 1; offset += 1;
} else { } else {
// Ascii case, try to skip forward quickly. // Ascii case, try to skip forward quickly.
// When the pointer is aligned, read 2 words of data per iteration
// until we find a word containing a non-ascii byte.
const BYTES_PER_ITERATION: usize = 2 * usize::BYTES;
let ptr = v.as_ptr(); let ptr = v.as_ptr();
let align = (ptr as usize + offset) & (usize::BYTES - 1); let align = (ptr as usize + offset) & (usize::BYTES - 1);
if align == 0 { if align == 0 {
// When the pointer is aligned, read 2 words of data per iteration if len >= BYTES_PER_ITERATION {
// until we find a word containing a non-ascii byte. while offset <= len - BYTES_PER_ITERATION {
while offset <= len - 2 * usize::BYTES { unsafe {
unsafe { let u = *(ptr.offset(offset as isize) as *const usize);
let u = *(ptr.offset(offset as isize) as *const usize); let v = *(ptr.offset((offset + usize::BYTES) as isize) as *const usize);
let v = *(ptr.offset((offset + usize::BYTES) as isize) as *const usize);
// break if there is a nonascii byte // break if there is a nonascii byte
let zu = contains_nonascii(u); let zu = contains_nonascii(u);
let zv = contains_nonascii(v); let zv = contains_nonascii(v);
if zu || zv { if zu || zv {
break; break;
}
} }
offset += BYTES_PER_ITERATION;
} }
offset += usize::BYTES * 2;
} }
// step from the point where the wordwise loop stopped // step from the point where the wordwise loop stopped
while offset < len && v[offset] < 128 { while offset < len && v[offset] < 128 {