Add stresstests for shared bytes for pattern API
This commit is contained in:
parent
bc55355576
commit
9b92a4419d
1 changed files with 153 additions and 1 deletions
|
@ -21,7 +21,7 @@ enum Step {
|
|||
Done
|
||||
}
|
||||
|
||||
use Step::*;
|
||||
use self::Step::*;
|
||||
|
||||
impl From<SearchStep> for Step {
|
||||
fn from(x: SearchStep) -> Self {
|
||||
|
@ -42,6 +42,12 @@ impl From<Option<(usize, usize)>> for Step {
|
|||
}
|
||||
}
|
||||
|
||||
// XXXManishearth these tests focus on single-character searching (CharSearcher)
|
||||
// and on next()/next_match(), not next_reject(). This is because
|
||||
// the memchr changes make next_match() for single chars complex, but next_reject()
|
||||
// continues to use next() under the hood. We should add more test cases for all
|
||||
// of these, as well as tests for StrSearcher and higher level tests for str::find() (etc)
|
||||
|
||||
#[test]
|
||||
fn test_simple_iteration() {
|
||||
search_asserts! ("abcdeabcd", 'a', "forward iteration for ASCII string",
|
||||
|
@ -98,3 +104,149 @@ fn test_simple_search() {
|
|||
);
|
||||
}
|
||||
|
||||
// Á, 각, ก, 😁 all end in 0x81
|
||||
// 🁀, ᘀ do not end in 0x81 but contain the byte
|
||||
// ꁁ has 0x81 as its second and third bytes.
|
||||
//
|
||||
// The memchr-using implementations of next_match
|
||||
// and next_match_back temporarily violate
|
||||
// the property that the search is always on a unicode boundary,
|
||||
// which is fine as long as this never reaches next() or next_back().
|
||||
// So we test if next() is correct after each next_match() as well.
|
||||
// Shared haystack for the stress tests below; test_stress_indices lists the byte range of every character in it.
const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a";
|
||||
|
||||
// Steps through STRESS with next() while searching for 'x', which does not occur
// in the string, so every expected step before the final Done is a
// Rejects(start, end) byte range. The list doubles as a table of the byte
// offsets of each character in STRESS.
// NOTE(review): search_asserts! is defined outside this excerpt; presumably it calls
// the listed searcher methods in order and compares each result with the expected
// step at the same position — confirm against the macro definition.
#[test]
|
||||
fn test_stress_indices() {
|
||||
// This isn't really a test; it is documentation of the byte indices of each character in the stress-test string.
|
||||
|
||||
search_asserts!(STRESS, 'x', "Indices of characters in stress test",
|
||||
[next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next],
|
||||
[Rejects(0, 2), // Á
|
||||
Rejects(2, 3), // a
|
||||
Rejects(3, 7), // 🁀
|
||||
Rejects(7, 8), // b
|
||||
Rejects(8, 10), // Á
|
||||
Rejects(10, 13), // ꁁ
|
||||
Rejects(13, 14), // f
|
||||
Rejects(14, 15), // g
|
||||
Rejects(15, 19), // 😁
|
||||
Rejects(19, 22), // 각
|
||||
Rejects(22, 25), // ก
|
||||
Rejects(25, 28), // ᘀ
|
||||
Rejects(28, 31), // 각
|
||||
Rejects(31, 32), // a
|
||||
Rejects(32, 34), // Á
|
||||
Rejects(34, 37), // 각
|
||||
Rejects(37, 40), // ꁁ
|
||||
Rejects(40, 43), // ก
|
||||
Rejects(43, 47), // 😁
|
||||
Rejects(47, 48), // a
|
||||
Done]
|
||||
);
|
||||
}
|
||||
|
||||
// Forward searches over STRESS for each multi-byte needle (Á, 각, ก, 😁, ꁁ).
// The expected InRange/Rejects byte ranges correspond to the character offsets
// listed in test_stress_indices. Sequences that interleave next() between
// next_match() calls check that next() still reports the character that
// immediately follows the previous match.
#[test]
|
||||
fn test_forward_search_shared_bytes() {
|
||||
// Á occurs at bytes 0..2, 8..10 and 32..34; a fourth next_match must report Done.
search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character",
|
||||
[next_match, next_match, next_match, next_match],
|
||||
[InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
|
||||
);
|
||||
|
||||
// Same needle, with next() after each match: it must reject the character that follows the match.
search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character; check if next() still works",
|
||||
[next_match, next, next_match, next, next_match, next, next_match],
|
||||
[InRange(0, 2), Rejects(2, 3), InRange(8, 10), Rejects(10, 13), InRange(32, 34), Rejects(34, 37), Done]
|
||||
);
|
||||
|
||||
// NOTE(review): despite the description, this sequence already contains one next() call after the first match.
search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character",
|
||||
[next_match, next, next_match, next_match, next_match],
|
||||
[InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done]
|
||||
);
|
||||
|
||||
search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character; check if next() still works",
|
||||
[next_match, next, next_match, next, next_match, next, next_match],
|
||||
[InRange(19, 22), Rejects(22, 25), InRange(28, 31), Rejects(31, 32), InRange(34, 37), Rejects(37, 40), Done]
|
||||
);
|
||||
|
||||
search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character",
|
||||
[next_match, next, next_match, next, next_match],
|
||||
[InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
|
||||
);
|
||||
|
||||
// NOTE(review): same call sequence and expectations as the previous assertion; only the
// description differs. Possibly the previous one was meant to use next_match only — confirm.
search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character; check if next() still works",
|
||||
[next_match, next, next_match, next, next_match],
|
||||
[InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
|
||||
);
|
||||
|
||||
search_asserts!(STRESS, '😁', "Forward search for four-byte emoji",
|
||||
[next_match, next, next_match, next, next_match],
|
||||
[InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
|
||||
);
|
||||
|
||||
// NOTE(review): identical to the previous assertion apart from the description.
search_asserts!(STRESS, '😁', "Forward search for four-byte emoji; check if next() still works",
|
||||
[next_match, next, next_match, next, next_match],
|
||||
[InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
|
||||
);
|
||||
|
||||
search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes",
|
||||
[next_match, next, next_match, next, next_match],
|
||||
[InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
|
||||
);
|
||||
|
||||
// NOTE(review): identical to the previous assertion apart from the description.
search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
|
||||
[next_match, next, next_match, next, next_match],
|
||||
[InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
|
||||
);
|
||||
}
|
||||
|
||||
// Reverse counterpart of the forward shared-bytes test: searches STRESS from the
// end for each multi-byte needle (Á, 각, ก, 😁, ꁁ) using next_match_back().
// The expected byte ranges correspond to the character offsets listed in
// test_stress_indices. Sequences that interleave next_back() check that it still
// reports the character immediately preceding the previous match.
#[test]
|
||||
fn test_reverse_search_shared_bytes() {
|
||||
// Á occurs at bytes 32..34, 8..10 and 0..2 when scanning backwards; a fourth next_match_back must report Done.
search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character",
|
||||
[next_match_back, next_match_back, next_match_back, next_match_back],
|
||||
[InRange(32, 34), InRange(8, 10), InRange(0, 2), Done]
|
||||
);
|
||||
|
||||
// The last match ends at byte 0, so the final next_back() has nothing left and is expected to report Done.
search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character; check if next_back() still works",
|
||||
[next_match_back, next_back, next_match_back, next_back, next_match_back, next_back],
|
||||
[InRange(32, 34), Rejects(31, 32), InRange(8, 10), Rejects(7, 8), InRange(0, 2), Done]
|
||||
);
|
||||
|
||||
// NOTE(review): despite the description, this sequence already contains one next_back() call after the first match.
search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character",
|
||||
[next_match_back, next_back, next_match_back, next_match_back, next_match_back],
|
||||
[InRange(34, 37), Rejects(32, 34), InRange(28, 31), InRange(19, 22), Done]
|
||||
);
|
||||
|
||||
search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character; check if next_back() still works",
|
||||
[next_match_back, next_back, next_match_back, next_back, next_match_back, next_back, next_match_back],
|
||||
[InRange(34, 37), Rejects(32, 34), InRange(28, 31), Rejects(25, 28), InRange(19, 22), Rejects(15, 19), Done]
|
||||
);
|
||||
|
||||
search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character",
|
||||
[next_match_back, next_back, next_match_back, next_back, next_match_back],
|
||||
[InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
|
||||
);
|
||||
|
||||
// NOTE(review): same call sequence and expectations as the previous assertion; only the
// description differs. Possibly the previous one was meant to use next_match_back only — confirm.
search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character; check if next_back() still works",
|
||||
[next_match_back, next_back, next_match_back, next_back, next_match_back],
|
||||
[InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
|
||||
);
|
||||
|
||||
search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji",
|
||||
[next_match_back, next_back, next_match_back, next_back, next_match_back],
|
||||
[InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
|
||||
);
|
||||
|
||||
// NOTE(review): identical to the previous assertion apart from the description.
search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji; check if next_back() still works",
|
||||
[next_match_back, next_back, next_match_back, next_back, next_match_back],
|
||||
[InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
|
||||
);
|
||||
|
||||
search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes",
|
||||
[next_match_back, next_back, next_match_back, next_back, next_match_back],
|
||||
[InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
|
||||
);
|
||||
|
||||
// NOTE(review): identical to the previous assertion apart from the description.
search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
|
||||
[next_match_back, next_back, next_match_back, next_back, next_match_back],
|
||||
[InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
|
||||
);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue