Skip to content

Commit b5392f5

Browse files
committed
Auto merge of #47208 - Manishearth:double-ended-searcher, r=pnkfelix
Make double ended searchers use dependent fingers (fixes #47175). r? @BurntSushi @alexcrichton. Needs uplift to beta.
2 parents 1b193de + 9066219 commit b5392f5

File tree

2 files changed

+48
-6
lines changed

2 files changed

+48
-6
lines changed

src/libcore/str/pattern.rs

+10-6
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
284284
#[inline]
285285
fn next(&mut self) -> SearchStep {
286286
let old_finger = self.finger;
287-
let slice = unsafe { self.haystack.get_unchecked(old_finger..self.haystack.len()) };
287+
let slice = unsafe { self.haystack.get_unchecked(old_finger..self.finger_back) };
288288
let mut iter = slice.chars();
289289
let old_len = iter.iter.len();
290290
if let Some(ch) = iter.next() {
@@ -304,7 +304,8 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
304304
fn next_match(&mut self) -> Option<(usize, usize)> {
305305
loop {
306306
// get the haystack after the last character found
307-
let bytes = if let Some(slice) = self.haystack.as_bytes().get(self.finger..) {
307+
let bytes = if let Some(slice) = self.haystack.as_bytes()
308+
.get(self.finger..self.finger_back) {
308309
slice
309310
} else {
310311
return None;
@@ -340,7 +341,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
340341
}
341342
} else {
342343
// found nothing, exit
343-
self.finger = self.haystack.len();
344+
self.finger = self.finger_back;
344345
return None;
345346
}
346347
}
@@ -353,7 +354,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
353354
#[inline]
354355
fn next_back(&mut self) -> SearchStep {
355356
let old_finger = self.finger_back;
356-
let slice = unsafe { self.haystack.slice_unchecked(0, old_finger) };
357+
let slice = unsafe { self.haystack.slice_unchecked(self.finger, old_finger) };
357358
let mut iter = slice.chars();
358359
let old_len = iter.iter.len();
359360
if let Some(ch) = iter.next_back() {
@@ -374,14 +375,17 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
374375
let haystack = self.haystack.as_bytes();
375376
loop {
376377
// get the haystack up to but not including the last character searched
377-
let bytes = if let Some(slice) = haystack.get(..self.finger_back) {
378+
let bytes = if let Some(slice) = haystack.get(self.finger..self.finger_back) {
378379
slice
379380
} else {
380381
return None;
381382
};
382383
// the last byte of the utf8 encoded needle
383384
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
384385
if let Some(index) = memchr::memrchr(last_byte, bytes) {
386+
// we searched a slice that was offset by self.finger,
387+
// add self.finger to recoup the original index
388+
let index = self.finger + index;
385389
// memrchr will return the index of the byte we wish to
386390
// find. In case of an ASCII character, this is indeed
387391
// where we wish our new finger to be ("after" the found
@@ -412,7 +416,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
412416
// found the last byte when searching in reverse.
413417
self.finger_back = index;
414418
} else {
415-
self.finger_back = 0;
419+
self.finger_back = self.finger;
416420
// found nothing, exit
417421
return None;
418422
}

src/libcore/tests/pattern.rs

+38
Original file line numberDiff line numberDiff line change
@@ -262,3 +262,41 @@ fn test_reverse_search_shared_bytes() {
262262
[InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
263263
);
264264
}
265+
266+
#[test]
267+
fn double_ended_regression_test() {
268+
// https://github.com/rust-lang/rust/issues/47175
269+
// Ensures that double ended searching comes to a convergence
270+
search_asserts!("abcdeabcdeabcde", 'a', "alternating double ended search",
271+
[next_match, next_match_back, next_match, next_match_back],
272+
[InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
273+
);
274+
search_asserts!("abcdeabcdeabcde", 'a', "triple double ended search for a",
275+
[next_match, next_match_back, next_match_back, next_match_back],
276+
[InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
277+
);
278+
search_asserts!("abcdeabcdeabcde", 'd', "triple double ended search for d",
279+
[next_match, next_match_back, next_match_back, next_match_back],
280+
[InRange(3, 4), InRange(13, 14), InRange(8, 9), Done]
281+
);
282+
search_asserts!(STRESS, 'Á', "Double ended search for two-byte Latin character",
283+
[next_match, next_match_back, next_match, next_match_back],
284+
[InRange(0, 2), InRange(32, 34), InRange(8, 10), Done]
285+
);
286+
search_asserts!(STRESS, '각', "Reverse double ended search for three-byte Hangul character",
287+
[next_match_back, next_back, next_match, next, next_match_back, next_match],
288+
[InRange(34, 37), Rejects(32, 34), InRange(19, 22), Rejects(22, 25), InRange(28, 31), Done]
289+
);
290+
search_asserts!(STRESS, 'ก', "Double ended search for three-byte Thai character",
291+
[next_match, next_back, next, next_match_back, next_match],
292+
[InRange(22, 25), Rejects(47, 48), Rejects(25, 28), InRange(40, 43), Done]
293+
);
294+
search_asserts!(STRESS, '😁', "Double ended search for four-byte emoji",
295+
[next_match_back, next, next_match, next_back, next_match],
296+
[InRange(43, 47), Rejects(0, 2), InRange(15, 19), Rejects(40, 43), Done]
297+
);
298+
search_asserts!(STRESS, 'ꁁ', "Double ended search for three-byte Yi character with repeated bytes",
299+
[next_match, next, next_match_back, next_back, next_match],
300+
[InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(34, 37), Done]
301+
);
302+
}

0 commit comments

Comments
 (0)