|
9 | 9 | use crate::cmp::Ordering::{self, Equal, Greater, Less};
|
10 | 10 | use crate::fmt;
|
11 | 11 | use crate::hint;
|
12 |
| -use crate::intrinsics::{exact_div, unchecked_sub}; |
| 12 | +use crate::intrinsics::{exact_div, select_unpredictable, unchecked_sub}; |
13 | 13 | use crate::mem::{self, SizedTypeProperties};
|
14 | 14 | use crate::num::NonZero;
|
15 | 15 | use crate::ops::{Bound, OneSidedRange, Range, RangeBounds};
|
@@ -2787,41 +2787,54 @@ impl<T> [T] {
|
2787 | 2787 | where
|
2788 | 2788 | F: FnMut(&'a T) -> Ordering,
|
2789 | 2789 | {
|
2790 |
| - // INVARIANTS: |
2791 |
| - // - 0 <= left <= left + size = right <= self.len() |
2792 |
| - // - f returns Less for everything in self[..left] |
2793 |
| - // - f returns Greater for everything in self[right..] |
2794 | 2790 | let mut size = self.len();
|
2795 |
| - let mut left = 0; |
2796 |
| - let mut right = size; |
2797 |
| - while left < right { |
2798 |
| - let mid = left + size / 2; |
2799 |
| - |
2800 |
| - // SAFETY: the while condition means `size` is strictly positive, so |
2801 |
| - // `size/2 < size`. Thus `left + size/2 < left + size`, which |
2802 |
| - // coupled with the `left + size <= self.len()` invariant means |
2803 |
| - // we have `left + size/2 < self.len()`, and this is in-bounds. |
| 2791 | + if size == 0 { |
| 2792 | + return Err(0); |
| 2793 | + } |
| 2794 | + let mut base = 0usize; |
| 2795 | + |
| 2796 | + // This loop intentionally doesn't have an early exit if the comparison |
| 2797 | + // returns Equal. We want the number of loop iterations to depend *only* |
| 2798 | + // on the size of the input slice so that the CPU can reliably predict |
| 2799 | + // the loop count. |
| 2800 | + while size > 1 { |
| 2801 | + let half = size / 2; |
| 2802 | + let mid = base + half; |
| 2803 | + |
| 2804 | + // SAFETY: the call is made safe by the following invariants: |
| 2805 | + // - `mid >= 0`: by definition |
| 2806 | + // - `mid < size`: `mid = size / 2 + size / 4 + size / 8 ...` |
2804 | 2807 | let cmp = f(unsafe { self.get_unchecked(mid) });
|
2805 | 2808 |
|
2806 |
| - // This control flow produces conditional moves, which results in |
2807 |
| - // fewer branches and instructions than if/else or matching on |
2808 |
| - // cmp::Ordering. |
2809 |
| - // This is x86 asm for u8: https://rust.godbolt.org/z/698eYffTx. |
2810 |
| - left = if cmp == Less { mid + 1 } else { left }; |
2811 |
| - right = if cmp == Greater { mid } else { right }; |
2812 |
| - if cmp == Equal { |
2813 |
| - // SAFETY: same as the `get_unchecked` above |
2814 |
| - unsafe { hint::assert_unchecked(mid < self.len()) }; |
2815 |
| - return Ok(mid); |
2816 |
| - } |
2817 |
| - |
2818 |
| - size = right - left; |
| 2809 | + // Binary search interacts poorly with branch prediction, so force |
| 2810 | + // the compiler to use conditional moves if supported by the target |
| 2811 | + // architecture. |
| 2812 | + base = select_unpredictable(cmp == Greater, base, mid); |
| 2813 | + |
| 2814 | + // This is imprecise in the case where `size` is odd and the |
| 2815 | + // comparison returns Greater: the mid element still gets included |
| 2816 | + // by `size` even though it's known to be larger than the element |
| 2817 | + // being searched for. |
| 2818 | + // |
| 2819 | + // This is fine though: we gain more performance by keeping the |
| 2820 | + // loop iteration count invariant (and thus predictable) than we |
| 2821 | + // lose from considering one additional element. |
| 2822 | + size -= half; |
2819 | 2823 | }
|
2820 | 2824 |
|
2821 |
| - // SAFETY: directly true from the overall invariant. |
2822 |
| - // Note that this is `<=`, unlike the assume in the `Ok` path. |
2823 |
| - unsafe { hint::assert_unchecked(left <= self.len()) }; |
2824 |
| - Err(left) |
| 2825 | + // SAFETY: base is always in [0, self.len()) because base <= mid and mid is always in bounds. |
| 2826 | + let cmp = f(unsafe { self.get_unchecked(base) }); |
| 2827 | + if cmp == Equal { |
| 2828 | + // SAFETY: same as the `get_unchecked` above. |
| 2829 | + unsafe { hint::assert_unchecked(base < self.len()) }; |
| 2830 | + Ok(base) |
| 2831 | + } else { |
| 2832 | + let result = base + (cmp == Less) as usize; |
| 2833 | + // SAFETY: same as the `get_unchecked` above. |
| 2834 | + // Note that this is `<=`, unlike the assume in the `Ok` path. |
| 2835 | + unsafe { hint::assert_unchecked(result <= self.len()) }; |
| 2836 | + Err(result) |
| 2837 | + } |
2825 | 2838 | }
|
2826 | 2839 |
|
2827 | 2840 | /// Binary searches this slice with a key extraction function.
|
|
0 commit comments