diff --git a/library/core/src/cmp.rs b/library/core/src/cmp.rs index 0dc2cc72e06cc..c315131f4136c 100644 --- a/library/core/src/cmp.rs +++ b/library/core/src/cmp.rs @@ -2053,6 +2053,22 @@ mod impls { fn ge(&self, other: &&B) -> bool { PartialOrd::ge(*self, *other) } + #[inline] + fn __chaining_lt(&self, other: &&B) -> ControlFlow { + PartialOrd::__chaining_lt(*self, *other) + } + #[inline] + fn __chaining_le(&self, other: &&B) -> ControlFlow { + PartialOrd::__chaining_le(*self, *other) + } + #[inline] + fn __chaining_gt(&self, other: &&B) -> ControlFlow { + PartialOrd::__chaining_gt(*self, *other) + } + #[inline] + fn __chaining_ge(&self, other: &&B) -> ControlFlow { + PartialOrd::__chaining_ge(*self, *other) + } } #[stable(feature = "rust1", since = "1.0.0")] impl Ord for &A @@ -2108,6 +2124,22 @@ mod impls { fn ge(&self, other: &&mut B) -> bool { PartialOrd::ge(*self, *other) } + #[inline] + fn __chaining_lt(&self, other: &&mut B) -> ControlFlow { + PartialOrd::__chaining_lt(*self, *other) + } + #[inline] + fn __chaining_le(&self, other: &&mut B) -> ControlFlow { + PartialOrd::__chaining_le(*self, *other) + } + #[inline] + fn __chaining_gt(&self, other: &&mut B) -> ControlFlow { + PartialOrd::__chaining_gt(*self, *other) + } + #[inline] + fn __chaining_ge(&self, other: &&mut B) -> ControlFlow { + PartialOrd::__chaining_ge(*self, *other) + } } #[stable(feature = "rust1", since = "1.0.0")] impl Ord for &mut A diff --git a/library/core/src/slice/cmp.rs b/library/core/src/slice/cmp.rs index da85f42926e6b..5ce72b46eee36 100644 --- a/library/core/src/slice/cmp.rs +++ b/library/core/src/slice/cmp.rs @@ -5,6 +5,7 @@ use crate::ascii; use crate::cmp::{self, BytewiseEq, Ordering}; use crate::intrinsics::compare_bytes; use crate::num::NonZero; +use crate::ops::ControlFlow; #[stable(feature = "rust1", since = "1.0.0")] impl PartialEq<[U]> for [T] @@ -31,12 +32,64 @@ impl Ord for [T] { } } +#[inline] +fn as_underlying(x: ControlFlow) -> u8 { + // SAFETY: This will only compile if `bool` and `ControlFlow` have the same + // size (which isn't guaranteed but this is libcore). Because they have the same + // size, it's a niched implementation, which in one byte means there can't be + // any uninitialized memory. The callers then only check for `0` or `1` from this, + // which must necessarily match the `Break` variant, and we're fine no matter + // what ends up getting picked as the value representing `Continue(())`. + unsafe { crate::mem::transmute(x) } +} + /// Implements comparison of slices [lexicographically](Ord#lexicographical-comparison). #[stable(feature = "rust1", since = "1.0.0")] impl PartialOrd for [T] { + #[inline] fn partial_cmp(&self, other: &[T]) -> Option { SlicePartialOrd::partial_compare(self, other) } + #[inline] + fn lt(&self, other: &Self) -> bool { + // This is certainly not the obvious way to implement these methods. + // Unfortunately, using anything that looks at the discriminant means that + // LLVM sees a check for `2` (aka `ControlFlow::Continue(())`) and + // gets very distracted by that, ending up generating extraneous code. + // This should be changed to something simpler once either LLVM is smarter, + // see , or we generate + // niche discriminant checks in a way that doesn't trigger it. + + as_underlying(self.__chaining_lt(other)) == 1 + } + #[inline] + fn le(&self, other: &Self) -> bool { + as_underlying(self.__chaining_le(other)) != 0 + } + #[inline] + fn gt(&self, other: &Self) -> bool { + as_underlying(self.__chaining_gt(other)) == 1 + } + #[inline] + fn ge(&self, other: &Self) -> bool { + as_underlying(self.__chaining_ge(other)) != 0 + } + #[inline] + fn __chaining_lt(&self, other: &Self) -> ControlFlow { + SliceChain::chaining_lt(self, other) + } + #[inline] + fn __chaining_le(&self, other: &Self) -> ControlFlow { + SliceChain::chaining_le(self, other) + } + #[inline] + fn __chaining_gt(&self, other: &Self) -> ControlFlow { + SliceChain::chaining_gt(self, other) + } + #[inline] + fn __chaining_ge(&self, other: &Self) -> ControlFlow { + SliceChain::chaining_ge(self, other) + } } #[doc(hidden)] @@ -99,24 +152,63 @@ trait SlicePartialOrd: Sized { fn partial_compare(left: &[Self], right: &[Self]) -> Option; } +#[doc(hidden)] +// intermediate trait for specialization of slice's PartialOrd chaining methods +trait SliceChain: Sized { + fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow; + fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow; + fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow; + fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow; +} + +type AlwaysBreak = ControlFlow; + impl SlicePartialOrd for A { default fn partial_compare(left: &[A], right: &[A]) -> Option { - let l = cmp::min(left.len(), right.len()); - - // Slice to the loop iteration range to enable bound check - // elimination in the compiler - let lhs = &left[..l]; - let rhs = &right[..l]; + let elem_chain = |a, b| match PartialOrd::partial_cmp(a, b) { + Some(Ordering::Equal) => ControlFlow::Continue(()), + non_eq => ControlFlow::Break(non_eq), + }; + let len_chain = |a: &_, b: &_| ControlFlow::Break(usize::partial_cmp(a, b)); + let AlwaysBreak::Break(b) = chaining_impl(left, right, elem_chain, len_chain); + b + } +} - for i in 0..l { - match lhs[i].partial_cmp(&rhs[i]) { - Some(Ordering::Equal) => (), - non_eq => return non_eq, - } - } +impl SliceChain for A { + default fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow { + chaining_impl(left, right, PartialOrd::__chaining_lt, usize::__chaining_lt) + } + default fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow { + chaining_impl(left, right, PartialOrd::__chaining_le, usize::__chaining_le) + } + default fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow { + chaining_impl(left, right, PartialOrd::__chaining_gt, usize::__chaining_gt) + } + default fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow { + chaining_impl(left, right, PartialOrd::__chaining_ge, usize::__chaining_ge) + } +} - left.len().partial_cmp(&right.len()) +#[inline] +fn chaining_impl<'l, 'r, A: PartialOrd, B, C>( + left: &'l [A], + right: &'r [A], + elem_chain: impl Fn(&'l A, &'r A) -> ControlFlow, + len_chain: impl for<'a> FnOnce(&'a usize, &'a usize) -> ControlFlow, +) -> ControlFlow { + let l = cmp::min(left.len(), right.len()); + + // Slice to the loop iteration range to enable bound check + // elimination in the compiler + let lhs = &left[..l]; + let rhs = &right[..l]; + + for i in 0..l { + elem_chain(&lhs[i], &rhs[i])?; } + + len_chain(&left.len(), &right.len()) } // This is the impl that we would like to have. Unfortunately it's not sound. @@ -165,21 +257,13 @@ trait SliceOrd: Sized { impl SliceOrd for A { default fn compare(left: &[Self], right: &[Self]) -> Ordering { - let l = cmp::min(left.len(), right.len()); - - // Slice to the loop iteration range to enable bound check - // elimination in the compiler - let lhs = &left[..l]; - let rhs = &right[..l]; - - for i in 0..l { - match lhs[i].cmp(&rhs[i]) { - Ordering::Equal => (), - non_eq => return non_eq, - } - } - - left.len().cmp(&right.len()) + let elem_chain = |a, b| match Ord::cmp(a, b) { + Ordering::Equal => ControlFlow::Continue(()), + non_eq => ControlFlow::Break(non_eq), + }; + let len_chain = |a: &_, b: &_| ControlFlow::Break(usize::cmp(a, b)); + let AlwaysBreak::Break(b) = chaining_impl(left, right, elem_chain, len_chain); + b } } @@ -191,7 +275,7 @@ impl SliceOrd for A { /// * For every `x` and `y` of this type, `Ord(x, y)` must return the same /// value as `Ord::cmp(transmute::<_, u8>(x), transmute::<_, u8>(y))`. #[rustc_specialization_trait] -unsafe trait UnsignedBytewiseOrd {} +unsafe trait UnsignedBytewiseOrd: Ord {} unsafe impl UnsignedBytewiseOrd for bool {} unsafe impl UnsignedBytewiseOrd for u8 {} @@ -225,6 +309,38 @@ impl SliceOrd for A { } } +// Don't generate our own chaining loops for `memcmp`-able things either. +impl SliceChain for A { + #[inline] + fn chaining_lt(left: &[Self], right: &[Self]) -> ControlFlow { + match SliceOrd::compare(left, right) { + Ordering::Equal => ControlFlow::Continue(()), + ne => ControlFlow::Break(ne.is_lt()), + } + } + #[inline] + fn chaining_le(left: &[Self], right: &[Self]) -> ControlFlow { + match SliceOrd::compare(left, right) { + Ordering::Equal => ControlFlow::Continue(()), + ne => ControlFlow::Break(ne.is_le()), + } + } + #[inline] + fn chaining_gt(left: &[Self], right: &[Self]) -> ControlFlow { + match SliceOrd::compare(left, right) { + Ordering::Equal => ControlFlow::Continue(()), + ne => ControlFlow::Break(ne.is_gt()), + } + } + #[inline] + fn chaining_ge(left: &[Self], right: &[Self]) -> ControlFlow { + match SliceOrd::compare(left, right) { + Ordering::Equal => ControlFlow::Continue(()), + ne => ControlFlow::Break(ne.is_ge()), + } + } +} + pub(super) trait SliceContains: Sized { fn slice_contains(&self, x: &[Self]) -> bool; } diff --git a/library/core/src/tuple.rs b/library/core/src/tuple.rs index d754bb9034300..02eb805ece121 100644 --- a/library/core/src/tuple.rs +++ b/library/core/src/tuple.rs @@ -2,7 +2,7 @@ use crate::cmp::Ordering::{self, *}; use crate::marker::{ConstParamTy_, StructuralPartialEq, UnsizedConstParamTy}; -use crate::ops::ControlFlow::{Break, Continue}; +use crate::ops::ControlFlow::{self, Break, Continue}; // Recursive macro for implementing n-ary tuple functions and operations // @@ -95,6 +95,22 @@ macro_rules! tuple_impls { fn gt(&self, other: &($($T,)+)) -> bool { lexical_ord!(gt, __chaining_gt, $( ${ignore($T)} self.${index()}, other.${index()} ),+) } + #[inline] + fn __chaining_lt(&self, other: &($($T,)+)) -> ControlFlow { + lexical_chain!(__chaining_lt, $( ${ignore($T)} self.${index()}, other.${index()} ),+) + } + #[inline] + fn __chaining_le(&self, other: &($($T,)+)) -> ControlFlow { + lexical_chain!(__chaining_le, $( ${ignore($T)} self.${index()}, other.${index()} ),+) + } + #[inline] + fn __chaining_gt(&self, other: &($($T,)+)) -> ControlFlow { + lexical_chain!(__chaining_gt, $( ${ignore($T)} self.${index()}, other.${index()} ),+) + } + #[inline] + fn __chaining_ge(&self, other: &($($T,)+)) -> ControlFlow { + lexical_chain!(__chaining_ge, $( ${ignore($T)} self.${index()}, other.${index()} ),+) + } } } @@ -187,6 +203,17 @@ macro_rules! lexical_ord { }; } +// Same parameter interleaving as `lexical_ord` above +macro_rules! lexical_chain { + ($chain_rel: ident, $a:expr, $b:expr $(,$rest_a:expr, $rest_b:expr)*) => {{ + PartialOrd::$chain_rel(&$a, &$b)?; + lexical_chain!($chain_rel $(,$rest_a, $rest_b)*) + }}; + ($chain_rel: ident) => { + Continue(()) + }; +} + macro_rules! lexical_partial_cmp { ($a:expr, $b:expr, $($rest_a:expr, $rest_b:expr),+) => { match ($a).partial_cmp(&$b) { diff --git a/tests/codegen/array-cmp.rs b/tests/codegen/array-cmp.rs index 2565a385b61b5..0d33765540176 100644 --- a/tests/codegen/array-cmp.rs +++ b/tests/codegen/array-cmp.rs @@ -1,6 +1,7 @@ // Ensure the asm for array comparisons is properly optimized. //@ compile-flags: -C opt-level=2 +//@ needs-deterministic-layouts (checks depend on tuple layout) #![crate_type = "lib"] @@ -17,3 +18,57 @@ pub fn compare() -> bool { [0x00, 0x00, 0x48, 0x41] } } + +// CHECK-LABEL: @array_of_tuple_le +#[no_mangle] +pub fn array_of_tuple_le(a: &[(i16, u16); 2], b: &[(i16, u16); 2]) -> bool { + // Ensure that, after all the optimizations have run, the happy path just checks + // `eq` on each corresponding pair and moves onto the next one if it is. + // Then there's a dedup'd comparison for the place that's different. + // (As opposed to, say, running a full `[su]cmp` as part of checking equality.) + + // This is written quite specifically because different library code was triggering + // along the way, so this + // has enough checks to make sure that's not happening. It doesn't need to be + // *exactly* this IR, but be careful if you ever need to update these checks. + + // CHECK: start: + // CHECK: %[[A00:.+]] = load i16, ptr %a + // CHECK: %[[B00:.+]] = load i16, ptr %b + // CHECK-NOT: cmp + // CHECK: %[[EQ00:.+]] = icmp eq i16 %[[A00]], %[[B00]] + // CHECK-NEXT: br i1 %[[EQ00]], label %[[L01:.+]], label %[[EXIT_S:.+]] + + // CHECK: [[L01]]: + // CHECK: %[[PA01:.+]] = getelementptr{{.+}}i8, ptr %a, {{i32|i64}} 2 + // CHECK: %[[PB01:.+]] = getelementptr{{.+}}i8, ptr %b, {{i32|i64}} 2 + // CHECK: %[[A01:.+]] = load i16, ptr %[[PA01]] + // CHECK: %[[B01:.+]] = load i16, ptr %[[PB01]] + // CHECK-NOT: cmp + // CHECK: %[[EQ01:.+]] = icmp eq i16 %[[A01]], %[[B01]] + // CHECK-NEXT: br i1 %[[EQ01]], label %[[L10:.+]], label %[[EXIT_U:.+]] + + // CHECK: [[L10]]: + // CHECK: %[[PA10:.+]] = getelementptr{{.+}}i8, ptr %a, {{i32|i64}} 4 + // CHECK: %[[PB10:.+]] = getelementptr{{.+}}i8, ptr %b, {{i32|i64}} 4 + // CHECK: %[[A10:.+]] = load i16, ptr %[[PA10]] + // CHECK: %[[B10:.+]] = load i16, ptr %[[PB10]] + // CHECK-NOT: cmp + // CHECK: %[[EQ10:.+]] = icmp eq i16 %[[A10]], %[[B10]] + // CHECK-NEXT: br i1 %[[EQ10]], label %[[L11:.+]], label %[[EXIT_S]] + + // CHECK: [[L11]]: + // CHECK: %[[PA11:.+]] = getelementptr{{.+}}i8, ptr %a, {{i32|i64}} 6 + // CHECK: %[[PB11:.+]] = getelementptr{{.+}}i8, ptr %b, {{i32|i64}} 6 + // CHECK: %[[A11:.+]] = load i16, ptr %[[PA11]] + // CHECK: %[[B11:.+]] = load i16, ptr %[[PB11]] + // CHECK-NOT: cmp + // CHECK: %[[EQ11:.+]] = icmp eq i16 %[[A11]], %[[B11]] + // CHECK-NEXT: br i1 %[[EQ11]], label %[[DONE:.+]], label %[[EXIT_U]] + + // CHECK: [[DONE]]: + // CHECK: %[[RET:.+]] = phi i1 [ %{{.+}}, %[[EXIT_S]] ], [ %{{.+}}, %[[EXIT_U]] ], [ true, %[[L11]] ] + // CHECK: ret i1 %[[RET]] + + a <= b +}