Skip to content

Commit 2456495

Browse files
committed
Stop generating allocas+memcmp for simple array equality
1 parent d05eafa commit 2456495

File tree

12 files changed

+238
-4
lines changed

12 files changed

+238
-4
lines changed

compiler/rustc_codegen_llvm/src/context.rs

+5
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ impl CodegenCx<'b, 'tcx> {
500500
let t_i32 = self.type_i32();
501501
let t_i64 = self.type_i64();
502502
let t_i128 = self.type_i128();
503+
let t_isize = self.type_isize();
503504
let t_f32 = self.type_f32();
504505
let t_f64 = self.type_f64();
505506

@@ -712,6 +713,10 @@ impl CodegenCx<'b, 'tcx> {
712713
ifn!("llvm.assume", fn(i1) -> void);
713714
ifn!("llvm.prefetch", fn(i8p, t_i32, t_i32, t_i32) -> void);
714715

716+
// This isn't an "LLVM intrinsic", but LLVM's optimization passes
717+
// recognize it like one and we assume it exists in `core::slice::cmp`
718+
ifn!("memcmp", fn(i8p, i8p, t_isize) -> t_i32);
719+
715720
// variadic intrinsics
716721
ifn!("llvm.va_start", fn(i8p) -> void);
717722
ifn!("llvm.va_end", fn(i8p) -> void);

compiler/rustc_codegen_llvm/src/intrinsic.rs

+26
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,32 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> {
296296
}
297297
}
298298

299+
sym::raw_eq => {
300+
let tp_ty = substs.type_at(0);
301+
let (size, align) = self.size_and_align_of(tp_ty);
302+
let a = args[0].immediate();
303+
let b = args[1].immediate();
304+
if size.bytes() == 0 {
305+
self.const_bool(true)
306+
} else if size > self.data_layout().pointer_size * 4 {
307+
let i8p_ty = self.type_i8p();
308+
let a_ptr = self.bitcast(a, i8p_ty);
309+
let b_ptr = self.bitcast(b, i8p_ty);
310+
let n = self.const_usize(size.bytes());
311+
let llfn = self.get_intrinsic("memcmp");
312+
let cmp = self.call(llfn, &[a_ptr, b_ptr, n], None);
313+
self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
314+
} else {
315+
let integer_ty = self.type_ix(size.bits());
316+
let ptr_ty = self.type_ptr_to(integer_ty);
317+
let a_ptr = self.bitcast(a, ptr_ty);
318+
let a_val = self.load(a_ptr, align);
319+
let b_ptr = self.bitcast(b, ptr_ty);
320+
let b_val = self.load(b_ptr, align);
321+
self.icmp(IntPredicate::IntEQ, a_val, b_val)
322+
}
323+
}
324+
299325
_ if name_str.starts_with("simd_") => {
300326
match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
301327
Ok(llval) => llval,

compiler/rustc_mir/src/interpret/intrinsics.rs

+18
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,10 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
472472
throw_ub_format!("`assume` intrinsic called with `false`");
473473
}
474474
}
475+
sym::raw_eq => {
476+
let result = self.raw_eq_intrinsic(&args[0], &args[1])?;
477+
self.write_scalar(result, dest)?;
478+
}
475479
_ => return Ok(false),
476480
}
477481

@@ -559,4 +563,18 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
559563

560564
self.memory.copy(src, align, dst, align, size, nonoverlapping)
561565
}
566+
567+
pub(crate) fn raw_eq_intrinsic(
568+
&mut self,
569+
lhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::PointerTag>,
570+
rhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::PointerTag>,
571+
) -> InterpResult<'tcx, Scalar<M::PointerTag>> {
572+
let layout = self.layout_of(lhs.layout.ty.builtin_deref(true).unwrap().ty)?;
573+
574+
let lhs = self.read_scalar(lhs)?.check_init()?;
575+
let rhs = self.read_scalar(rhs)?.check_init()?;
576+
let lhs_bytes = self.memory.read_bytes(lhs, layout.size)?;
577+
let rhs_bytes = self.memory.read_bytes(rhs, layout.size)?;
578+
Ok(Scalar::Int((lhs_bytes == rhs_bytes).into()))
579+
}
562580
}

compiler/rustc_span/src/symbol.rs

+1
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,7 @@ symbols! {
933933
quote,
934934
range_inclusive_new,
935935
raw_dylib,
936+
raw_eq,
936937
raw_identifiers,
937938
raw_ref_op,
938939
re_rebalance_coherence,

compiler/rustc_typeck/src/check/intrinsic.rs

+8
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,14 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
380380

381381
sym::nontemporal_store => (1, vec![tcx.mk_mut_ptr(param(0)), param(0)], tcx.mk_unit()),
382382

383+
sym::raw_eq => {
384+
let param_count = if intrinsic_name == sym::raw_eq { 2 } else { 1 };
385+
let br = ty::BoundRegion { var: ty::BoundVar::from_u32(0), kind: ty::BrAnon(0) };
386+
let param_ty =
387+
tcx.mk_imm_ref(tcx.mk_region(ty::ReLateBound(ty::INNERMOST, br)), param(0));
388+
(1, vec![param_ty; param_count], tcx.types.bool)
389+
}
390+
383391
other => {
384392
tcx.sess.emit_err(UnrecognizedIntrinsicFunction { span: it.span, name: other });
385393
return;

library/core/src/array/equality.rs

+51-2
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ where
55
{
66
#[inline]
77
fn eq(&self, other: &[B; N]) -> bool {
8-
self[..] == other[..]
8+
SpecArrayEq::spec_eq(self, other)
99
}
1010
#[inline]
1111
fn ne(&self, other: &[B; N]) -> bool {
12-
self[..] != other[..]
12+
SpecArrayEq::spec_ne(self, other)
1313
}
1414
}
1515

@@ -109,3 +109,52 @@ where
109109

110110
#[stable(feature = "rust1", since = "1.0.0")]
111111
impl<T: Eq, const N: usize> Eq for [T; N] {}
112+
113+
trait SpecArrayEq<Other, const N: usize>: Sized {
114+
fn spec_eq(a: &[Self; N], b: &[Other; N]) -> bool;
115+
fn spec_ne(a: &[Self; N], b: &[Other; N]) -> bool;
116+
}
117+
118+
impl<T: PartialEq<Other>, Other, const N: usize> SpecArrayEq<Other, N> for T {
119+
default fn spec_eq(a: &[Self; N], b: &[Other; N]) -> bool {
120+
a[..] == b[..]
121+
}
122+
default fn spec_ne(a: &[Self; N], b: &[Other; N]) -> bool {
123+
a[..] != b[..]
124+
}
125+
}
126+
127+
impl<T: PartialEq<U> + IsRawEqComparable<U>, U, const N: usize> SpecArrayEq<U, N> for T {
128+
#[cfg(bootstrap)]
129+
fn spec_eq(a: &[T; N], b: &[U; N]) -> bool {
130+
a[..] == b[..]
131+
}
132+
#[cfg(not(bootstrap))]
133+
fn spec_eq(a: &[T; N], b: &[U; N]) -> bool {
134+
// SAFETY: This is why `IsRawEqComparable` is an `unsafe trait`.
135+
unsafe {
136+
let b = &*b.as_ptr().cast::<[T; N]>();
137+
crate::intrinsics::raw_eq(a, b)
138+
}
139+
}
140+
fn spec_ne(a: &[T; N], b: &[U; N]) -> bool {
141+
!Self::spec_eq(a, b)
142+
}
143+
}
144+
145+
/// `U` exists on here mostly because `min_specialization` didn't let me
146+
/// repeat the `T` type parameter in the above specialization, so instead
147+
/// the `T == U` constraint comes from the impls on this.
148+
/// # Safety
149+
/// - Neither `Self` nor `U` has any padding.
150+
/// - `Self` and `U` have the same layout.
151+
/// - `Self: PartialEq<U>` is byte-wise (this means no floats, among other things)
152+
#[rustc_specialization_trait]
153+
unsafe trait IsRawEqComparable<U> {}
154+
155+
macro_rules! is_raw_comparable {
156+
($($t:ty),+) => {$(
157+
unsafe impl IsRawEqComparable<$t> for $t {}
158+
)+};
159+
}
160+
is_raw_comparable!(bool, char, u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);

library/core/src/intrinsics.rs

+16
Original file line numberDiff line numberDiff line change
@@ -1913,6 +1913,22 @@ extern "rust-intrinsic" {
19131913
/// Allocate at compile time. Should not be called at runtime.
19141914
#[rustc_const_unstable(feature = "const_heap", issue = "79597")]
19151915
pub fn const_allocate(size: usize, align: usize) -> *mut u8;
1916+
1917+
/// Determines whether the raw bytes of the two values are equal.
1918+
///
1919+
/// The is particularly handy for arrays, since it allows things like just
1920+
/// comparing `i96`s instead of forcing `alloca`s for `[6 x i16]`.
1921+
///
1922+
/// Above some backend-decided threshold this will emit calls to `memcmp`,
1923+
/// like slice equality does, instead of causing massive code size.
1924+
///
1925+
/// # Safety
1926+
///
1927+
/// This doesn't take into account padding, so if `T` has padding
1928+
/// the result will be `undef`, which cannot be exposed to safe code.
1929+
#[cfg(not(bootstrap))]
1930+
#[rustc_const_unstable(feature = "const_intrinsic_raw_eq", issue = "none")]
1931+
pub fn raw_eq<T>(a: &T, b: &T) -> bool;
19161932
}
19171933

19181934
// Some functions are defined here because they accidentally got made

src/test/codegen/array-equality.rs

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// compile-flags: -O
2+
// only-x86_64
3+
4+
#![crate_type = "lib"]
5+
6+
// CHECK-LABEL: @array_eq_value
7+
#[no_mangle]
8+
pub fn array_eq_value(a: [u16; 6], b: [u16; 6]) -> bool {
9+
// CHECK-NEXT: start:
10+
// CHECK-NEXT: %2 = icmp eq i96 %0, %1
11+
// CHECK-NEXT: ret i1 %2
12+
a == b
13+
}
14+
15+
// CHECK-LABEL: @array_eq_ref
16+
#[no_mangle]
17+
pub fn array_eq_ref(a: &[u16; 6], b: &[u16; 6]) -> bool {
18+
// CHECK: start:
19+
// CHECK: load i96, i96* %{{.+}}, align 2
20+
// CHECK: load i96, i96* %{{.+}}, align 2
21+
// CHECK: icmp eq i96
22+
// CHECK-NEXT: ret
23+
a == b
24+
}
25+
26+
// CHECK-LABEL: @array_eq_long
27+
#[no_mangle]
28+
pub fn array_eq_long(a: &[u16; 1234], b: &[u16; 1234]) -> bool {
29+
// CHECK-NEXT: start:
30+
// CHECK-NEXT: bitcast
31+
// CHECK-NEXT: bitcast
32+
// CHECK-NEXT: %[[CMP:.+]] = tail call i32 @{{bcmp|memcmp}}(i8* nonnull dereferenceable(2468) %{{.+}}, i8* nonnull dereferenceable(2468) %{{.+}}, i64 2468)
33+
// CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[CMP]], 0
34+
// CHECK-NEXT: ret i1 %[[EQ]]
35+
a == b
36+
}

src/test/codegen/slice-ref-equality.rs

+17-2
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,30 @@
22

33
#![crate_type = "lib"]
44

5-
// #71602: check that slice equality just generates a single bcmp
5+
// #71602 reported a simple array comparison just generating a loop.
6+
// This was originally fixed by ensuring it generates a single bcmp,
7+
// but we now generate it as a load instead. `is_zero_slice` was
8+
// tweaked to still test the case of comparison against a slice,
9+
// and `is_zero_array` tests the new array-specific behaviour.
610

711
// CHECK-LABEL: @is_zero_slice
812
#[no_mangle]
913
pub fn is_zero_slice(data: &[u8; 4]) -> bool {
10-
// CHECK: start:
14+
// CHECK: :
1115
// CHECK-NEXT: %{{.+}} = getelementptr {{.+}}
1216
// CHECK-NEXT: %[[BCMP:.+]] = tail call i32 @{{bcmp|memcmp}}({{.+}})
1317
// CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[BCMP]], 0
1418
// CHECK-NEXT: ret i1 %[[EQ]]
19+
&data[..] == [0; 4]
20+
}
21+
22+
// CHECK-LABEL: @is_zero_array
23+
#[no_mangle]
24+
pub fn is_zero_array(data: &[u8; 4]) -> bool {
25+
// CHECK: start:
26+
// CHECK-NEXT: %[[PTR:.+]] = bitcast [4 x i8]* {{.+}} to i32*
27+
// CHECK-NEXT: %[[LOAD:.+]] = load i32, i32* %[[PTR]], align 1
28+
// CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[LOAD]], 0
29+
// CHECK-NEXT: ret i1 %[[EQ]]
1530
*data == [0; 4]
1631
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#![feature(core_intrinsics)]
2+
#![feature(const_intrinsic_raw_eq)]
3+
#![deny(const_err)]
4+
5+
const BAD_RAW_EQ_CALL: bool = unsafe {
6+
std::intrinsics::raw_eq(&(1_u8, 2_u16), &(1_u8, 2_u16))
7+
//~^ ERROR any use of this value will cause an error
8+
//~| WARNING this was previously accepted by the compiler but is being phased out
9+
};
10+
11+
pub fn main() {
12+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
error: any use of this value will cause an error
2+
--> $DIR/intrinsic-raw_eq-const-padding.rs:6:5
3+
|
4+
LL | / const BAD_RAW_EQ_CALL: bool = unsafe {
5+
LL | | std::intrinsics::raw_eq(&(1_u8, 2_u16), &(1_u8, 2_u16))
6+
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ reading 4 bytes of memory starting at alloc2, but 1 byte is uninitialized starting at alloc2+0x1, and this operation requires initialized memory
7+
LL | |
8+
LL | |
9+
LL | | };
10+
| |__-
11+
|
12+
note: the lint level is defined here
13+
--> $DIR/intrinsic-raw_eq-const-padding.rs:3:9
14+
|
15+
LL | #![deny(const_err)]
16+
| ^^^^^^^^^
17+
= warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release!
18+
= note: for more information, see issue #71800 <https://github.com/rust-lang/rust/issues/71800>
19+
20+
error: aborting due to previous error
21+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// run-pass
2+
3+
#![feature(core_intrinsics)]
4+
#![feature(const_intrinsic_raw_eq)]
5+
#![deny(const_err)]
6+
7+
pub fn main() {
8+
use std::intrinsics::raw_eq;
9+
10+
const RAW_EQ_I32_TRUE: bool = unsafe { raw_eq(&42_i32, &42) };
11+
assert!(RAW_EQ_I32_TRUE);
12+
13+
const RAW_EQ_I32_FALSE: bool = unsafe { raw_eq(&4_i32, &2) };
14+
assert!(!RAW_EQ_I32_FALSE);
15+
16+
const RAW_EQ_CHAR_TRUE: bool = unsafe { raw_eq(&'a', &'a') };
17+
assert!(RAW_EQ_CHAR_TRUE);
18+
19+
const RAW_EQ_CHAR_FALSE: bool = unsafe { raw_eq(&'a', &'A') };
20+
assert!(!RAW_EQ_CHAR_FALSE);
21+
22+
const RAW_EQ_ARRAY_TRUE: bool = unsafe { raw_eq(&[13_u8, 42], &[13, 42]) };
23+
assert!(RAW_EQ_ARRAY_TRUE);
24+
25+
const RAW_EQ_ARRAY_FALSE: bool = unsafe { raw_eq(&[13_u8, 42], &[42, 13]) };
26+
assert!(!RAW_EQ_ARRAY_FALSE);
27+
}

0 commit comments

Comments
 (0)