From 1825810a898842b4660fd684cea66906b7e32500 Mon Sep 17 00:00:00 2001
From: Soveu
Date: Tue, 16 Feb 2021 18:48:42 +0100
Subject: [PATCH 1/6] Vec::dedup optimization

---
 library/alloc/src/vec/mod.rs | 50 +++++++++++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 6 deletions(-)

diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs
index b6166617789a0..88f9f624aba2d 100644
--- a/library/alloc/src/vec/mod.rs
+++ b/library/alloc/src/vec/mod.rs
@@ -1514,15 +1514,53 @@ impl<T, A: Allocator> Vec<T, A> {
     /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
     /// ```
     #[stable(feature = "dedup_by", since = "1.16.0")]
-    pub fn dedup_by<F>(&mut self, same_bucket: F)
+    pub fn dedup_by<F>(&mut self, mut same_bucket: F)
     where
         F: FnMut(&mut T, &mut T) -> bool,
     {
-        let len = {
-            let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket);
-            dedup.len()
-        };
-        self.truncate(len);
+        let len = self.len();
+        if len <= 1 {
+            return;
+        }
+
+        let ptr = self.as_mut_ptr();
+        /* Offset of the element we want to check if it is duplicate */
+        let mut read: usize = 1;
+        /* Offset of the place where we want to place the non-duplicate
+         * when we find it. */
+        let mut write: usize = 1;
+
+        /* Drop items while going through Vec, it should be more efficient than
+         * doing slice partition_dedup + truncate */
+
+        /* INVARIANT: len > read >= write > write-1 >= 0
+         * SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
+         * are always in-bounds and read_ptr never aliases prev_ptr */
+        unsafe {
+            while read < len {
+                let read_ptr = ptr.add(read);
+                let prev_ptr = ptr.add(write.wrapping_sub(1));
+
+                if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
+                    /* We have found duplicate, drop it in-place */
+                    ptr::drop_in_place(read_ptr);
+                } else {
+                    let write_ptr = ptr.add(write);
+
+                    /* Looks like doing just `copy` can be faster than
+                     * conditional `copy_nonoverlapping` */
+                    ptr::copy(read_ptr, write_ptr, 1);
+
+                    /* We have filled that place, so go further */
+                    write += 1;
+                }
+
+                read += 1;
+            }
+
+            /* `write` items are inside vec, rest is already dropped */
+            self.set_len(write);
+        }
     }
 
     /// Appends an element to the back of a collection.

From c114894b90971ad7c6743ca5961f276cae1e2b27 Mon Sep 17 00:00:00 2001
From: Soveu
Date: Wed, 17 Feb 2021 17:21:12 +0100
Subject: [PATCH 2/6] Vec::dedup optimization - panic gracefully

---
 library/alloc/src/vec/mod.rs | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 65 insertions(+), 16 deletions(-)

diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs
index 88f9f624aba2d..e65adb6c77eda 100644
--- a/library/alloc/src/vec/mod.rs
+++ b/library/alloc/src/vec/mod.rs
@@ -1523,43 +1523,92 @@ impl<T, A: Allocator> Vec<T, A> {
             return;
         }
 
-        let ptr = self.as_mut_ptr();
-        /* Offset of the element we want to check if it is duplicate */
-        let mut read: usize = 1;
-        /* Offset of the place where we want to place the non-duplicate
-         * when we find it. */
-        let mut write: usize = 1;
+        /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
+        struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
+            /* Offset of the element we want to check if it is duplicate */
+            read: usize,
+
+            /* Offset of the place where we want to place the non-duplicate
+             * when we find it. */
+            write: usize,
+
+            /* The Vec that would need correction if `same_bucket` panicked */
+            vec: &'a mut Vec<T, A>,
+        }
+
+        impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> {
+            fn drop(&mut self) {
+                /* This code gets executed either at the end of `dedup_by` or
+                 * when `same_bucket` panics */
+
+                /* SAFETY (if finishing successfully): self.read == len, so
+                 * no data is copied and length is set correctly */
+
+                /* SAFETY (if panicing): invariant guarantees that `read - write`
+                 * and `len - read` never overflow and that the copy is always
+                 * in-bounds. */
+                unsafe {
+                    let ptr = self.vec.as_mut_ptr();
+                    let len = self.vec.len();
+
+                    /* How many items were left when `same_bucket` paniced.
+                     * Basically vec[read..].len() */
+                    let items_left = len - self.read;
+
+                    /* Pointer to first item in vec[write..write+items_left] slice */
+                    let dropped_ptr = ptr.add(self.write);
+                    /* Pointer to first item in vec[read..] slice */
+                    let valid_ptr = ptr.add(self.read);
+
+                    /* Copy `vec[read..]` to `vec[write..write+items_left]`.
+                     * The slices can overlap, so `copy_nonoverlapping` cannot be used */
+                    ptr::copy(valid_ptr, dropped_ptr, items_left);
+
+                    /* How many items have been already dropped
+                     * Basically vec[read..write].len() */
+                    let dropped = self.read - self.write;
+
+                    self.vec.set_len(len - dropped);
+                }
+            }
+        }
+
+        let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
+
+        let ptr = gap.vec.as_mut_ptr();
 
         /* Drop items while going through Vec, it should be more efficient than
          * doing slice partition_dedup + truncate */
 
-        /* INVARIANT: len > read >= write > write-1 >= 0
-         * SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
+        /* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
          * are always in-bounds and read_ptr never aliases prev_ptr */
         unsafe {
-            while read < len {
-                let read_ptr = ptr.add(read);
-                let prev_ptr = ptr.add(write.wrapping_sub(1));
+            while gap.read < len {
+                let read_ptr = ptr.add(gap.read);
+                let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
 
                 if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
                     /* We have found duplicate, drop it in-place */
                     ptr::drop_in_place(read_ptr);
                 } else {
-                    let write_ptr = ptr.add(write);
+                    let write_ptr = ptr.add(gap.write);
 
                     /* Looks like doing just `copy` can be faster than
                      * conditional `copy_nonoverlapping` */
                     ptr::copy(read_ptr, write_ptr, 1);
 
                     /* We have filled that place, so go further */
-                    write += 1;
+                    gap.write += 1;
                 }
 
-                read += 1;
+                gap.read += 1;
             }
 
-            /* `write` items are inside vec, rest is already dropped */
-            self.set_len(write);
+            /* Technically we could let `gap` clean up with its Drop, but
+             * when `same_bucket` is guaranteed to not panic, this bloats a little
+             * the codegen, so we just do it manually */
+            gap.vec.set_len(gap.write);
+            mem::forget(gap);
         }
     }
 

From 2abab1f688fe0d4a740b216b298f32fbb48b653b Mon Sep 17 00:00:00 2001
From: Soveu
Date: Mon, 15 Mar 2021 20:24:35 +0100
Subject: [PATCH 3/6] Vec::dedup optimization - add tests

---
 library/alloc/tests/lib.rs |  1 +
 library/alloc/tests/vec.rs | 73 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/library/alloc/tests/lib.rs b/library/alloc/tests/lib.rs
index dd98f806451d8..fd1612f316bfd 100644
--- a/library/alloc/tests/lib.rs
+++ b/library/alloc/tests/lib.rs
@@ -21,6 +21,7 @@
 #![feature(vecdeque_binary_search)]
 #![feature(slice_group_by)]
 #![feature(vec_extend_from_within)]
+#![feature(slice_partition_dedup)]
 
 use std::collections::hash_map::DefaultHasher;
 use std::hash::{Hash, Hasher};
diff --git a/library/alloc/tests/vec.rs b/library/alloc/tests/vec.rs
index 2969da58d4268..1d28fc9eebf82 100644
--- a/library/alloc/tests/vec.rs
+++ b/library/alloc/tests/vec.rs
@@ -2096,3 +2096,76 @@ fn test_extend_from_within() {
 
     assert_eq!(v, ["a", "b", "c", "b", "c", "a", "b"]);
 }
+
+#[test]
+fn test_vec_dedup_by() {
+    let mut vec: Vec<i32> = vec![1, -1, 2, 3, 1, -5, 5, -2, 2];
+
+    vec.dedup_by(|a, b| a.abs() == b.abs());
+
+    assert_eq!(vec, [1, 2, 3, 1, -5, -2]);
+}
+
+#[test]
+fn test_vec_dedup_empty() {
+    let mut vec: Vec<i32> = Vec::new();
+
+    vec.dedup();
+
+    assert_eq!(vec, []);
+}
+
+#[test]
+fn test_vec_dedup_one() {
+    let mut vec = vec![12i32];
+
+    vec.dedup();
+
+    assert_eq!(vec, [12]);
+}
+
+#[test]
+fn test_vec_dedup_multiple_ident() {
+    let mut vec = vec![12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11];
+
+    vec.dedup();
+
+    assert_eq!(vec, [12, 11]);
+}
+
+#[test]
+fn test_vec_dedup_partialeq() {
+    #[derive(Debug)]
+    struct Foo(i32, i32);
+
+    impl PartialEq for Foo {
+        fn eq(&self, other: &Foo) -> bool {
+            self.0 == other.0
+        }
+    }
+
+    let mut vec = vec![Foo(0, 1), Foo(0, 5), Foo(1, 7), Foo(1, 9)];
+
+    vec.dedup();
+    assert_eq!(vec, [Foo(0, 1), Foo(1, 7)]);
+}
+
+#[test]
+fn test_vec_dedup() {
+    let mut vec: Vec<bool> = Vec::with_capacity(8);
+    let mut template = vec.clone();
+
+    for x in 0u8..255u8 {
+        vec.clear();
+        template.clear();
+
+        let iter = (0..8).map(move |bit| (x >> bit) & 1 == 1);
+        vec.extend(iter);
+        template.extend_from_slice(&vec);
+
+        let (dedup, _) = template.partition_dedup();
+        vec.dedup();
+
+        assert_eq!(vec, dedup);
+    }
+}

From afdbc9ece176ccac7b1d156efcb397d089d88b5a Mon Sep 17 00:00:00 2001
From: Soveu
Date: Mon, 15 Mar 2021 20:36:29 +0100
Subject: [PATCH 4/6] Vec::dedup optimization - finishing polishes

---
 library/alloc/src/vec/mod.rs | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs
index e65adb6c77eda..19198d4eeefd9 100644
--- a/library/alloc/src/vec/mod.rs
+++ b/library/alloc/src/vec/mod.rs
@@ -1538,13 +1538,9 @@ impl<T, A: Allocator> Vec<T, A> {
 
         impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> {
             fn drop(&mut self) {
-                /* This code gets executed either at the end of `dedup_by` or
-                 * when `same_bucket` panics */
+                /* This code gets executed when `same_bucket` panics */
 
-                /* SAFETY (if finishing successfully): self.read == len, so
-                 * no data is copied and length is set correctly */
-
-                /* SAFETY (if panicing): invariant guarantees that `read - write`
+                /* SAFETY: invariant guarantees that `read - write`
                  * and `len - read` never overflow and that the copy is always
                  * in-bounds. */
                 unsafe {
@@ -1553,7 +1549,7 @@ impl<T, A: Allocator> Vec<T, A> {
 
                     /* How many items were left when `same_bucket` paniced.
                      * Basically vec[read..].len() */
-                    let items_left = len - self.read;
+                    let items_left = len.wrapping_sub(self.read);
 
                     /* Pointer to first item in vec[write..write+items_left] slice */
                     let dropped_ptr = ptr.add(self.write);
@@ -1566,7 +1562,7 @@ impl<T, A: Allocator> Vec<T, A> {
 
                     /* How many items have been already dropped
                      * Basically vec[read..write].len() */
-                    let dropped = self.read - self.write;
+                    let dropped = self.read.wrapping_sub(self.write);
 
                     self.vec.set_len(len - dropped);
                 }
@@ -1574,7 +1570,6 @@ impl<T, A: Allocator> Vec<T, A> {
         }
 
         let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
-
         let ptr = gap.vec.as_mut_ptr();
 
         /* Drop items while going through Vec, it should be more efficient than
@@ -1593,8 +1588,9 @@ impl<T, A: Allocator> Vec<T, A> {
                 } else {
                     let write_ptr = ptr.add(gap.write);
 
-                    /* Looks like doing just `copy` can be faster than
-                     * conditional `copy_nonoverlapping` */
+                    /* Because `read_ptr` can be equal to `write_ptr`, we either
+                     * have to use `copy` or conditional `copy_nonoverlapping`.
+                     * Looks like the first option is faster. */
                     ptr::copy(read_ptr, write_ptr, 1);
 
                     /* We have filled that place, so go further */

From 2285f11724e2fa3251c94c9ab7672544099600e2 Mon Sep 17 00:00:00 2001
From: Soveu
Date: Mon, 15 Mar 2021 21:26:22 +0100
Subject: [PATCH 5/6] Vec::dedup optimization - add test for panic

---
 library/alloc/tests/vec.rs | 54 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/library/alloc/tests/vec.rs b/library/alloc/tests/vec.rs
index 1d28fc9eebf82..536fb4af5647d 100644
--- a/library/alloc/tests/vec.rs
+++ b/library/alloc/tests/vec.rs
@@ -7,6 +7,7 @@ use std::mem::{size_of, swap};
 use std::ops::Bound::*;
 use std::panic::{catch_unwind, AssertUnwindSafe};
 use std::rc::Rc;
+use std::sync::atomic::{AtomicU32, Ordering};
 use std::vec::{Drain, IntoIter};
 
 struct DropCounter<'a> {
@@ -2169,3 +2170,56 @@ fn test_vec_dedup() {
         assert_eq!(vec, dedup);
     }
 }
+
+#[test]
+fn test_vec_dedup_panicking() {
+    #[derive(Debug)]
+    struct Panic {
+        drop_counter: &'static AtomicU32,
+        value: bool,
+        index: usize,
+    }
+
+    impl PartialEq for Panic {
+        fn eq(&self, other: &Self) -> bool {
+            self.value == other.value
+        }
+    }
+
+    impl Drop for Panic {
+        fn drop(&mut self) {
+            let x = self.drop_counter.fetch_add(1, Ordering::SeqCst);
+            assert!(x != 4);
+        }
+    }
+
+    static DROP_COUNTER: AtomicU32 = AtomicU32::new(0);
+    let expected = [
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
+    ];
+    let mut vec = vec![
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
+        // these elements get deduplicated
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 1 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 2 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 3 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 4 },
+        // here it panics
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
+    ];
+
+    let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        vec.dedup();
+    }));
+
+    let ok = vec.iter().zip(expected.iter()).all(|(x, y)| x.index == y.index);
+
+    if !ok {
+        panic!("expected: {:?}\ngot: {:?}\n", expected, vec);
+    }
+}

From b0092bc995fa3e6633c3aaa1d0a56006ab7ad1e3 Mon Sep 17 00:00:00 2001
From: Soveu
Date: Tue, 16 Mar 2021 14:41:26 +0100
Subject: [PATCH 6/6] Vec::dedup optimization - add benches

---
 library/alloc/benches/lib.rs |  1 +
 library/alloc/benches/vec.rs | 89 ++++++++++++++++++++++++++++++++++++
 library/alloc/tests/vec.rs   |  2 +-
 3 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/library/alloc/benches/lib.rs b/library/alloc/benches/lib.rs
index 32edb86d10119..38a8f65f1695a 100644
--- a/library/alloc/benches/lib.rs
+++ b/library/alloc/benches/lib.rs
@@ -4,6 +4,7 @@
 #![feature(btree_drain_filter)]
 #![feature(map_first_last)]
 #![feature(repr_simd)]
+#![feature(slice_partition_dedup)]
 #![feature(test)]
 
 extern crate test;
diff --git a/library/alloc/benches/vec.rs b/library/alloc/benches/vec.rs
index 89893b6209c0a..73eb353f6e7d4 100644
--- a/library/alloc/benches/vec.rs
+++ b/library/alloc/benches/vec.rs
@@ -671,3 +671,92 @@ fn bench_map_fast(b: &mut Bencher) {
     let data = black_box([(0, 0); LEN]);
     b.iter(|| map_fast(&data));
 }
+
+fn random_sorted_fill(mut seed: u32, buf: &mut [u32]) {
+    let mask = if buf.len() < 8192 {
+        0xFF
+    } else if buf.len() < 200_000 {
+        0xFFFF
+    } else {
+        0xFFFF_FFFF
+    };
+
+    for item in buf.iter_mut() {
+        seed ^= seed << 13;
+        seed ^= seed >> 17;
+        seed ^= seed << 5;
+
+        *item = seed & mask;
+    }
+
+    buf.sort();
+}
+
+fn bench_vec_dedup_old(b: &mut Bencher, sz: usize) {
+    let mut template = vec![0u32; sz];
+    b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
+    random_sorted_fill(0x43, &mut template);
+
+    let mut vec = template.clone();
+    b.iter(|| {
+        let len = {
+            let (dedup, _) = vec.partition_dedup();
+            dedup.len()
+        };
+        vec.truncate(len);
+
+        black_box(vec.first());
+        vec.clear();
+        vec.extend_from_slice(&template);
+    });
+}
+
+fn bench_vec_dedup_new(b: &mut Bencher, sz: usize) {
+    let mut template = vec![0u32; sz];
+    b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
+    random_sorted_fill(0x43, &mut template);
+
+    let mut vec = template.clone();
+    b.iter(|| {
+        vec.dedup();
+        black_box(vec.first());
+        vec.clear();
+        vec.extend_from_slice(&template);
+    });
+}
+
+#[bench]
+fn bench_dedup_old_100(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 100);
+}
+#[bench]
+fn bench_dedup_new_100(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 100);
+}
+
+#[bench]
+fn bench_dedup_old_1000(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 1000);
+}
+#[bench]
+fn bench_dedup_new_1000(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 1000);
+}
+
+#[bench]
+fn bench_dedup_old_10000(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 10000);
+}
+#[bench]
+fn bench_dedup_new_10000(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 10000);
+}
+
+#[bench]
+fn bench_dedup_old_100000(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 100000);
+}
+#[bench]
+fn bench_dedup_new_100000(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 100000);
+}
diff --git a/library/alloc/tests/vec.rs b/library/alloc/tests/vec.rs
index e4a20dd932451..c142536cd2dfb 100644
--- a/library/alloc/tests/vec.rs
+++ b/library/alloc/tests/vec.rs
@@ -2267,4 +2267,4 @@ fn test_extend_from_within_panicing_clone() {
     std::panic::catch_unwind(move || vec.extend_from_within(..)).unwrap_err();
 
     assert_eq!(count.load(Ordering::SeqCst), 4);
-}
\ No newline at end of file
+}
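
Taken together, the series replaces the old `partition_dedup_by` + `truncate` implementation with a single pass: a `read` cursor scans the vector while a `write` cursor marks the end of the already-deduplicated prefix, and duplicates are dropped in place. A minimal, safe sketch of that idea follows; the name `dedup_by_model` and its `Clone` bound are illustrative only (the real method moves elements with `ptr::copy` and never clones). It is followed by a call to the real `Vec::dedup_by` on the same input as `test_vec_dedup_by` above.

// Illustrative sketch, not part of the patches above.
fn dedup_by_model<T: Clone, F: FnMut(&T, &T) -> bool>(v: &mut Vec<T>, mut same_bucket: F) {
    if v.len() <= 1 {
        return;
    }
    // `write` is the length of the already-deduplicated prefix.
    let mut write = 1;
    for read in 1..v.len() {
        // Compare the candidate at `read` with the last kept element.
        if !same_bucket(&v[read], &v[write - 1]) {
            v[write] = v[read].clone(); // the real code moves instead of cloning
            write += 1;
        }
    }
    // Everything past `write` is a duplicate; the real code has already
    // dropped those elements one by one and only adjusts the length here.
    v.truncate(write);
}

fn main() {
    let mut a = vec![1, 1, 2, 3, 3, 3, 4];
    dedup_by_model(&mut a, |x, y| x == y);
    assert_eq!(a, [1, 2, 3, 4]);

    // The standard-library method this series optimizes, with the same
    // input as `test_vec_dedup_by`:
    let mut b: Vec<i32> = vec![1, -1, 2, 3, 1, -5, 5, -2, 2];
    b.dedup_by(|x, y| x.abs() == y.abs());
    assert_eq!(b, [1, 2, 3, 1, -5, -2]);
}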
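
`FillGapOnDrop` from PATCH 2 is an instance of the usual drop-guard idiom: stash enough state to restore the container's invariant, perform the restoration in `Drop`, and `mem::forget` the guard on the success path so the fix-up only runs when `same_bucket` unwinds. The sketch below shows the bare idiom with a hypothetical `Restore` guard and `bump_or_unwind` helper; the patch's guard instead copies the untouched tail `vec[read..]` over the gap of already-dropped elements and shrinks the length with `set_len`.

use std::panic::{catch_unwind, AssertUnwindSafe};

// Illustrative drop-guard sketch, not part of the patches above.
struct Restore<'a> {
    slot: &'a mut i32,
    saved: i32,
}

impl Drop for Restore<'_> {
    fn drop(&mut self) {
        // Runs only if the guard is not forgotten, i.e. on the unwinding path.
        *self.slot = self.saved;
    }
}

fn bump_or_unwind(value: &mut i32, should_panic: bool) {
    let guard = Restore { saved: *value, slot: value };
    *guard.slot += 1; // "work" that may be interrupted by a panic
    if should_panic {
        panic!("simulated `same_bucket` panic");
    }
    // Success path: keep the new state and skip the fix-up in Drop,
    // just like `mem::forget(gap)` at the end of the new `dedup_by`.
    std::mem::forget(guard);
}

fn main() {
    let mut value = 10;
    bump_or_unwind(&mut value, false);
    assert_eq!(value, 11); // the success path kept the change

    let result = catch_unwind(AssertUnwindSafe(|| bump_or_unwind(&mut value, true)));
    assert!(result.is_err());
    assert_eq!(value, 11); // Drop rolled the half-done change back
}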