Skip to content

Commit 929f66a

Browse files
committed
Auto merge of #80115 - tgnottingham:specialize_opaque_u8_sequences, r=oli-obk
rustc_serialize: specialize opaque encoding and decoding of some u8 sequences

This specializes encoding and decoding of some contiguous u8 sequences to use a more efficient implementation. The default implementations process each u8 individually, but that isn't necessary for the opaque encoder and decoder. The opaque encoding for u8s is a no-op, so we can just copy entire sequences as-is, rather than process them byte by byte.

This also changes some encode and decode implementations for contiguous sequences to forward to the slice and vector implementations, so that they can take advantage of the new specialization when applicable.
2 parents 5986dd8 + be79f49 commit 929f66a

File tree

7 files changed

+84
-47
lines changed

7 files changed

+84
-47
lines changed

compiler/rustc_data_structures/src/fingerprint.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use rustc_serialize::{
44
Decodable, Encodable,
55
};
66
use std::hash::{Hash, Hasher};
7-
use std::mem;
7+
use std::mem::{self, MaybeUninit};
88

99
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy)]
1010
pub struct Fingerprint(u64, u64);
@@ -61,7 +61,7 @@ impl Fingerprint {
6161
}
6262

6363
pub fn decode_opaque(decoder: &mut opaque::Decoder<'_>) -> Result<Fingerprint, String> {
64-
let mut bytes = [0; 16];
64+
let mut bytes: [MaybeUninit<u8>; 16] = MaybeUninit::uninit_array();
6565

6666
decoder.read_raw_bytes(&mut bytes)?;
6767

compiler/rustc_macros/src/serialize.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ fn encodable_body(
203203
#field_name,
204204
#field_idx,
205205
|__encoder|
206-
::rustc_serialize::Encodable::encode(#bind_ident, __encoder),
206+
::rustc_serialize::Encodable::<#encoder_ty>::encode(#bind_ident, __encoder),
207207
) {
208208
::std::result::Result::Ok(()) => (),
209209
::std::result::Result::Err(__err)
@@ -237,7 +237,7 @@ fn encodable_body(
237237
__encoder,
238238
#field_idx,
239239
|__encoder|
240-
::rustc_serialize::Encodable::encode(#bind_ident, __encoder),
240+
::rustc_serialize::Encodable::<#encoder_ty>::encode(#bind_ident, __encoder),
241241
) {
242242
::std::result::Result::Ok(()) => (),
243243
::std::result::Result::Err(__err)

compiler/rustc_middle/src/ty/query/on_disk_cache.rs

+19
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,15 @@ impl<'a, 'tcx> TyDecoder<'tcx> for CacheDecoder<'a, 'tcx> {
807807

808808
crate::implement_ty_decoder!(CacheDecoder<'a, 'tcx>);
809809

810+
// This ensures that the `Decodable<opaque::Decoder>::decode` specialization for `Vec<u8>` is used
811+
// when a `CacheDecoder` is passed to `Decodable::decode`. Unfortunately, we have to manually opt
812+
// into specializations this way, given how `CacheDecoder` and the decoding traits currently work.
813+
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Vec<u8> {
814+
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Result<Self, String> {
815+
Decodable::decode(&mut d.opaque)
816+
}
817+
}
818+
810819
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for SyntaxContext {
811820
fn decode(decoder: &mut CacheDecoder<'a, 'tcx>) -> Result<Self, String> {
812821
let syntax_contexts = decoder.syntax_contexts;
@@ -1149,6 +1158,16 @@ where
11491158
}
11501159
}
11511160

1161+
// This ensures that the `Encodable<opaque::Encoder>::encode` specialization for byte slices
1162+
// is used when a `CacheEncoder` having an `opaque::Encoder` is passed to `Encodable::encode`.
1163+
// Unfortunately, we have to manually opt into specializations this way, given how `CacheEncoder`
1164+
// and the encoding traits currently work.
1165+
impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx, opaque::Encoder>> for [u8] {
1166+
fn encode(&self, e: &mut CacheEncoder<'a, 'tcx, opaque::Encoder>) -> opaque::EncodeResult {
1167+
self.encode(e.encoder)
1168+
}
1169+
}
1170+
11521171
// An integer that will always encode to 8 bytes.
11531172
struct IntEncodedWithFixedSize(u64);
11541173

compiler/rustc_serialize/src/collection_impls.rs

+10-32
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,8 @@ use smallvec::{Array, SmallVec};
1111

1212
impl<S: Encoder, A: Array<Item: Encodable<S>>> Encodable<S> for SmallVec<A> {
1313
fn encode(&self, s: &mut S) -> Result<(), S::Error> {
14-
s.emit_seq(self.len(), |s| {
15-
for (i, e) in self.iter().enumerate() {
16-
s.emit_seq_elt(i, |s| e.encode(s))?;
17-
}
18-
Ok(())
19-
})
14+
let slice: &[A::Item] = self;
15+
slice.encode(s)
2016
}
2117
}
2218

@@ -292,46 +288,28 @@ where
292288

293289
impl<E: Encoder, T: Encodable<E>> Encodable<E> for Rc<[T]> {
294290
fn encode(&self, s: &mut E) -> Result<(), E::Error> {
295-
s.emit_seq(self.len(), |s| {
296-
for (index, e) in self.iter().enumerate() {
297-
s.emit_seq_elt(index, |s| e.encode(s))?;
298-
}
299-
Ok(())
300-
})
291+
let slice: &[T] = self;
292+
slice.encode(s)
301293
}
302294
}
303295

304296
impl<D: Decoder, T: Decodable<D>> Decodable<D> for Rc<[T]> {
305297
fn decode(d: &mut D) -> Result<Rc<[T]>, D::Error> {
306-
d.read_seq(|d, len| {
307-
let mut vec = Vec::with_capacity(len);
308-
for index in 0..len {
309-
vec.push(d.read_seq_elt(index, |d| Decodable::decode(d))?);
310-
}
311-
Ok(vec.into())
312-
})
298+
let vec: Vec<T> = Decodable::decode(d)?;
299+
Ok(vec.into())
313300
}
314301
}
315302

316303
impl<E: Encoder, T: Encodable<E>> Encodable<E> for Arc<[T]> {
317304
fn encode(&self, s: &mut E) -> Result<(), E::Error> {
318-
s.emit_seq(self.len(), |s| {
319-
for (index, e) in self.iter().enumerate() {
320-
s.emit_seq_elt(index, |s| e.encode(s))?;
321-
}
322-
Ok(())
323-
})
305+
let slice: &[T] = self;
306+
slice.encode(s)
324307
}
325308
}
326309

327310
impl<D: Decoder, T: Decodable<D>> Decodable<D> for Arc<[T]> {
328311
fn decode(d: &mut D) -> Result<Arc<[T]>, D::Error> {
329-
d.read_seq(|d, len| {
330-
let mut vec = Vec::with_capacity(len);
331-
for index in 0..len {
332-
vec.push(d.read_seq_elt(index, |d| Decodable::decode(d))?);
333-
}
334-
Ok(vec.into())
335-
})
312+
let vec: Vec<T> = Decodable::decode(d)?;
313+
Ok(vec.into())
336314
}
337315
}

compiler/rustc_serialize/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ Core encoding and decoding interfaces.
1414
#![feature(nll)]
1515
#![feature(associated_type_bounds)]
1616
#![cfg_attr(bootstrap, feature(min_const_generics))]
17+
#![feature(min_specialization)]
18+
#![feature(vec_spare_capacity)]
1719
#![cfg_attr(test, feature(test))]
1820
#![allow(rustc::internal)]
1921

compiler/rustc_serialize/src/opaque.rs

+45-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use crate::leb128::{self, read_signed_leb128, write_signed_leb128};
22
use crate::serialize;
33
use std::borrow::Cow;
4+
use std::mem::MaybeUninit;
5+
use std::ptr;
46

57
// -----------------------------------------------------------------------------
68
// Encoder
@@ -179,11 +181,19 @@ impl<'a> Decoder<'a> {
179181
}
180182

181183
#[inline]
182-
pub fn read_raw_bytes(&mut self, s: &mut [u8]) -> Result<(), String> {
184+
pub fn read_raw_bytes(&mut self, s: &mut [MaybeUninit<u8>]) -> Result<(), String> {
183185
let start = self.position;
184186
let end = start + s.len();
187+
assert!(end <= self.data.len());
185188

186-
s.copy_from_slice(&self.data[start..end]);
189+
// SAFETY: Both `src` and `dst` point to at least `s.len()` elements:
190+
// `src` points to at least `s.len()` elements by above assert, and
191+
// `dst` points to `s.len()` elements by derivation from `s`.
192+
unsafe {
193+
let src = self.data.as_ptr().add(start);
194+
let dst = s.as_mut_ptr() as *mut u8;
195+
ptr::copy_nonoverlapping(src, dst, s.len());
196+
}
187197

188198
self.position = end;
189199

@@ -316,3 +326,36 @@ impl<'a> serialize::Decoder for Decoder<'a> {
316326
err.to_string()
317327
}
318328
}
329+
330+
// Specializations for contiguous byte sequences follow. The default implementations for slices
331+
// encode and decode each element individually. This isn't necessary for `u8` slices when using
332+
// opaque encoders and decoders, because each `u8` is unchanged by encoding and decoding.
333+
// Therefore, we can use more efficient implementations that process the entire sequence at once.
334+
335+
// Specialize encoding byte slices. This specialization also applies to encoding `Vec<u8>`s, etc.,
336+
// since the default implementations call `encode` on their slices internally.
337+
impl serialize::Encodable<Encoder> for [u8] {
338+
fn encode(&self, e: &mut Encoder) -> EncodeResult {
339+
serialize::Encoder::emit_usize(e, self.len())?;
340+
e.emit_raw_bytes(self);
341+
Ok(())
342+
}
343+
}
344+
345+
// Specialize decoding `Vec<u8>`. This specialization also applies to decoding `Box<[u8]>`s, etc.,
346+
// since the default implementations call `decode` to produce a `Vec<u8>` internally.
347+
impl<'a> serialize::Decodable<Decoder<'a>> for Vec<u8> {
348+
fn decode(d: &mut Decoder<'a>) -> Result<Self, String> {
349+
let len = serialize::Decoder::read_usize(d)?;
350+
351+
let mut v = Vec::with_capacity(len);
352+
let buf = &mut v.spare_capacity_mut()[..len];
353+
d.read_raw_bytes(buf)?;
354+
355+
unsafe {
356+
v.set_len(len);
357+
}
358+
359+
Ok(v)
360+
}
361+
}

compiler/rustc_serialize/src/serialize.rs

+4-9
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ impl<D: Decoder, T: Decodable<D>> Decodable<D> for Rc<T> {
527527
}
528528

529529
impl<S: Encoder, T: Encodable<S>> Encodable<S> for [T] {
530-
fn encode(&self, s: &mut S) -> Result<(), S::Error> {
530+
default fn encode(&self, s: &mut S) -> Result<(), S::Error> {
531531
s.emit_seq(self.len(), |s| {
532532
for (i, e) in self.iter().enumerate() {
533533
s.emit_seq_elt(i, |s| e.encode(s))?
@@ -545,7 +545,7 @@ impl<S: Encoder, T: Encodable<S>> Encodable<S> for Vec<T> {
545545
}
546546

547547
impl<D: Decoder, T: Decodable<D>> Decodable<D> for Vec<T> {
548-
fn decode(d: &mut D) -> Result<Vec<T>, D::Error> {
548+
default fn decode(d: &mut D) -> Result<Vec<T>, D::Error> {
549549
d.read_seq(|d, len| {
550550
let mut v = Vec::with_capacity(len);
551551
for i in 0..len {
@@ -591,13 +591,8 @@ where
591591
[T]: ToOwned<Owned = Vec<T>>,
592592
{
593593
fn decode(d: &mut D) -> Result<Cow<'static, [T]>, D::Error> {
594-
d.read_seq(|d, len| {
595-
let mut v = Vec::with_capacity(len);
596-
for i in 0..len {
597-
v.push(d.read_seq_elt(i, |d| Decodable::decode(d))?);
598-
}
599-
Ok(Cow::Owned(v))
600-
})
594+
let v: Vec<T> = Decodable::decode(d)?;
595+
Ok(Cow::Owned(v))
601596
}
602597
}
603598

0 commit comments

Comments
 (0)