Skip to content

Commit 670f024

Browse files
authored
Merge pull request #55 from eddyb/const-values
v0: demangle structural constants and &str.
2 parents ef26263 + fd906f8 commit 670f024

File tree

1 file changed

+262
-5
lines changed

1 file changed

+262
-5
lines changed

src/v0.rs

+262-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use core::convert::TryFrom;
2-
use core::{char, fmt, iter, mem};
2+
use core::{char, fmt, iter, mem, str};
33

44
#[allow(unused_macros)]
55
macro_rules! write {
@@ -287,6 +287,84 @@ impl<'s> HexNibbles<'s> {
287287
}
288288
Some(v)
289289
}
290+
291+
/// Decode a UTF-8 byte sequence (with each byte using a pair of nibbles)
292+
/// into individual `char`s, returning `None` for invalid UTF-8.
293+
fn try_parse_str_chars(&self) -> Option<impl Iterator<Item = char> + 's> {
294+
if self.nibbles.len() % 2 != 0 {
295+
return None;
296+
}
297+
298+
// FIXME(eddyb) use `array_chunks` instead, when that becomes stable.
299+
let mut bytes = self
300+
.nibbles
301+
.as_bytes()
302+
.chunks_exact(2)
303+
.map(|slice| match slice {
304+
[a, b] => [a, b],
305+
_ => unreachable!(),
306+
})
307+
.map(|[&hi, &lo]| {
308+
let half = |nibble: u8| (nibble as char).to_digit(16).unwrap() as u8;
309+
(half(hi) << 4) | half(lo)
310+
});
311+
312+
let chars = iter::from_fn(move || {
313+
// As long as there are any bytes left, there's at least one more
314+
// UTF-8-encoded `char` to decode (or the possibility of error).
315+
bytes.next().map(|first_byte| -> Result<char, ()> {
316+
// FIXME(eddyb) this `enum` and `fn` should be somewhere in `core`.
317+
enum Utf8FirstByteError {
318+
ContinuationByte,
319+
TooLong,
320+
}
321+
fn utf8_len_from_first_byte(byte: u8) -> Result<usize, Utf8FirstByteError> {
322+
match byte {
323+
0x00..=0x7f => Ok(1),
324+
0x80..=0xbf => Err(Utf8FirstByteError::ContinuationByte),
325+
0xc0..=0xdf => Ok(2),
326+
0xe0..=0xef => Ok(3),
327+
0xf0..=0xf7 => Ok(4),
328+
0xf8..=0xff => Err(Utf8FirstByteError::TooLong),
329+
}
330+
}
331+
332+
// Collect the appropriate amount of bytes (up to 4), according
333+
// to the UTF-8 length implied by the first byte.
334+
let utf8_len = utf8_len_from_first_byte(first_byte).map_err(|_| ())?;
335+
let utf8 = &mut [first_byte, 0, 0, 0][..utf8_len];
336+
for i in 1..utf8_len {
337+
utf8[i] = bytes.next().ok_or(())?;
338+
}
339+
340+
// Fully validate the UTF-8 sequence.
341+
let s = str::from_utf8(utf8).map_err(|_| ())?;
342+
343+
// Since we included exactly one UTF-8 sequence, and validation
344+
// succeeded, `str::chars` should return exactly one `char`.
345+
let mut chars = s.chars();
346+
match (chars.next(), chars.next()) {
347+
(Some(c), None) => Ok(c),
348+
_ => unreachable!(
349+
"str::from_utf8({:?}) = {:?} was expected to have 1 char, \
350+
but {} chars were found",
351+
utf8,
352+
s,
353+
s.chars().count()
354+
),
355+
}
356+
})
357+
});
358+
359+
// HACK(eddyb) doing a separate validation iteration like this might be
360+
// wasteful, but it's easier to avoid starting to print a string literal
361+
// in the first place, than to abort it mid-string.
362+
if chars.clone().any(|r| r.is_err()) {
363+
None
364+
} else {
365+
Some(chars.map(Result::unwrap))
366+
}
367+
}
290368
}
291369

292370
fn basic_type(tag: u8) -> Option<&'static str> {
@@ -815,7 +893,7 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
815893
let lt = parse!(self, integer_62);
816894
self.print_lifetime_from_index(lt)
817895
} else if self.eat(b'K') {
818-
self.print_const()
896+
self.print_const(false)
819897
} else {
820898
self.print_type()
821899
}
@@ -861,7 +939,7 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
861939
self.print_type()?;
862940
if tag == b'A' {
863941
self.print("; ")?;
864-
self.print_const()?;
942+
self.print_const(true)?;
865943
}
866944
self.print("]")?;
867945
}
@@ -1001,11 +1079,28 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
10011079
Ok(())
10021080
}
10031081

1004-
fn print_const(&mut self) -> fmt::Result {
1082+
fn print_const(&mut self, in_value: bool) -> fmt::Result {
10051083
let tag = parse!(self, next);
10061084

10071085
parse!(self, push_depth);
10081086

1087+
// Only literals (and the names of `const` generic parameters, but they
1088+
// don't get mangled at all), can appear in generic argument position
1089+
// without any disambiguation, all other expressions require braces.
1090+
// To avoid duplicating the mapping between `tag` and what syntax gets
1091+
// used (especially any special-casing), every case that needs braces
1092+
// has to call `open_brace_if_outside_expr(self)?` (and the closing brace is automatic).
1093+
let mut opened_brace = false;
1094+
let mut open_brace_if_outside_expr = |this: &mut Self| {
1095+
// If this expression is nested in another, braces aren't required.
1096+
if in_value {
1097+
return Ok(());
1098+
}
1099+
1100+
opened_brace = true;
1101+
this.print("{")
1102+
};
1103+
10091104
match tag {
10101105
b'p' => self.print("_")?,
10111106

@@ -1033,13 +1128,82 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
10331128
None => invalid!(self),
10341129
}
10351130
}
1131+
b'e' => {
1132+
// NOTE(eddyb) a string literal `"..."` has type `&str`, so
1133+
// to get back the type `str`, `*"..."` syntax is needed
1134+
// (even if that may not be valid in Rust itself).
1135+
open_brace_if_outside_expr(self)?;
1136+
self.print("*")?;
1137+
1138+
self.print_const_str_literal()?;
1139+
}
10361140

1141+
b'R' | b'Q' => {
1142+
// NOTE(eddyb) this prints `"..."` instead of `&*"..."`, which
1143+
// is what `Re..._` would imply (see comment for `str` above).
1144+
if tag == b'R' && self.eat(b'e') {
1145+
self.print_const_str_literal()?;
1146+
} else {
1147+
open_brace_if_outside_expr(self)?;
1148+
self.print("&")?;
1149+
if tag != b'R' {
1150+
self.print("mut ")?;
1151+
}
1152+
self.print_const(true)?;
1153+
}
1154+
}
1155+
b'A' => {
1156+
open_brace_if_outside_expr(self)?;
1157+
self.print("[")?;
1158+
self.print_sep_list(|this| this.print_const(true), ", ")?;
1159+
self.print("]")?;
1160+
}
1161+
b'T' => {
1162+
open_brace_if_outside_expr(self)?;
1163+
self.print("(")?;
1164+
let count = self.print_sep_list(|this| this.print_const(true), ", ")?;
1165+
if count == 1 {
1166+
self.print(",")?;
1167+
}
1168+
self.print(")")?;
1169+
}
1170+
b'V' => {
1171+
open_brace_if_outside_expr(self)?;
1172+
self.print_path(true)?;
1173+
match parse!(self, next) {
1174+
b'U' => {}
1175+
b'T' => {
1176+
self.print("(")?;
1177+
self.print_sep_list(|this| this.print_const(true), ", ")?;
1178+
self.print(")")?;
1179+
}
1180+
b'S' => {
1181+
self.print(" { ")?;
1182+
self.print_sep_list(
1183+
|this| {
1184+
parse!(this, disambiguator);
1185+
let name = parse!(this, ident);
1186+
this.print(name)?;
1187+
this.print(": ")?;
1188+
this.print_const(true)
1189+
},
1190+
", ",
1191+
)?;
1192+
self.print(" }")?;
1193+
}
1194+
_ => invalid!(self),
1195+
}
1196+
}
10371197
b'B' => {
1038-
self.print_backref(Self::print_const)?;
1198+
self.print_backref(|this| this.print_const(in_value))?;
10391199
}
10401200
_ => invalid!(self),
10411201
}
10421202

1203+
if opened_brace {
1204+
self.print("}")?;
1205+
}
1206+
10431207
self.pop_depth();
10441208
Ok(())
10451209
}
@@ -1066,6 +1230,13 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
10661230

10671231
Ok(())
10681232
}
1233+
1234+
fn print_const_str_literal(&mut self) -> fmt::Result {
1235+
match parse!(self, hex_nibbles).try_parse_str_chars() {
1236+
Some(chars) => self.print_quoted_escaped_chars('"', chars),
1237+
None => invalid!(self),
1238+
}
1239+
}
10691240
}
10701241

10711242
#[cfg(test)]
@@ -1164,6 +1335,92 @@ mod tests {
11641335
t_const!("c2202_", "'∂'");
11651336
}
11661337

1338+
#[test]
fn demangle_const_str() {
    // A bare `e…_` constant has type `str`, so it is rendered as a
    // dereferenced literal wrapped in braces: `{*"..."}`.

    // Plain ASCII: bytes 61 62 63.
    t_const!("e616263_", r#"{*"abc"}"#);
    // A single quote is not escaped inside a double-quoted literal.
    t_const!("e27_", "{*\"'\"}");
    // Tab and newline come out as escape sequences.
    t_const!("e090a_", r#"{*"\t\n"}"#);
    // Multi-byte UTF-8 sequences (3-byte and 2-byte).
    t_const!("ee28882c3bc_", r#"{*"∂ü"}"#);
    // Longer run of 3-byte sequences mixed with ASCII underscores.
    t_const!(
        "ee183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\
         e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_",
        r#"{*"საჭმელად_გემრიელი_სადილი"}"#
    );
    // 4-byte sequences mixed with shorter ones.
    t_const!(
        "ef09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\
         95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_",
        r#"{*"🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜"}"#
    );
}
1355+
1356+
// NOTE(eddyb) the strings here mirror the ones in `demangle_const_str` and
// the two lists should be kept in sync - a macro could generate both the
// `str` and `&str` tests from one list, but spelling them out seems clearer.
#[test]
fn demangle_const_ref_str() {
    // `Re…_` (a reference to a `str` constant) is rendered as a plain
    // string literal, with no braces and no `&*` prefix.

    // Plain ASCII: bytes 61 62 63.
    t_const!("Re616263_", r#""abc""#);
    // A single quote is not escaped inside a double-quoted literal.
    t_const!("Re27_", "\"'\"");
    // Tab and newline come out as escape sequences.
    t_const!("Re090a_", r#""\t\n""#);
    // Multi-byte UTF-8 sequences (3-byte and 2-byte).
    t_const!("Ree28882c3bc_", r#""∂ü""#);
    // Longer run of 3-byte sequences mixed with ASCII underscores.
    t_const!(
        "Ree183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\
         e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_",
        r#""საჭმელად_გემრიელი_სადილი""#
    );
    // 4-byte sequences mixed with shorter ones.
    t_const!(
        "Ref09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\
         95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_",
        r#""🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜""#
    );
}
1376+
1377+
#[test]
fn demangle_const_ref() {
    // A reference in generic-argument position gets wrapped in braces.

    // Reference to a placeholder.
    t_const!("Rp", r"{&_}");
    // References to integer, bool and char literals.
    t_const!("Rh7b_", r"{&123}");
    t_const!("Rb0_", r"{&false}");
    t_const!("Rc58_", r"{&'X'}");
    // Nested references share the single outer pair of braces.
    t_const!("RRRh0_", r"{&&&0}");
    // Only the innermost `Re` collapses into a bare string literal.
    t_const!("RRRe_", r#"{&&""}"#);
    // `Q` is the mutable-reference tag.
    t_const!("QAE", r"{&mut []}");
}
1387+
1388+
#[test]
fn demangle_const_array() {
    // Empty array.
    t_const!("AE", r"{[]}");
    // One element.
    t_const!("Aj0_E", r"{[0]}");
    // Several elements, separated by ", ".
    t_const!("Ah1_h2_h3_E", r"{[1, 2, 3]}");
    // `&str` elements print as bare string literals.
    t_const!("ARe61_Re62_Re63_E", r#"{["a", "b", "c"]}"#);
    // Nested arrays do not get braces of their own.
    t_const!("AAh1_h2_EAh3_h4_EE", r"{[[1, 2], [3, 4]]}");
}
1396+
1397+
#[test]
fn demangle_const_tuple() {
    // Unit / empty tuple.
    t_const!("TE", r"{()}");
    // A 1-tuple keeps its trailing comma.
    t_const!("Tj0_E", r"{(0,)}");
    // Two fields of different types.
    t_const!("Th1_b0_E", r"{(1, false)}");
    // Mixed fields: a `&str`, a char and a reference to an array.
    t_const!(
        "TRe616263_c78_RAh1_h2_h3_EE",
        r#"{("abc", 'x', &[1, 2, 3])}"#
    );
}
1407+
1408+
#[test]
fn demangle_const_adt() {
    // Unit-like variant (`U`): just the path.
    t_const!(
        "VNvINtNtC4core6option6OptionjE4NoneU",
        r"{core::option::Option::<usize>::None}"
    );
    // Tuple-like variant (`T`): path followed by parenthesized fields.
    t_const!(
        "VNvINtNtC4core6option6OptionjE4SomeTj0_E",
        r"{core::option::Option::<usize>::Some(0)}"
    );
    // Struct-like value (`S`): path followed by `name: value` fields.
    t_const!(
        "VNtC3foo3BarS1sRe616263_2chc78_5sliceRAh1_h2_h3_EE",
        r#"{foo::Bar { s: "abc", ch: 'x', slice: &[1, 2, 3] }}"#
    );
}
1423+
11671424
#[test]
11681425
fn demangle_exponential_explosion() {
11691426
// NOTE(eddyb) because of the prefix added by `t_nohash_type!` is

0 commit comments

Comments
 (0)