1
1
use core:: convert:: TryFrom ;
2
- use core:: { char, fmt, iter, mem} ;
2
+ use core:: { char, fmt, iter, mem, str } ;
3
3
4
4
#[ allow( unused_macros) ]
5
5
macro_rules! write {
@@ -287,6 +287,84 @@ impl<'s> HexNibbles<'s> {
287
287
}
288
288
Some ( v)
289
289
}
290
+
291
+ /// Decode a UTF-8 byte sequence (with each byte using a pair of nibbles)
292
+ /// into individual `char`s, returning `None` for invalid UTF-8.
293
+ fn try_parse_str_chars ( & self ) -> Option < impl Iterator < Item = char > + ' s > {
294
+ if self . nibbles . len ( ) % 2 != 0 {
295
+ return None ;
296
+ }
297
+
298
+ // FIXME(eddyb) use `array_chunks` instead, when that becomes stable.
299
+ let mut bytes = self
300
+ . nibbles
301
+ . as_bytes ( )
302
+ . chunks_exact ( 2 )
303
+ . map ( |slice| match slice {
304
+ [ a, b] => [ a, b] ,
305
+ _ => unreachable ! ( ) ,
306
+ } )
307
+ . map ( |[ & hi, & lo] | {
308
+ let half = |nibble : u8 | ( nibble as char ) . to_digit ( 16 ) . unwrap ( ) as u8 ;
309
+ ( half ( hi) << 4 ) | half ( lo)
310
+ } ) ;
311
+
312
+ let chars = iter:: from_fn ( move || {
313
+ // As long as there are any bytes left, there's at least one more
314
+ // UTF-8-encoded `char` to decode (or the possibility of error).
315
+ bytes. next ( ) . map ( |first_byte| -> Result < char , ( ) > {
316
+ // FIXME(eddyb) this `enum` and `fn` should be somewhere in `core`.
317
+ enum Utf8FirstByteError {
318
+ ContinuationByte ,
319
+ TooLong ,
320
+ }
321
+ fn utf8_len_from_first_byte ( byte : u8 ) -> Result < usize , Utf8FirstByteError > {
322
+ match byte {
323
+ 0x00 ..=0x7f => Ok ( 1 ) ,
324
+ 0x80 ..=0xbf => Err ( Utf8FirstByteError :: ContinuationByte ) ,
325
+ 0xc0 ..=0xdf => Ok ( 2 ) ,
326
+ 0xe0 ..=0xef => Ok ( 3 ) ,
327
+ 0xf0 ..=0xf7 => Ok ( 4 ) ,
328
+ 0xf8 ..=0xff => Err ( Utf8FirstByteError :: TooLong ) ,
329
+ }
330
+ }
331
+
332
+ // Collect the appropriate amount of bytes (up to 4), according
333
+ // to the UTF-8 length implied by the first byte.
334
+ let utf8_len = utf8_len_from_first_byte ( first_byte) . map_err ( |_| ( ) ) ?;
335
+ let utf8 = & mut [ first_byte, 0 , 0 , 0 ] [ ..utf8_len] ;
336
+ for i in 1 ..utf8_len {
337
+ utf8[ i] = bytes. next ( ) . ok_or ( ( ) ) ?;
338
+ }
339
+
340
+ // Fully validate the UTF-8 sequence.
341
+ let s = str:: from_utf8 ( utf8) . map_err ( |_| ( ) ) ?;
342
+
343
+ // Since we included exactly one UTF-8 sequence, and validation
344
+ // succeeded, `str::chars` should return exactly one `char`.
345
+ let mut chars = s. chars ( ) ;
346
+ match ( chars. next ( ) , chars. next ( ) ) {
347
+ ( Some ( c) , None ) => Ok ( c) ,
348
+ _ => unreachable ! (
349
+ "str::from_utf8({:?}) = {:?} was expected to have 1 char, \
350
+ but {} chars were found",
351
+ utf8,
352
+ s,
353
+ s. chars( ) . count( )
354
+ ) ,
355
+ }
356
+ } )
357
+ } ) ;
358
+
359
+ // HACK(eddyb) doing a separate validation iteration like this might be
360
+ // wasteful, but it's easier to avoid starting to print a string literal
361
+ // in the first place, than to abort it mid-string.
362
+ if chars. clone ( ) . any ( |r| r. is_err ( ) ) {
363
+ None
364
+ } else {
365
+ Some ( chars. map ( Result :: unwrap) )
366
+ }
367
+ }
290
368
}
291
369
292
370
fn basic_type ( tag : u8 ) -> Option < & ' static str > {
@@ -815,7 +893,7 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
815
893
let lt = parse ! ( self , integer_62) ;
816
894
self . print_lifetime_from_index ( lt)
817
895
} else if self . eat ( b'K' ) {
818
- self . print_const ( )
896
+ self . print_const ( false )
819
897
} else {
820
898
self . print_type ( )
821
899
}
@@ -861,7 +939,7 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
861
939
self . print_type ( ) ?;
862
940
if tag == b'A' {
863
941
self . print ( "; " ) ?;
864
- self . print_const ( ) ?;
942
+ self . print_const ( true ) ?;
865
943
}
866
944
self . print ( "]" ) ?;
867
945
}
@@ -1001,11 +1079,28 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
1001
1079
Ok ( ( ) )
1002
1080
}
1003
1081
1004
- fn print_const ( & mut self ) -> fmt:: Result {
1082
+ fn print_const ( & mut self , in_value : bool ) -> fmt:: Result {
1005
1083
let tag = parse ! ( self , next) ;
1006
1084
1007
1085
parse ! ( self , push_depth) ;
1008
1086
1087
+ // Only literals (and the names of `const` generic parameters, but they
1088
+ // don't get mangled at all), can appear in generic argument position
1089
+ // without any disambiguation, all other expressions require braces.
1090
+ // To avoid duplicating the mapping between `tag` and what syntax gets
1091
+ // used (especially any special-casing), every case that needs braces
1092
+ // has to call `open_brace(self)?` (and the closing brace is automatic).
1093
+ let mut opened_brace = false ;
1094
+ let mut open_brace_if_outside_expr = |this : & mut Self | {
1095
+ // If this expression is nested in another, braces aren't required.
1096
+ if in_value {
1097
+ return Ok ( ( ) ) ;
1098
+ }
1099
+
1100
+ opened_brace = true ;
1101
+ this. print ( "{" )
1102
+ } ;
1103
+
1009
1104
match tag {
1010
1105
b'p' => self . print ( "_" ) ?,
1011
1106
@@ -1033,13 +1128,82 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
1033
1128
None => invalid ! ( self ) ,
1034
1129
}
1035
1130
}
1131
+ b'e' => {
1132
+ // NOTE(eddyb) a string literal `"..."` has type `&str`, so
1133
+ // to get back the type `str`, `*"..."` syntax is needed
1134
+ // (even if that may not be valid in Rust itself).
1135
+ open_brace_if_outside_expr ( self ) ?;
1136
+ self . print ( "*" ) ?;
1137
+
1138
+ self . print_const_str_literal ( ) ?;
1139
+ }
1036
1140
1141
+ b'R' | b'Q' => {
1142
+ // NOTE(eddyb) this prints `"..."` instead of `&*"..."`, which
1143
+ // is what `Re..._` would imply (see comment for `str` above).
1144
+ if tag == b'R' && self . eat ( b'e' ) {
1145
+ self . print_const_str_literal ( ) ?;
1146
+ } else {
1147
+ open_brace_if_outside_expr ( self ) ?;
1148
+ self . print ( "&" ) ?;
1149
+ if tag != b'R' {
1150
+ self . print ( "mut " ) ?;
1151
+ }
1152
+ self . print_const ( true ) ?;
1153
+ }
1154
+ }
1155
+ b'A' => {
1156
+ open_brace_if_outside_expr ( self ) ?;
1157
+ self . print ( "[" ) ?;
1158
+ self . print_sep_list ( |this| this. print_const ( true ) , ", " ) ?;
1159
+ self . print ( "]" ) ?;
1160
+ }
1161
+ b'T' => {
1162
+ open_brace_if_outside_expr ( self ) ?;
1163
+ self . print ( "(" ) ?;
1164
+ let count = self . print_sep_list ( |this| this. print_const ( true ) , ", " ) ?;
1165
+ if count == 1 {
1166
+ self . print ( "," ) ?;
1167
+ }
1168
+ self . print ( ")" ) ?;
1169
+ }
1170
+ b'V' => {
1171
+ open_brace_if_outside_expr ( self ) ?;
1172
+ self . print_path ( true ) ?;
1173
+ match parse ! ( self , next) {
1174
+ b'U' => { }
1175
+ b'T' => {
1176
+ self . print ( "(" ) ?;
1177
+ self . print_sep_list ( |this| this. print_const ( true ) , ", " ) ?;
1178
+ self . print ( ")" ) ?;
1179
+ }
1180
+ b'S' => {
1181
+ self . print ( " { " ) ?;
1182
+ self . print_sep_list (
1183
+ |this| {
1184
+ parse ! ( this, disambiguator) ;
1185
+ let name = parse ! ( this, ident) ;
1186
+ this. print ( name) ?;
1187
+ this. print ( ": " ) ?;
1188
+ this. print_const ( true )
1189
+ } ,
1190
+ ", " ,
1191
+ ) ?;
1192
+ self . print ( " }" ) ?;
1193
+ }
1194
+ _ => invalid ! ( self ) ,
1195
+ }
1196
+ }
1037
1197
b'B' => {
1038
- self . print_backref ( Self :: print_const) ?;
1198
+ self . print_backref ( |this| this . print_const ( in_value ) ) ?;
1039
1199
}
1040
1200
_ => invalid ! ( self ) ,
1041
1201
}
1042
1202
1203
+ if opened_brace {
1204
+ self . print ( "}" ) ?;
1205
+ }
1206
+
1043
1207
self . pop_depth ( ) ;
1044
1208
Ok ( ( ) )
1045
1209
}
@@ -1066,6 +1230,13 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
1066
1230
1067
1231
Ok ( ( ) )
1068
1232
}
1233
+
1234
+ fn print_const_str_literal ( & mut self ) -> fmt:: Result {
1235
+ match parse ! ( self , hex_nibbles) . try_parse_str_chars ( ) {
1236
+ Some ( chars) => self . print_quoted_escaped_chars ( '"' , chars) ,
1237
+ None => invalid ! ( self ) ,
1238
+ }
1239
+ }
1069
1240
}
1070
1241
1071
1242
#[ cfg( test) ]
@@ -1164,6 +1335,92 @@ mod tests {
1164
1335
t_const ! ( "c2202_" , "'∂'" ) ;
1165
1336
}
1166
1337
1338
/// `str`-typed constants (`e<hex>_`): printed as `*"..."` and, since these
/// appear outside of any other value, wrapped in braces.
#[test]
fn demangle_const_str() {
    // The expected text has no whitespace around the quotes or braces: the
    // printer emits `{`, `*`, the quoted literal and `}` back-to-back.
    t_const!("e616263_", "{*\"abc\"}");
    t_const!("e27_", r#"{*"'"}"#);
    t_const!("e090a_", "{*\"\\t\\n\"}");
    t_const!("ee28882c3bc_", "{*\"∂ü\"}");
    t_const!(
        "ee183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\
         e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_",
        "{*\"საჭმელად_გემრიელი_სადილი\"}"
    );
    // The spaces around `§` are part of the encoded data (`20c2a720`).
    t_const!(
        "ef09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\
         95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_",
        "{*\"🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜\"}"
    );
}
1355
+
1356
// NOTE(eddyb) this uses the same strings as `demangle_const_str` and should
// be kept in sync with it - while a macro could be used to generate both
// `str` and `&str` tests, from a single list of strings, this seems clearer.
/// `&str`-typed constants (`Re<hex>_`): printed as a bare `"..."` literal,
/// with neither `&*` nor surrounding braces.
#[test]
fn demangle_const_ref_str() {
    t_const!("Re616263_", "\"abc\"");
    t_const!("Re27_", r#""'""#);
    t_const!("Re090a_", "\"\\t\\n\"");
    t_const!("Ree28882c3bc_", "\"∂ü\"");
    t_const!(
        "Ree183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\
         e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_",
        "\"საჭმელად_გემრიელი_სადილი\""
    );
    // The spaces around `§` are part of the encoded data (`20c2a720`).
    t_const!(
        "Ref09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\
         95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_",
        "\"🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜\""
    );
}
1376
+
1377
/// Reference constants (`R` = `&`, `Q` = `&mut `): anything other than a
/// direct `Re..._` string literal needs braces at the outermost level.
#[test]
fn demangle_const_ref() {
    t_const!("Rp", "{&_}");
    t_const!("Rh7b_", "{&123}");
    t_const!("Rb0_", "{&false}");
    t_const!("Rc58_", "{&'X'}");
    t_const!("RRRh0_", "{&&&0}");
    // Only the innermost `Re_` collapses into the bare (empty) literal; the
    // two outer `R`s still print as `&`.
    t_const!("RRRe_", "{&&\"\"}");
    t_const!("QAE", "{&mut []}");
}
1387
+
1388
/// Array constants (`A...E`): elements are separated by `", "` and nested
/// values do not get braces of their own.
#[test]
fn demangle_const_array() {
    t_const!("AE", "{[]}");
    t_const!("Aj0_E", "{[0]}");
    t_const!("Ah1_h2_h3_E", "{[1, 2, 3]}");
    t_const!("ARe61_Re62_Re63_E", "{[\"a\", \"b\", \"c\"]}");
    t_const!("AAh1_h2_EAh3_h4_EE", "{[[1, 2], [3, 4]]}");
}
1396
+
1397
/// Tuple constants (`T...E`): a single-element tuple keeps its trailing
/// comma so that it is not mistaken for a parenthesized value.
#[test]
fn demangle_const_tuple() {
    t_const!("TE", "{()}");
    t_const!("Tj0_E", "{(0,)}");
    t_const!("Th1_b0_E", "{(1, false)}");
    t_const!(
        "TRe616263_c78_RAh1_h2_h3_EE",
        "{(\"abc\", 'x', &[1, 2, 3])}"
    );
}
1407
+
1408
/// ADT constants (`V<path>` followed by `U` for unit, `T...E` for
/// tuple-like, or `S...E` for named fields).
#[test]
fn demangle_const_adt() {
    t_const!(
        "VNvINtNtC4core6option6OptionjE4NoneU",
        "{core::option::Option::<usize>::None}"
    );
    t_const!(
        "VNvINtNtC4core6option6OptionjE4SomeTj0_E",
        "{core::option::Option::<usize>::Some(0)}"
    );
    // Named fields print as `name: value`, inside ` { ` ... ` }`.
    t_const!(
        "VNtC3foo3BarS1sRe616263_2chc78_5sliceRAh1_h2_h3_EE",
        "{foo::Bar { s: \"abc\", ch: 'x', slice: &[1, 2, 3] }}"
    );
}
1423
+
1167
1424
#[ test]
1168
1425
fn demangle_exponential_explosion ( ) {
1169
1426
// NOTE(eddyb) because of the prefix added by `t_nohash_type!` is
0 commit comments