@@ -20,6 +20,7 @@ use std::fmt;
20
20
use std:: str;
21
21
use std:: cmp:: { PartialEq , Ordering , PartialOrd , Ord } ;
22
22
use std:: hash:: { Hash , Hasher } ;
23
+ use std:: num:: NonZeroU32 ;
23
24
24
25
use hygiene:: SyntaxContext ;
25
26
use { Span , DUMMY_SP , GLOBALS } ;
@@ -143,9 +144,10 @@ impl Decodable for Ident {
143
144
}
144
145
}
145
146
146
- /// A symbol is an interned or gensymed string.
147
+ /// A symbol is an interned or gensymed string. It's a `NonZeroU32` so that
148
+ /// `Option<Symbol>` only takes up 4 bytes.
147
149
#[ derive( Clone , Copy , PartialEq , Eq , PartialOrd , Ord , Hash ) ]
148
- pub struct Symbol ( u32 ) ;
150
+ pub struct Symbol ( NonZeroU32 ) ;
149
151
150
152
// The interner is pointed to by a thread local value which is only set on the main thread
151
153
// when parallelization is disabled. So we don't allow `Symbol` to transfer between threads
@@ -188,8 +190,9 @@ impl Symbol {
188
190
} )
189
191
}
190
192
193
+ #[ inline( always) ]
191
194
pub fn as_u32 ( self ) -> u32 {
192
- self . 0
195
+ self . 0 . get ( )
193
196
}
194
197
}
195
198
@@ -228,12 +231,36 @@ impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
228
231
}
229
232
}
230
233
234
+ /// Symbols (which are 1-indexed) index into this (which is 0-indexed
235
+ /// internally). The methods handle the index conversions.
236
+ #[ derive( Default ) ]
237
+ pub struct SymbolVec ( Vec < & ' static str > ) ;
238
+
239
+ impl SymbolVec {
240
+ #[ inline]
241
+ fn new_symbol ( & mut self , s : & ' static str ) -> Symbol {
242
+ self . 0 . push ( s) ;
243
+ // self.0.len() cannot be zero because of the push above.
244
+ Symbol ( unsafe { NonZeroU32 :: new_unchecked ( self . 0 . len ( ) as u32 ) } )
245
+ }
246
+
247
+ #[ inline]
248
+ fn get ( & self , sym : Symbol ) -> Option < & & ' static str > {
249
+ self . 0 . get ( sym. 0 . get ( ) as usize - 1 )
250
+ }
251
+
252
+ #[ inline]
253
+ fn contains ( & self , sym : Symbol ) -> bool {
254
+ sym. 0 . get ( ) as usize <= self . 0 . len ( )
255
+ }
256
+ }
257
+
231
258
// String interner state.
//
// The `&'static str`s in this type actually point into the arena.
//
// - `names` maps an interned string to its `Symbol`.
// - `strings` maps a (1-indexed) `Symbol` back to its string.
// - `gensyms` holds, for each gensym, the symbol it was created from; it is
//   indexed by `!0 - symbol.as_u32()`.
232
259
#[ derive( Default ) ]
233
260
pub struct Interner {
234
261
arena : DroplessArena ,
235
262
names : FxHashMap < & ' static str , Symbol > ,
236
- strings : Vec < & ' static str > ,
263
+ strings : SymbolVec ,
237
264
gensyms : Vec < Symbol > ,
238
265
}
239
266
@@ -243,9 +270,8 @@ impl Interner {
243
270
for & string in init {
244
271
if string == "" {
245
272
// We can't allocate empty strings in the arena, so handle this here.
246
- let name = Symbol ( this. strings . len ( ) as u32 ) ;
273
+ let name = this. strings . new_symbol ( "" ) ;
247
274
this. names . insert ( "" , name) ;
248
- this. strings . push ( "" ) ;
249
275
} else {
250
276
this. intern ( string) ;
251
277
}
@@ -258,8 +284,6 @@ impl Interner {
258
284
return name;
259
285
}
260
286
261
- let name = Symbol ( self . strings . len ( ) as u32 ) ;
262
-
263
287
// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
264
288
// UTF-8.
265
289
let string: & str = unsafe {
@@ -270,16 +294,17 @@ impl Interner {
270
294
let string: & ' static str = unsafe {
271
295
& * ( string as * const str )
272
296
} ;
273
- self . strings . push ( string) ;
297
+
298
+ let name = self . strings . new_symbol ( string) ;
274
299
self . names . insert ( string, name) ;
275
300
name
276
301
}
277
302
278
303
pub fn interned ( & self , symbol : Symbol ) -> Symbol {
279
- if ( symbol . 0 as usize ) < self . strings . len ( ) {
304
+ if self . strings . contains ( symbol ) {
280
305
symbol
281
306
} else {
282
- self . interned ( self . gensyms [ ( !0 - symbol. 0 ) as usize ] )
307
+ self . interned ( self . gensyms [ ( !0 - symbol. as_u32 ( ) ) as usize ] )
283
308
}
284
309
}
285
310
@@ -290,17 +315,17 @@ impl Interner {
290
315
291
316
fn gensymed ( & mut self , symbol : Symbol ) -> Symbol {
292
317
self . gensyms . push ( symbol) ;
293
- Symbol ( !0 - self . gensyms . len ( ) as u32 + 1 )
318
+ Symbol ( NonZeroU32 :: new ( !0 - self . gensyms . len ( ) as u32 + 1 ) . unwrap ( ) )
294
319
}
295
320
296
321
fn is_gensymed ( & mut self , symbol : Symbol ) -> bool {
297
- symbol . 0 as usize >= self . strings . len ( )
322
+ ! self . strings . contains ( symbol )
298
323
}
299
324
300
325
pub fn get ( & self , symbol : Symbol ) -> & str {
301
- match self . strings . get ( symbol. 0 as usize ) {
326
+ match self . strings . get ( symbol) {
302
327
Some ( string) => string,
303
- None => self . get ( self . gensyms [ ( !0 - symbol. 0 ) as usize ] ) ,
328
+ None => self . get ( self . gensyms [ ( !0 - symbol. as_u32 ( ) ) as usize ] ) ,
304
329
}
305
330
}
306
331
}
@@ -313,6 +338,8 @@ macro_rules! declare_keywords {(
313
338
) => {
314
339
pub mod keywords {
315
340
use super :: { Symbol , Ident } ;
341
+ use std:: num:: NonZeroU32 ;
342
+
316
343
#[ derive( Clone , Copy , PartialEq , Eq ) ]
317
344
pub struct Keyword {
318
345
ident: Ident ,
@@ -321,10 +348,17 @@ macro_rules! declare_keywords {(
321
348
#[ inline] pub fn ident( self ) -> Ident { self . ident }
322
349
#[ inline] pub fn name( self ) -> Symbol { self . ident. name }
323
350
}
351
+ // We must use `NonZeroU32::new_unchecked` below because it's `const`
352
+ // and `NonZeroU32::new` is not. So we static_assert the non-zeroness
353
+ // here.
354
+ mod asserts {
355
+ $( static_assert!( $konst: $index > 0u32 ) ; ) *
356
+ }
324
357
$(
325
358
#[ allow( non_upper_case_globals) ]
326
359
pub const $konst: Keyword = Keyword {
327
- ident: Ident :: with_empty_ctxt( super :: Symbol ( $index) )
360
+ ident: Ident :: with_empty_ctxt(
361
+ super :: Symbol ( unsafe { NonZeroU32 :: new_unchecked( $index) } ) )
328
362
} ;
329
363
) *
330
364
@@ -355,79 +389,80 @@ macro_rules! declare_keywords {(
355
389
declare_keywords ! {
356
390
// Special reserved identifiers used internally for elided lifetimes,
357
391
// unnamed method parameters, crate root module, error recovery etc.
358
- ( 0 , Invalid , "" )
359
- ( 1 , PathRoot , "{{root}}" )
360
- ( 2 , DollarCrate , "$crate" )
361
- ( 3 , Underscore , "_" )
392
+ // (0 cannot be used because Symbol uses NonZeroU32)
393
+ ( 1 , Invalid , "" )
394
+ ( 2 , PathRoot , "{{root}}" )
395
+ ( 3 , DollarCrate , "$crate" )
396
+ ( 4 , Underscore , "_" )
362
397
363
398
// Keywords that are used in stable Rust.
364
- ( 4 , As , "as" )
365
- ( 5 , Box , "box" )
366
- ( 6 , Break , "break" )
367
- ( 7 , Const , "const" )
368
- ( 8 , Continue , "continue" )
369
- ( 9 , Crate , "crate" )
370
- ( 10 , Else , "else" )
371
- ( 11 , Enum , "enum" )
372
- ( 12 , Extern , "extern" )
373
- ( 13 , False , "false" )
374
- ( 14 , Fn , "fn" )
375
- ( 15 , For , "for" )
376
- ( 16 , If , "if" )
377
- ( 17 , Impl , "impl" )
378
- ( 18 , In , "in" )
379
- ( 19 , Let , "let" )
380
- ( 20 , Loop , "loop" )
381
- ( 21 , Match , "match" )
382
- ( 22 , Mod , "mod" )
383
- ( 23 , Move , "move" )
384
- ( 24 , Mut , "mut" )
385
- ( 25 , Pub , "pub" )
386
- ( 26 , Ref , "ref" )
387
- ( 27 , Return , "return" )
388
- ( 28 , SelfLower , "self" )
389
- ( 29 , SelfUpper , "Self" )
390
- ( 30 , Static , "static" )
391
- ( 31 , Struct , "struct" )
392
- ( 32 , Super , "super" )
393
- ( 33 , Trait , "trait" )
394
- ( 34 , True , "true" )
395
- ( 35 , Type , "type" )
396
- ( 36 , Unsafe , "unsafe" )
397
- ( 37 , Use , "use" )
398
- ( 38 , Where , "where" )
399
- ( 39 , While , "while" )
399
+ ( 5 , As , "as" )
400
+ ( 6 , Box , "box" )
401
+ ( 7 , Break , "break" )
402
+ ( 8 , Const , "const" )
403
+ ( 9 , Continue , "continue" )
404
+ ( 10 , Crate , "crate" )
405
+ ( 11 , Else , "else" )
406
+ ( 12 , Enum , "enum" )
407
+ ( 13 , Extern , "extern" )
408
+ ( 14 , False , "false" )
409
+ ( 15 , Fn , "fn" )
410
+ ( 16 , For , "for" )
411
+ ( 17 , If , "if" )
412
+ ( 18 , Impl , "impl" )
413
+ ( 19 , In , "in" )
414
+ ( 20 , Let , "let" )
415
+ ( 21 , Loop , "loop" )
416
+ ( 22 , Match , "match" )
417
+ ( 23 , Mod , "mod" )
418
+ ( 24 , Move , "move" )
419
+ ( 25 , Mut , "mut" )
420
+ ( 26 , Pub , "pub" )
421
+ ( 27 , Ref , "ref" )
422
+ ( 28 , Return , "return" )
423
+ ( 29 , SelfLower , "self" )
424
+ ( 30 , SelfUpper , "Self" )
425
+ ( 31 , Static , "static" )
426
+ ( 32 , Struct , "struct" )
427
+ ( 33 , Super , "super" )
428
+ ( 34 , Trait , "trait" )
429
+ ( 35 , True , "true" )
430
+ ( 36 , Type , "type" )
431
+ ( 37 , Unsafe , "unsafe" )
432
+ ( 38 , Use , "use" )
433
+ ( 39 , Where , "where" )
434
+ ( 40 , While , "while" )
400
435
401
436
// Keywords that are used in unstable Rust or reserved for future use.
402
- ( 40 , Abstract , "abstract" )
403
- ( 41 , Become , "become" )
404
- ( 42 , Do , "do" )
405
- ( 43 , Final , "final" )
406
- ( 44 , Macro , "macro" )
407
- ( 45 , Override , "override" )
408
- ( 46 , Priv , "priv" )
409
- ( 47 , Typeof , "typeof" )
410
- ( 48 , Unsized , "unsized" )
411
- ( 49 , Virtual , "virtual" )
412
- ( 50 , Yield , "yield" )
437
+ ( 41 , Abstract , "abstract" )
438
+ ( 42 , Become , "become" )
439
+ ( 43 , Do , "do" )
440
+ ( 44 , Final , "final" )
441
+ ( 45 , Macro , "macro" )
442
+ ( 46 , Override , "override" )
443
+ ( 47 , Priv , "priv" )
444
+ ( 48 , Typeof , "typeof" )
445
+ ( 49 , Unsized , "unsized" )
446
+ ( 50 , Virtual , "virtual" )
447
+ ( 51 , Yield , "yield" )
413
448
414
449
// Edition-specific keywords that are used in stable Rust.
415
- ( 51 , Dyn , "dyn" ) // >= 2018 Edition only
450
+ ( 52 , Dyn , "dyn" ) // >= 2018 Edition only
416
451
417
452
// Edition-specific keywords that are used in unstable Rust or reserved for future use.
418
- ( 52 , Async , "async" ) // >= 2018 Edition only
419
- ( 53 , Try , "try" ) // >= 2018 Edition only
453
+ ( 53 , Async , "async" ) // >= 2018 Edition only
454
+ ( 54 , Try , "try" ) // >= 2018 Edition only
420
455
421
456
// Special lifetime names
422
- ( 54 , UnderscoreLifetime , "'_" )
423
- ( 55 , StaticLifetime , "'static" )
457
+ ( 55 , UnderscoreLifetime , "'_" )
458
+ ( 56 , StaticLifetime , "'static" )
424
459
425
460
// Weak keywords, have special meaning only in specific contexts.
426
- ( 56 , Auto , "auto" )
427
- ( 57 , Catch , "catch" )
428
- ( 58 , Default , "default" )
429
- ( 59 , Existential , "existential" )
430
- ( 60 , Union , "union" )
461
+ ( 57 , Auto , "auto" )
462
+ ( 58 , Catch , "catch" )
463
+ ( 59 , Default , "default" )
464
+ ( 60 , Existential , "existential" )
465
+ ( 61 , Union , "union" )
431
466
}
432
467
433
468
impl Symbol {
@@ -708,20 +743,22 @@ mod tests {
708
743
#[test]
fn interner_tests() {
    let mut interner: Interner = Interner::default();
    // Builds the `Symbol` with the given raw (non-zero) index.
    let sym = |raw: u32| Symbol(NonZeroU32::new(raw).unwrap());

    // The first interned string gets index 1.
    assert_eq!(interner.intern("dog"), sym(1));
    // Interning the same string again yields the same entry.
    assert_eq!(interner.intern("dog"), sym(1));
    // A different string gets a different number.
    assert_eq!(interner.intern("cat"), sym(2));
    assert_eq!(interner.intern("cat"), sym(2));
    // "dog" is still at 1.
    assert_eq!(interner.intern("dog"), sym(1));
    // Gensyms are numbered downwards from the top of the `u32` range.
    assert_eq!(interner.gensym("zebra"), sym(4294967295));
    // A gensym of the same string gets a new number.
    assert_eq!(interner.gensym("zebra"), sym(4294967294));
    // A gensym of an *existing* string gets a new number too.
    assert_eq!(interner.gensym("dog"), sym(4294967293));
}
726
763
727
764
#[ test]
0 commit comments