Skip to content

Commit 435246b

Browse files
authored
Auto merge of #37640 - michaelwoerister:llvm-type-names, r=brson
trans: Make type names in LLVM IR independent of crate-nums and source locations. UPDATE: This PR makes the type names we assign in LLVM IR independent of the type definition's location in the source code and the order in which extern crates are loaded. The new type names look like the old ones, except for closures and the `<crate-num>.` prefix being gone. Resolution of name clashes (e.g. of the same type in different crate versions) is left to LLVM (which will just append `.<counter>` to the name). ORIGINAL TEXT: This PR makes the type names we assign in LLVM IR independent of the type definition's location in the source code. Before, the type of closures contained the closures definition location. The new naming scheme follows the same pattern that we already use for symbol names: We have a human readable prefix followed by a hash that makes sure we don't have any collisions. Here is an example of what the new names look like: ```rust // prog.rs - example program mod mod1 { pub struct Struct<T>(pub T); } fn main() { use mod1::Struct; let _s = Struct(0u32); let _t = Struct('h'); let _x = Struct(Struct(0i32)); } ``` Old: ```llvm %"mod1::Struct<u32>" = type { i32 } %"mod1::Struct<char>" = type { i32 } %"mod1::Struct<mod1::Struct<i32>>" = type { %"mod1::Struct<i32>" } %"mod1::Struct<i32>" = type { i32 } ``` New: ```llvm %"prog::mod1::Struct<u32>::ejDrT" = type { i32 } %"prog::mod1::Struct<char>::2eEAU" = type { i32 } %"prog::mod1::Struct<prog::mod1::Struct<i32>>::ehCqR" = type { %"prog::mod1::Struct<i32>::$fAo2" } %"prog::mod1::Struct<i32>::$fAo2" = type { i32 } ``` As you can see, the new names are slightly more verbose, but also more consistent. There is no difference now between a local type and one from another crate (before, non-local types where prefixed with `<crate-num>.` as in `2.std::mod1::Type1`). There is a bit of design space here. For example, we could leave off the crate name for local definitions (making names shorter but less consistent): ```llvm %"mod1::Struct<u32>::ejDrT" = type { i32 } %"mod1::Struct<char>::2eEAU" = type { i32 } %"mod1::Struct<mod1::Struct<i32>>::ehCqR" = type { %"mod1::Struct<i32>::$fAo2" } %"mod1::Struct<i32>::$fAo2" = type { i32 } ``` We could also put the hash in front, which might be more readable: ```llvm %"ejDrT.mod1::Struct<u32>" = type { i32 } %"2eEAU.mod1::Struct<char>" = type { i32 } %"ehCqR.mod1::Struct<mod1::Struct<i32>>" = type { %"$fAo2.mod1::Struct<i32>" } %"$fAo2.mod1::Struct<i32>" = type { i32 } ``` We could probably also get rid of the hash if we used full DefPaths and crate-nums (though I'm not yet a 100% sure if crate-nums could mess with incremental compilation). ```llvm %"mod1::Struct<u32>" = type { i32 } %"mod1::Struct<char>" = type { i32 } %"mod1::Struct<mod1::Struct<i32>>" = type { %"mod1::Struct<i32>" } %"mod1::Struct<i32>" = type { i32 } %"2.std::mod1::Type1" = type { ... } ``` I would prefer the solution with the hashes because it is nice and consistent conceptually, but visually it's admittedly a bit uglier. Maybe @rust-lang/compiler would like to bikeshed a little about this. On a related note: Has anyone ever tried if the LTO-linker will merge equal types with different names? (^ @brson, @alexcrichton ^) If not, that would be a reason to make type names more consistent.
2 parents 87b76a5 + 276f052 commit 435246b

File tree

10 files changed

+323
-272
lines changed

10 files changed

+323
-272
lines changed
+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
/// Convert unsigned integers into a string representation with some base.
12+
/// Bases up to and including 36 can be used for case-insensitive things.
13+
14+
use std::str;
15+
16+
pub const MAX_BASE: u64 = 64;
17+
const BASE_64: &'static [u8; MAX_BASE as usize] =
18+
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
19+
20+
#[inline]
21+
pub fn push_str(mut n: u64, base: u64, output: &mut String) {
22+
debug_assert!(base >= 2 && base <= MAX_BASE);
23+
let mut s = [0u8; 64];
24+
let mut index = 0;
25+
26+
loop {
27+
s[index] = BASE_64[(n % base) as usize];
28+
index += 1;
29+
n /= base;
30+
31+
if n == 0 {
32+
break;
33+
}
34+
}
35+
&mut s[0..index].reverse();
36+
output.push_str(str::from_utf8(&s[0..index]).unwrap());
37+
}
38+
39+
#[inline]
40+
pub fn encode(n: u64, base: u64) -> String {
41+
let mut s = String::with_capacity(13);
42+
push_str(n, base, &mut s);
43+
s
44+
}
45+
46+
#[test]
47+
fn test_encode() {
48+
fn test(n: u64, base: u64) {
49+
assert_eq!(Ok(n), u64::from_str_radix(&encode(n, base)[..], base as u32));
50+
}
51+
52+
for base in 2..37 {
53+
test(0, base);
54+
test(1, base);
55+
test(35, base);
56+
test(36, base);
57+
test(37, base);
58+
test(u64::max_value(), base);
59+
60+
for i in 0 .. 1_000 {
61+
test(i * 983, base);
62+
}
63+
}
64+
}

src/librustc_data_structures/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ extern crate libc;
4747
pub mod array_vec;
4848
pub mod accumulate_vec;
4949
pub mod small_vec;
50+
pub mod base_n;
5051
pub mod bitslice;
5152
pub mod blake2b;
5253
pub mod bitvec;

src/librustc_incremental/persist/fs.rs

+15-40
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ use rustc::hir::svh::Svh;
119119
use rustc::session::Session;
120120
use rustc::ty::TyCtxt;
121121
use rustc::util::fs as fs_util;
122-
use rustc_data_structures::flock;
122+
use rustc_data_structures::{flock, base_n};
123123
use rustc_data_structures::fx::{FxHashSet, FxHashMap};
124124

125125
use std::ffi::OsString;
@@ -135,6 +135,12 @@ const DEP_GRAPH_FILENAME: &'static str = "dep-graph.bin";
135135
const WORK_PRODUCTS_FILENAME: &'static str = "work-products.bin";
136136
const METADATA_HASHES_FILENAME: &'static str = "metadata.bin";
137137

138+
// We encode integers using the following base, so they are shorter than decimal
139+
// or hexadecimal numbers (we want short file and directory names). Since these
140+
// numbers will be used in file names, we choose an encoding that is not
141+
// case-sensitive (as opposed to base64, for example).
142+
const INT_ENCODE_BASE: u64 = 36;
143+
138144
pub fn dep_graph_path(sess: &Session) -> PathBuf {
139145
in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
140146
}
@@ -327,7 +333,7 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) {
327333
let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]);
328334

329335
// Append the svh
330-
new_sub_dir_name.push_str(&encode_base_36(svh.as_u64()));
336+
base_n::push_str(svh.as_u64(), INT_ENCODE_BASE, &mut new_sub_dir_name);
331337

332338
// Create the full path
333339
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
@@ -433,7 +439,8 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
433439

434440
let directory_name = format!("s-{}-{}-working",
435441
timestamp,
436-
encode_base_36(random_number as u64));
442+
base_n::encode(random_number as u64,
443+
INT_ENCODE_BASE));
437444
debug!("generate_session_dir_path: directory_name = {}", directory_name);
438445
let directory_path = crate_dir.join(directory_name);
439446
debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
@@ -562,27 +569,11 @@ fn extract_timestamp_from_session_dir(directory_name: &str)
562569
string_to_timestamp(&directory_name[dash_indices[0]+1 .. dash_indices[1]])
563570
}
564571

565-
const BASE_36: &'static [u8] = b"0123456789abcdefghijklmnopqrstuvwxyz";
566-
567-
fn encode_base_36(mut n: u64) -> String {
568-
let mut s = Vec::with_capacity(13);
569-
loop {
570-
s.push(BASE_36[(n % 36) as usize]);
571-
n /= 36;
572-
573-
if n == 0 {
574-
break;
575-
}
576-
}
577-
s.reverse();
578-
String::from_utf8(s).unwrap()
579-
}
580-
581572
fn timestamp_to_string(timestamp: SystemTime) -> String {
582573
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
583574
let micros = duration.as_secs() * 1_000_000 +
584575
(duration.subsec_nanos() as u64) / 1000;
585-
encode_base_36(micros)
576+
base_n::encode(micros, INT_ENCODE_BASE)
586577
}
587578

588579
fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
@@ -629,7 +620,7 @@ pub fn find_metadata_hashes_for(tcx: TyCtxt, cnum: CrateNum) -> Option<PathBuf>
629620
};
630621

631622
let target_svh = tcx.sess.cstore.crate_hash(cnum);
632-
let target_svh = encode_base_36(target_svh.as_u64());
623+
let target_svh = base_n::encode(target_svh.as_u64(), INT_ENCODE_BASE);
633624

634625
let sub_dir = find_metadata_hashes_iter(&target_svh, dir_entries.filter_map(|e| {
635626
e.ok().map(|e| e.file_name().to_string_lossy().into_owned())
@@ -677,7 +668,9 @@ fn crate_path(sess: &Session,
677668
let mut hasher = DefaultHasher::new();
678669
crate_disambiguator.hash(&mut hasher);
679670

680-
let crate_name = format!("{}-{}", crate_name, encode_base_36(hasher.finish()));
671+
let crate_name = format!("{}-{}",
672+
crate_name,
673+
base_n::encode(hasher.finish(), INT_ENCODE_BASE));
681674
incr_dir.join(crate_name)
682675
}
683676

@@ -1049,21 +1042,3 @@ fn test_find_metadata_hashes_iter()
10491042
None
10501043
);
10511044
}
1052-
1053-
#[test]
1054-
fn test_encode_base_36() {
1055-
fn test(n: u64) {
1056-
assert_eq!(Ok(n), u64::from_str_radix(&encode_base_36(n)[..], 36));
1057-
}
1058-
1059-
test(0);
1060-
test(1);
1061-
test(35);
1062-
test(36);
1063-
test(37);
1064-
test(u64::max_value());
1065-
1066-
for i in 0 .. 1_000 {
1067-
test(i * 983);
1068-
}
1069-
}

src/librustc_trans/base.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ use monomorphize::{self, Instance};
7474
use partitioning::{self, PartitioningStrategy, CodegenUnit};
7575
use symbol_map::SymbolMap;
7676
use symbol_names_test;
77-
use trans_item::TransItem;
77+
use trans_item::{TransItem, DefPathBasedNames};
7878
use type_::Type;
7979
use type_of;
8080
use value::Value;
@@ -1004,7 +1004,15 @@ impl<'blk, 'tcx> FunctionContext<'blk, 'tcx> {
10041004
}
10051005

10061006
pub fn trans_instance<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, instance: Instance<'tcx>) {
1007-
let _s = StatRecorder::new(ccx, ccx.tcx().item_path_str(instance.def));
1007+
let _s = if ccx.sess().trans_stats() {
1008+
let mut instance_name = String::new();
1009+
DefPathBasedNames::new(ccx.tcx(), true, true)
1010+
.push_def_path(instance.def, &mut instance_name);
1011+
Some(StatRecorder::new(ccx, instance_name))
1012+
} else {
1013+
None
1014+
};
1015+
10081016
// this is an info! to allow collecting monomorphization statistics
10091017
// and to allow finding the last function before LLVM aborts from
10101018
// release builds.

src/librustc_trans/collector.rs

+19-1
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ use glue::{self, DropGlueKind};
213213
use monomorphize::{self, Instance};
214214
use util::nodemap::{FxHashSet, FxHashMap, DefIdMap};
215215

216-
use trans_item::{TransItem, type_to_string, def_id_to_string};
216+
use trans_item::{TransItem, DefPathBasedNames};
217217

218218
#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug)]
219219
pub enum TransItemCollectionMode {
@@ -1234,3 +1234,21 @@ fn visit_mir_and_promoted<'tcx, V: MirVisitor<'tcx>>(mut visitor: V, mir: &mir::
12341234
visitor.visit_mir(promoted);
12351235
}
12361236
}
1237+
1238+
fn def_id_to_string<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
1239+
def_id: DefId)
1240+
-> String {
1241+
let mut output = String::new();
1242+
let printer = DefPathBasedNames::new(tcx, false, false);
1243+
printer.push_def_path(def_id, &mut output);
1244+
output
1245+
}
1246+
1247+
fn type_to_string<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
1248+
ty: ty::Ty<'tcx>)
1249+
-> String {
1250+
let mut output = String::new();
1251+
let printer = DefPathBasedNames::new(tcx, false, false);
1252+
printer.push_type_name(ty, &mut output);
1253+
output
1254+
}

src/librustc_trans/context.rs

+6-17
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use monomorphize::Instance;
2626
use partitioning::CodegenUnit;
2727
use trans_item::TransItem;
2828
use type_::Type;
29+
use rustc_data_structures::base_n;
2930
use rustc::ty::subst::Substs;
3031
use rustc::ty::{self, Ty, TyCtxt};
3132
use session::config::NoDebugInfo;
@@ -700,22 +701,6 @@ impl<'b, 'tcx> CrateContext<'b, 'tcx> {
700701
&self.local_ccxs[self.index]
701702
}
702703

703-
/// Get a (possibly) different `CrateContext` from the same
704-
/// `SharedCrateContext`.
705-
pub fn rotate(&'b self) -> CrateContext<'b, 'tcx> {
706-
let (_, index) =
707-
self.local_ccxs
708-
.iter()
709-
.zip(0..self.local_ccxs.len())
710-
.min_by_key(|&(local_ccx, _idx)| local_ccx.n_llvm_insns.get())
711-
.unwrap();
712-
CrateContext {
713-
shared: self.shared,
714-
index: index,
715-
local_ccxs: &self.local_ccxs[..],
716-
}
717-
}
718-
719704
/// Either iterate over only `self`, or iterate over all `CrateContext`s in
720705
/// the `SharedCrateContext`. The iterator produces `(ccx, is_origin)`
721706
/// pairs, where `is_origin` is `true` if `ccx` is `self` and `false`
@@ -975,7 +960,11 @@ impl<'b, 'tcx> CrateContext<'b, 'tcx> {
975960
self.local().local_gen_sym_counter.set(idx + 1);
976961
// Include a '.' character, so there can be no accidental conflicts with
977962
// user defined names
978-
format!("{}.{}", prefix, idx)
963+
let mut name = String::with_capacity(prefix.len() + 6);
964+
name.push_str(prefix);
965+
name.push_str(".");
966+
base_n::push_str(idx as u64, base_n::MAX_BASE, &mut name);
967+
name
979968
}
980969
}
981970

0 commit comments

Comments
 (0)