Skip to content

Use a versioning scheme for bytecode objects in rlibs #16139

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 4, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 88 additions & 15 deletions src/librustc/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ use std::char;
use std::collections::HashSet;
use std::io::{fs, TempDir, Command};
use std::io;
use std::mem;
use std::ptr;
use std::str;
use std::string::String;
Expand All @@ -45,6 +46,36 @@ use syntax::attr::AttrMetaMethods;
use syntax::codemap::Span;
use syntax::parse::token;

// RLIB LLVM-BYTECODE OBJECT LAYOUT
// Version 1
// Bytes    Data
// 0..10    "RUST_OBJECT" encoded in ASCII
// 11..14   format version as little-endian u32
// 15..22   size in bytes of deflate compressed LLVM bitcode as
//          little-endian u64
// 23..     compressed LLVM bitcode
//
// The byte ranges above are inclusive (the magic id is 11 bytes long, the
// version field 4 bytes and the size field 8 bytes). The writer may append a
// single zero padding byte after the compressed bitcode so that the total
// size of the object is even; the size field does not count that byte.

// This is the "magic number" expected at the beginning of a LLVM bytecode
// object in an rlib. Objects that do not start with these bytes are treated
// as being in the old, unversioned format.
pub static RLIB_BYTECODE_OBJECT_MAGIC: &'static [u8] = b"RUST_OBJECT";

// The version number this compiler will write to bytecode objects in rlibs
pub static RLIB_BYTECODE_OBJECT_VERSION: u32 = 1;

// The offset in bytes the bytecode object format version number can be found
// at. This equals the length of the magic id, i.e. the version field follows
// the magic id directly.
pub static RLIB_BYTECODE_OBJECT_VERSION_OFFSET: uint = 11;

// The offset in bytes the size of the compressed bytecode can be found at in
// format version 1 (directly after the 4 byte version field)
pub static RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET: uint =
RLIB_BYTECODE_OBJECT_VERSION_OFFSET + 4;

// The offset in bytes the compressed LLVM bytecode can be found at in format
// version 1 (directly after the 8 byte size field)
pub static RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET: uint =
RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET + 8;


#[deriving(Clone, PartialEq, PartialOrd, Ord, Eq)]
pub enum OutputType {
OutputTypeBitcode,
Expand Down Expand Up @@ -1103,28 +1134,44 @@ fn link_rlib<'a>(sess: &'a Session,
// is never exactly 16 bytes long by adding a 16 byte extension to
// it. This is to work around a bug in LLDB that would cause it to
// crash if the name of a file in an archive was exactly 16 bytes.
let bc = obj_filename.with_extension("bc");
let bc_deflated = obj_filename.with_extension("bytecode.deflate");
match fs::File::open(&bc).read_to_end().and_then(|data| {
fs::File::create(&bc_deflated)
.write(match flate::deflate_bytes(data.as_slice()) {
Some(compressed) => compressed,
None => sess.fatal("failed to compress bytecode")
}.as_slice())
}) {
let bc_filename = obj_filename.with_extension("bc");
let bc_deflated_filename = obj_filename.with_extension("bytecode.deflate");

let bc_data = match fs::File::open(&bc_filename).read_to_end() {
Ok(buffer) => buffer,
Err(e) => sess.fatal(format!("failed to read bytecode: {}",
e).as_slice())
};

let bc_data_deflated = match flate::deflate_bytes(bc_data.as_slice()) {
Some(compressed) => compressed,
None => sess.fatal(format!("failed to compress bytecode from {}",
bc_filename.display()).as_slice())
};

let mut bc_file_deflated = match fs::File::create(&bc_deflated_filename) {
Ok(file) => file,
Err(e) => {
sess.fatal(format!("failed to create compressed bytecode \
file: {}", e).as_slice())
}
};

match write_rlib_bytecode_object_v1(&mut bc_file_deflated,
bc_data_deflated.as_slice()) {
Ok(()) => {}
Err(e) => {
sess.err(format!("failed to write compressed bytecode: \
{}",
e).as_slice());
{}", e).as_slice());
sess.abort_if_errors()
}
}
ab.add_file(&bc_deflated).unwrap();
remove(sess, &bc_deflated);
};

ab.add_file(&bc_deflated_filename).unwrap();
remove(sess, &bc_deflated_filename);
if !sess.opts.cg.save_temps &&
!sess.opts.output_types.contains(&OutputTypeBitcode) {
remove(sess, &bc);
remove(sess, &bc_filename);
}
}

Expand All @@ -1134,6 +1181,32 @@ fn link_rlib<'a>(sess: &'a Session,
ab
}

fn write_rlib_bytecode_object_v1<T: Writer>(writer: &mut T,
bc_data_deflated: &[u8])
-> ::std::io::IoResult<()> {
let bc_data_deflated_size: u64 = bc_data_deflated.as_slice().len() as u64;

try! { writer.write(RLIB_BYTECODE_OBJECT_MAGIC) };
try! { writer.write_le_u32(1) };
try! { writer.write_le_u64(bc_data_deflated_size) };
try! { writer.write(bc_data_deflated.as_slice()) };

let number_of_bytes_written_so_far =
RLIB_BYTECODE_OBJECT_MAGIC.len() + // magic id
mem::size_of_val(&RLIB_BYTECODE_OBJECT_VERSION) + // version
mem::size_of_val(&bc_data_deflated_size) + // data size field
bc_data_deflated_size as uint; // actual data

// If the number of bytes written to the object so far is odd, add a
// padding byte to make it even. This works around a crash bug in LLDB
// (see issue #15950)
if number_of_bytes_written_so_far % 2 == 1 {
try! { writer.write_u8(0) };
}

return Ok(());
}

// Create a static archive
//
// This is essentially the same thing as an rlib, but it also involves adding
Expand Down
103 changes: 80 additions & 23 deletions src/librustc/back/lto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ use util::common::time;
use libc;
use flate;

use std::mem;

pub fn run(sess: &session::Session, llmod: ModuleRef,
tm: TargetMachineRef, reachable: &[String]) {
if sess.opts.cg.prefer_dynamic {
Expand Down Expand Up @@ -57,36 +59,66 @@ pub fn run(sess: &session::Session, llmod: ModuleRef,
let file = path.filename_str().unwrap();
let file = file.slice(3, file.len() - 5); // chop off lib/.rlib
debug!("reading {}", file);
let bc = time(sess.time_passes(),
format!("read {}.bytecode.deflate", name).as_slice(),
(),
|_| {
archive.read(format!("{}.bytecode.deflate",
file).as_slice())
});
let bc = bc.expect("missing compressed bytecode in archive!");
let bc = time(sess.time_passes(),
format!("inflate {}.bc", file).as_slice(),
(),
|_| {
match flate::inflate_bytes(bc) {
Some(bc) => bc,
None => {
sess.fatal(format!("failed to decompress \
bc of `{}`",
name).as_slice())
}
}
});
let ptr = bc.as_slice().as_ptr();
let bc_encoded = time(sess.time_passes(),
format!("read {}.bytecode.deflate", name).as_slice(),
(),
|_| {
archive.read(format!("{}.bytecode.deflate",
file).as_slice())
});
let bc_encoded = bc_encoded.expect("missing compressed bytecode in archive!");
let bc_extractor = if is_versioned_bytecode_format(bc_encoded) {
|_| {
// Read the version
let version = extract_bytecode_format_version(bc_encoded);

if version == 1 {
// The only version existing so far
let data_size = extract_compressed_bytecode_size_v1(bc_encoded);
let compressed_data = bc_encoded.slice(
link::RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET,
link::RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET + data_size as uint);

match flate::inflate_bytes(compressed_data) {
Some(inflated) => inflated,
None => {
sess.fatal(format!("failed to decompress bc of `{}`",
name).as_slice())
}
}
} else {
sess.fatal(format!("Unsupported bytecode format version {}",
version).as_slice())
}
}
} else {
// the object must be in the old, pre-versioning format, so simply
// inflate everything and let LLVM decide if it can make sense of it
|_| {
match flate::inflate_bytes(bc_encoded) {
Some(bc) => bc,
None => {
sess.fatal(format!("failed to decompress bc of `{}`",
name).as_slice())
}
}
}
};

let bc_decoded = time(sess.time_passes(),
format!("decode {}.bc", file).as_slice(),
(),
bc_extractor);

let ptr = bc_decoded.as_slice().as_ptr();
debug!("linking {}", name);
time(sess.time_passes(),
format!("ll link {}", name).as_slice(),
(),
|()| unsafe {
if !llvm::LLVMRustLinkInExternalBitcode(llmod,
ptr as *const libc::c_char,
bc.len() as libc::size_t) {
bc_decoded.len() as libc::size_t) {
link::llvm_err(sess,
format!("failed to load bc of `{}`",
name.as_slice()));
Expand Down Expand Up @@ -137,3 +169,28 @@ pub fn run(sess: &session::Session, llmod: ModuleRef,
}
debug!("lto done");
}

// Returns true if `bc` starts with the rlib bytecode object magic id and
// contains at least one more byte after it. Anything else is assumed to be
// in the old, unversioned format.
fn is_versioned_bytecode_format(bc: &[u8]) -> bool {
    let magic = link::RLIB_BYTECODE_OBJECT_MAGIC;

    // An object that is not strictly longer than the magic id cannot carry
    // a version field.
    if bc.len() <= magic.len() {
        return false;
    }

    bc.slice(0, magic.len()) == magic
}

// Reads the little-endian u32 format version stored directly after the
// magic id of a versioned bytecode object.
fn extract_bytecode_format_version(bc: &[u8]) -> u32 {
    let version_offset = link::RLIB_BYTECODE_OBJECT_VERSION_OFFSET;
    read_from_le_bytes::<u32>(bc, version_offset)
}

// Reads the little-endian u64 holding the size in bytes of the compressed
// bitcode from a version 1 bytecode object.
fn extract_compressed_bytecode_size_v1(bc: &[u8]) -> u64 {
    let size_offset = link::RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET;
    read_from_le_bytes::<u64>(bc, size_offset)
}

fn read_from_le_bytes<T: Int>(bytes: &[u8], position_in_bytes: uint) -> T {
let byte_data = bytes.slice(position_in_bytes,
position_in_bytes + mem::size_of::<T>());
let data = unsafe {
*(byte_data.as_ptr() as *const T)
};

Int::from_le(data)
}