Skip to content

Commit ff9151f

Browse files
committed
rustc: Use memset when zeroing allocas out (issue #3025).
Previously, LLVM was generating a ton of byte-by-byte copies, leading to huge numbers of vregs and bloating the code. Now, using memset, the code becomes a nice series of SSE moves instead.
1 parent db71ff3 commit ff9151f

File tree

1 file changed

+30
-2
lines changed

1 file changed

+30
-2
lines changed

src/rustc/middle/trans/base.rs

+30 -2
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ fn alloca_maybe_zeroed(cx: block, t: TypeRef, zero: bool) -> ValueRef {
278278
if cx.unreachable { return llvm::LLVMGetUndef(t); }
279279
let initcx = raw_block(cx.fcx, false, cx.fcx.llstaticallocas);
280280
let p = Alloca(initcx, t);
281-
if zero { Store(initcx, C_null(t), p); }
281+
if zero { memzero(initcx, p, t); }
282282
return p;
283283
}
284284

@@ -287,10 +287,38 @@ fn zero_mem(cx: block, llptr: ValueRef, t: ty::t) -> block {
287287
let bcx = cx;
288288
let ccx = cx.ccx();
289289
let llty = type_of(ccx, t);
290-
Store(bcx, C_null(llty), llptr);
290+
memzero(bcx, llptr, llty);
291291
return bcx;
292292
}
293293

294+
// Always use this function instead of storing a zero constant to the memory
295+
// in question. If you store a zero constant, LLVM will drown in vreg
296+
// allocation for large data structures, and the generated code will be
297+
// awful. (A telltale sign of this is large quantities of
298+
// `mov [byte ptr foo],0` in the generated code.)
299+
fn memzero(cx: block, llptr: ValueRef, llty: TypeRef) { // emits a memset-to-zero call over the memory at llptr, sized from llty
300+
let _icx = cx.insn_ctxt("memzero");
301+
let ccx = cx.ccx();
302+
303+
let intrinsic_key; // assigned exactly once by the match below
304+
match ccx.sess.targ_cfg.arch { // choose the memset variant by target architecture
305+
session::arch_x86 | session::arch_arm => {
306+
intrinsic_key = ~"llvm.memset.p0i8.i32"; // variant taking an i32 length
307+
}
308+
session::arch_x86_64 => {
309+
intrinsic_key = ~"llvm.memset.p0i8.i64"; // variant taking an i64 length
310+
}
311+
}
312+
313+
let llintrinsicfn = ccx.intrinsics.get(intrinsic_key); // fetch the intrinsic stored under that key
314+
let llptr = PointerCast(cx, llptr, T_ptr(T_i8())); // the p0i8 variants take an i8* destination
315+
let llzeroval = C_u8(0); // fill byte
316+
let size = IntCast(cx, llsize_of(ccx, llty), ccx.int_type); // NOTE(review): presumably ccx.int_type matches the i32/i64 length width picked above -- confirm
317+
let align = C_i32(1i32); // alignment argument is passed as 1
318+
let volatile = C_bool(false); // volatile flag is passed as false
319+
Call(cx, llintrinsicfn, ~[llptr, llzeroval, size, align, volatile]); // argument order: dest, value, length, align, volatile
320+
}
321+
294322
fn arrayalloca(cx: block, t: TypeRef, v: ValueRef) -> ValueRef {
295323
let _icx = cx.insn_ctxt("arrayalloca");
296324
if cx.unreachable { return llvm::LLVMGetUndef(t); }

0 commit comments

Comments
 (0)