diff --git a/src/doc/book b/src/doc/book
index fa91738b66367..e871c45989255 160000
--- a/src/doc/book
+++ b/src/doc/book
@@ -1 +1 @@
-Subproject commit fa91738b66367b6f70b078251868a071f1991ace
+Subproject commit e871c4598925594421d63e929fee292e6e071f97
diff --git a/src/doc/nomicon b/src/doc/nomicon
index 7fd493465b7dd..7f7a597b47ed6 160000
--- a/src/doc/nomicon
+++ b/src/doc/nomicon
@@ -1 +1 @@
-Subproject commit 7fd493465b7dd6cf3476f0b834884059bbdd1d93
+Subproject commit 7f7a597b47ed6c35c2a0f0ee6a69050fe2d5e013
diff --git a/src/doc/reference b/src/doc/reference
index 821355a6fd642..b9fb838054b84 160000
--- a/src/doc/reference
+++ b/src/doc/reference
@@ -1 +1 @@
-Subproject commit 821355a6fd642b71988a2f88a3162fb358732012
+Subproject commit b9fb838054b8441223c22eeae5b6d8e498071cd0
diff --git a/src/doc/rust-by-example b/src/doc/rust-by-example
index e459fb3f07f2b..bc342a475c09b 160000
--- a/src/doc/rust-by-example
+++ b/src/doc/rust-by-example
@@ -1 +1 @@
-Subproject commit e459fb3f07f2b930ccd25d348671b8eae233fd64
+Subproject commit bc342a475c09b6df8004d518382e6d5b6bcb49f7
diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs
index 61856236a1491..a3704d1154e08 100644
--- a/src/librustc_codegen_llvm/back/lto.rs
+++ b/src/librustc_codegen_llvm/back/lto.rs
@@ -80,9 +80,7 @@ impl LtoModuleCodegen {
                 let module = module.take().unwrap();
                 {
                     let config = cgcx.config(module.kind);
-                    let llmod = module.module_llvm.llmod();
-                    let tm = &*module.module_llvm.tm;
-                    run_pass_manager(cgcx, tm, llmod, config, false);
+                    run_pass_manager(cgcx, &module, config, false);
                     timeline.record("fat-done");
                 }
                 Ok(module)
@@ -557,8 +555,7 @@ fn thin_lto(cgcx: &CodegenContext,
 }
 
 fn run_pass_manager(cgcx: &CodegenContext,
-                    tm: &llvm::TargetMachine,
-                    llmod: &llvm::Module,
+                    module: &ModuleCodegen,
                     config: &ModuleConfig,
                     thin: bool) {
     // Now we have one massive module inside of llmod. Time to run the
@@ -569,7 +566,8 @@ fn run_pass_manager(cgcx: &CodegenContext,
     debug!("running the pass manager");
     unsafe {
         let pm = llvm::LLVMCreatePassManager();
-        llvm::LLVMRustAddAnalysisPasses(tm, pm, llmod);
+        let llmod = module.module_llvm.llmod();
+        llvm::LLVMRustAddAnalysisPasses(module.module_llvm.tm, pm, llmod);
 
         if config.verify_llvm_ir {
             let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
@@ -864,7 +862,7 @@ impl ThinModule {
             // little differently.
             info!("running thin lto passes over {}", module.name);
             let config = cgcx.config(module.kind);
-            run_pass_manager(cgcx, module.module_llvm.tm, llmod, config, true);
+            run_pass_manager(cgcx, &module, config, true);
             cgcx.save_temp_bitcode(&module, "thin-lto-after-pm");
             timeline.record("thin-done");
         }
diff --git a/src/librustc_codegen_llvm/back/write.rs b/src/librustc_codegen_llvm/back/write.rs
index 81619c219757b..ba1315956fb2c 100644
--- a/src/librustc_codegen_llvm/back/write.rs
+++ b/src/librustc_codegen_llvm/back/write.rs
@@ -633,7 +633,7 @@ unsafe fn optimize(cgcx: &CodegenContext,
                  None,
                  &format!("llvm module passes [{}]", module_name.unwrap()),
                  || {
-            llvm::LLVMRunPassManager(mpm, llmod)
+            llvm::LLVMRunPassManager(mpm, llmod);
         });
 
         // Deallocate managers that we're now done with
@@ -691,6 +691,38 @@ unsafe fn codegen(cgcx: &CodegenContext,
             create_msvc_imps(cgcx, llcx, llmod);
         }
 
+        // Ok now this one's a super interesting invocations. SIMD in rustc is
+        // difficult where we want some parts of the program to be able to use
+        // some SIMD features while other parts of the program don't. The real
+        // tough part is that we want this to actually work correctly!
+        //
+        // We go to great lengths to make sure this works, and one crucial
+        // aspect is that vector arguments (simd types) are never passed by
+        // value in the ABI of functions. It turns out, however, that LLVM will
+        // undo our "clever work" of passing vector types by reference. Its
+        // argument promotion pass will promote these by-ref arguments to
+        // by-val. That, however, introduces codegen errors!
+        //
+        // The upstream LLVM bug [1] has unfortunatey not really seen a lot of
+        // activity. The Rust bug [2], however, has seen quite a lot of reports
+        // of this in the wild. As a result, this is worked around locally here.
+        // We have a custom transformation, `LLVMRustDemoteSimdArguments`, which
+        // does the opposite of argument promotion by demoting any by-value SIMD
+        // arguments in function signatures to pointers intead of being
+        // by-value.
+        //
+        // This operates at the LLVM IR layer because LLVM is thwarting our
+        // codegen and this is the only chance we get to make sure it's correct
+        // before we hit codegen.
+        //
+        // Hopefully one day the upstream LLVM bug will be fixed and we'll no
+        // longer need this!
+        //
+        // [1]: https://bugs.llvm.org/show_bug.cgi?id=37358
+        // [2]: https://github.com/rust-lang/rust/issues/50154
+        llvm::LLVMRustDemoteSimdArguments(llmod);
+        cgcx.save_temp_bitcode(&module, "simd-demoted");
+
         // A codegen-specific pass manager is used to generate object
         // files for an LLVM module.
         //
diff --git a/src/librustc_codegen_llvm/llvm/ffi.rs b/src/librustc_codegen_llvm/llvm/ffi.rs
index 0b98fa4eaf551..e2b0142490933 100644
--- a/src/librustc_codegen_llvm/llvm/ffi.rs
+++ b/src/librustc_codegen_llvm/llvm/ffi.rs
@@ -1138,6 +1138,8 @@ extern "C" {
     /// Runs a pass manager on a module.
     pub fn LLVMRunPassManager(PM: &PassManager<'a>, M: &'a Module) -> Bool;
 
+    pub fn LLVMRustDemoteSimdArguments(M: &'a Module);
+
     pub fn LLVMInitializePasses();
 
     pub fn LLVMPassManagerBuilderCreate() -> &'static mut PassManagerBuilder;
diff --git a/src/librustc_llvm/build.rs b/src/librustc_llvm/build.rs
index 7d01ed556c8dd..ad5db19839ef0 100644
--- a/src/librustc_llvm/build.rs
+++ b/src/librustc_llvm/build.rs
@@ -162,7 +162,9 @@ fn main() {
     }
 
     build_helper::rerun_if_changed_anything_in_dir(Path::new("../rustllvm"));
-    cfg.file("../rustllvm/PassWrapper.cpp")
+    cfg
+       .file("../rustllvm/DemoteSimd.cpp")
+       .file("../rustllvm/PassWrapper.cpp")
        .file("../rustllvm/RustWrapper.cpp")
        .file("../rustllvm/ArchiveWrapper.cpp")
        .file("../rustllvm/Linker.cpp")
diff --git a/src/librustc_traits/chalk_context.rs b/src/librustc_traits/chalk_context.rs
index 371fa46f37010..5d6badf120286 100644
--- a/src/librustc_traits/chalk_context.rs
+++ b/src/librustc_traits/chalk_context.rs
@@ -23,6 +23,7 @@ use rustc::traits::{
     Goal,
     GoalKind,
     Clause,
+    ProgramClauseCategory,
     QuantifierKind,
     Environment,
     InEnvironment,
@@ -30,6 +31,7 @@ use rustc::traits::{
 use rustc::ty::fold::{TypeFoldable, TypeFolder, TypeVisitor};
 use rustc::ty::subst::Kind;
 use rustc::ty::{self, TyCtxt};
+use rustc::hir::def_id::DefId;
 
 use std::fmt::{self, Debug};
 use std::marker::PhantomData;
@@ -330,46 +332,230 @@ impl context::UnificationOps<ChalkArenas<'gcx>, ChalkArenas<'tcx>>
 {
     fn program_clauses(
         &self,
-        _environment: &Environment<'tcx>,
+        environment: &Environment<'tcx>,
         goal: &DomainGoal<'tcx>,
     ) -> Vec<Clause<'tcx>> {
         use rustc::traits::WhereClause::*;
 
-        match goal {
-            DomainGoal::Holds(Implemented(_trait_predicate)) => {
+        fn assemble_clauses_from_impls<'tcx>(
+            tcx: ty::TyCtxt<'_, '_, 'tcx>,
+            trait_def_id: DefId,
+            clauses: &mut Vec<Clause<'tcx>>
+        ) {
+            tcx.for_each_impl(trait_def_id, |impl_def_id| {
+                clauses.extend(
+                    tcx.program_clauses_for(impl_def_id)
+                        .into_iter()
+                        .cloned()
+                );
+            });
+        }
+
+        fn assemble_clauses_from_assoc_ty_values<'tcx>(
+            tcx: ty::TyCtxt<'_, '_, 'tcx>,
+            trait_def_id: DefId,
+            clauses: &mut Vec<Clause<'tcx>>
+        ) {
+            tcx.for_each_impl(trait_def_id, |impl_def_id| {
+                for def_id in tcx.associated_item_def_ids(impl_def_id).iter() {
+                    clauses.extend(
+                        tcx.program_clauses_for(*def_id)
+                            .into_iter()
+                            .cloned()
+                    );
+                }
+            });
+        }
+
+        let mut clauses = match goal {
+            DomainGoal::Holds(Implemented(trait_predicate)) => {
+                // These come from:
+                // * implementations of the trait itself (rule `Implemented-From-Impl`)
+                // * the trait decl (rule `Implemented-From-Env`)
+
+                let mut clauses = vec![];
+                assemble_clauses_from_impls(
+                    self.infcx.tcx,
+                    trait_predicate.def_id(),
+                    &mut clauses
+                );
+
+                // FIXME: we need to add special rules for builtin impls:
+                // * `Copy` / `Clone`
+                // * `Sized`
+                // * `Unsize`
+                // * `Generator`
+                // * `FnOnce` / `FnMut` / `Fn`
+                // * trait objects
+                // * auto traits
+
+                // Rule `Implemented-From-Env` will be computed from the environment.
+                clauses
+            }
+
+            DomainGoal::Holds(ProjectionEq(projection_predicate)) => {
+                // These come from:
+                // * the assoc type definition (rule `ProjectionEq-Placeholder`)
+                // * normalization of the assoc ty values (rule `ProjectionEq-Normalize`)
+                // * implied bounds from trait definitions (rule `Implied-Bound-From-Trait`)
+                // * implied bounds from type definitions (rule `Implied-Bound-From-Type`)
+
+                let clauses = self.infcx.tcx.program_clauses_for(
+                    projection_predicate.projection_ty.item_def_id
+                ).into_iter()
+
+                    // only select `ProjectionEq-Placeholder` and `ProjectionEq-Normalize`
+                    .filter(|clause| clause.category() == ProgramClauseCategory::Other)
+
+                    .cloned()
+                    .collect::<Vec<_>>();
+
+                // Rules `Implied-Bound-From-Trait` and `Implied-Bound-From-Type` will be computed
+                // from the environment.
+                clauses
+            }
+
+            DomainGoal::Holds(RegionOutlives(..)) => {
                 // These come from:
-                //
-                // - Trait definitions (implied bounds)
-                // - Implementations of the trait itself
-                panic!()
+                // * implied bounds from trait definitions (rule `Implied-Bound-From-Trait`)
+                // * implied bounds from type definitions (rule `Implied-Bound-From-Type`)
+
+                // All of these rules are computed in the environment.
+                vec![]
             }
 
-            DomainGoal::Holds(ProjectionEq(_projection_predicate)) => {
+            DomainGoal::Holds(TypeOutlives(..)) => {
                 // These come from:
-                panic!()
+                // * implied bounds from trait definitions (rule `Implied-Bound-From-Trait`)
+                // * implied bounds from type definitions (rule `Implied-Bound-From-Type`)
+
+                // All of these rules are computed in the environment.
+                vec![]
             }
 
-            DomainGoal::Holds(RegionOutlives(_region_outlives)) => {
-                panic!()
+            DomainGoal::WellFormed(WellFormed::Trait(trait_predicate)) => {
+                // These come from -- the trait decl (rule `WellFormed-TraitRef`).
+                self.infcx.tcx.program_clauses_for(trait_predicate.def_id())
+                    .into_iter()
+
+                    // only select `WellFormed-TraitRef`
+                    .filter(|clause| clause.category() == ProgramClauseCategory::WellFormed)
+
+                    .cloned()
+                    .collect()
             }
 
-            DomainGoal::Holds(TypeOutlives(_type_outlives)) => {
-                panic!()
+            DomainGoal::WellFormed(WellFormed::Ty(ty)) => {
+                // These come from:
+                // * the associated type definition if `ty` refers to an unnormalized
+                //   associated type (rule `WellFormed-AssocTy`)
+                // * custom rules for built-in types
+                // * the type definition otherwise (rule `WellFormed-Type`)
+                let clauses = match ty.sty {
+                    ty::Projection(data) => {
+                        self.infcx.tcx.program_clauses_for(data.item_def_id)
+                    }
+
+                    // These types are always WF (recall that we do not check
+                    // for parameters to be WF)
+                    ty::Bool |
+                    ty::Char |
+                    ty::Int(..) |
+                    ty::Uint(..) |
+                    ty::Float(..) |
+                    ty::Str |
+                    ty::RawPtr(..) |
+                    ty::FnPtr(..) |
+                    ty::Param(..) |
+                    ty::Never => {
+                        ty::List::empty()
+                    }
+
+                    // WF if inner type is `Sized`
+                    ty::Slice(..) |
+                    ty::Array(..) => {
+                        ty::List::empty()
+                    }
+
+                    ty::Tuple(..) => {
+                        ty::List::empty()
+                    }
+
+                    // WF if `sub_ty` outlives `region`
+                    ty::Ref(..) => {
+                        ty::List::empty()
+                    }
+
+                    ty::Dynamic(..) => {
+                        // FIXME: no rules yet for trait objects
+                        ty::List::empty()
+                    }
+
+                    ty::Adt(def, ..) => {
+                        self.infcx.tcx.program_clauses_for(def.did)
+                    }
+
+                    ty::Foreign(def_id) |
+                    ty::FnDef(def_id, ..) |
+                    ty::Closure(def_id, ..) |
+                    ty::Generator(def_id, ..) |
+                    ty::Opaque(def_id, ..) => {
+                        self.infcx.tcx.program_clauses_for(def_id)
+                    }
+
+                    ty::GeneratorWitness(..) |
+                    ty::UnnormalizedProjection(..) |
+                    ty::Infer(..) |
+                    ty::Error => {
+                        bug!("unexpected type {:?}", ty)
+                    }
+                };
+
+                clauses.into_iter()
+                    .filter(|clause| clause.category() == ProgramClauseCategory::WellFormed)
+                    .cloned()
+                    .collect()
             }
 
-            DomainGoal::WellFormed(WellFormed::Trait(_trait_predicate)) => {
-                // These come from -- the trait decl.
-                panic!()
+            DomainGoal::FromEnv(FromEnv::Trait(..)) => {
+                // These come from:
+                // * implied bounds from trait definitions (rule `Implied-Bound-From-Trait`)
+                // * implied bounds from type definitions (rule `Implied-Bound-From-Type`)
+                // * implied bounds from assoc type defs (rules `Implied-Trait-From-AssocTy`,
+                //   `Implied-Bound-From-AssocTy` and `Implied-WC-From-AssocTy`)
+
+                // All of these rules are computed in the environment.
+                vec![]
             }
 
-            DomainGoal::WellFormed(WellFormed::Ty(_ty)) => panic!(),
+            DomainGoal::FromEnv(FromEnv::Ty(..)) => {
+                // There are no `FromEnv::Ty(..) :- ...` rules (this predicate only
+                // comes from the environment).
+                vec![]
+            }
 
-            DomainGoal::FromEnv(FromEnv::Trait(_trait_predicate)) => panic!(),
+            DomainGoal::Normalize(projection_predicate) => {
+                // These come from -- assoc ty values (rule `Normalize-From-Impl`).
+                let mut clauses = vec![];
 
-            DomainGoal::FromEnv(FromEnv::Ty(_ty)) => panic!(),
+                assemble_clauses_from_assoc_ty_values(
+                    self.infcx.tcx,
+                    projection_predicate.projection_ty.trait_ref(self.infcx.tcx).def_id,
+                    &mut clauses
+                );
 
-            DomainGoal::Normalize(_) => panic!(),
-        }
+                clauses
+            }
+        };
+
+        let environment = self.infcx.tcx.lift_to_global(environment)
+            .expect("environment is not global");
+        clauses.extend(
+            self.infcx.tcx.program_clauses_for_env(environment)
+                .into_iter()
+                .cloned()
+        );
+        clauses
     }
 
     fn instantiate_binders_universally(
diff --git a/src/librustc_traits/lowering/environment.rs b/src/librustc_traits/lowering/environment.rs
index 04290ca6b76e4..c71898f73ecad 100644
--- a/src/librustc_traits/lowering/environment.rs
+++ b/src/librustc_traits/lowering/environment.rs
@@ -86,13 +86,16 @@ impl ClauseVisitor<'set, 'a, 'tcx> {
             ty::Slice(..) |
             ty::RawPtr(..) |
             ty::FnPtr(..) |
-            ty::Never |
             ty::Tuple(..) |
+            ty::Never |
+            ty::Param(..) => (),
+
             ty::GeneratorWitness(..) |
             ty::UnnormalizedProjection(..) |
-            ty::Param(..) |
             ty::Infer(..) |
-            ty::Error => (),
+            ty::Error => {
+                bug!("unexpected type {:?}", ty);
+            }
         }
     }
 
diff --git a/src/librustc_traits/lowering/mod.rs b/src/librustc_traits/lowering/mod.rs
index fb598a335482b..46581397aee2d 100644
--- a/src/librustc_traits/lowering/mod.rs
+++ b/src/librustc_traits/lowering/mod.rs
@@ -433,7 +433,7 @@ pub fn program_clauses_for_associated_type_def<'a, 'tcx>(
     let wf_clause = ProgramClause {
         goal: DomainGoal::WellFormed(WellFormed::Ty(placeholder_ty)),
         hypotheses: tcx.mk_goals(iter::once(hypothesis)),
-        category: ProgramClauseCategory::Other,
+        category: ProgramClauseCategory::WellFormed,
     };
 
     // Rule Implied-Trait-From-AssocTy
diff --git a/src/rustllvm/DemoteSimd.cpp b/src/rustllvm/DemoteSimd.cpp
new file mode 100644
index 0000000000000..e9203baa0d7b1
--- /dev/null
+++ b/src/rustllvm/DemoteSimd.cpp
@@ -0,0 +1,189 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#include <vector>
+#include <set>
+
+#include "rustllvm.h"
+
+#if LLVM_VERSION_GE(5, 0)
+
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Module.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+static std::vector<Function*>
+GetFunctionsWithSimdArgs(Module *M) {
+  std::vector<Function*> Ret;
+
+  for (auto &F : M->functions()) {
+    // Skip all intrinsic calls as these are always tightly controlled to "work
+    // correctly", so no need to fixup any of these.
+    if (F.isIntrinsic())
+      continue;
+
+    // We're only interested in rustc-defined functions, not unstably-defined
+    // imported SIMD ffi functions.
+    if (F.isDeclaration())
+      continue;
+
+    // Argument promotion only happens on internal functions, so skip demoting
+    // arguments in external functions like FFI shims and such.
+    if (!F.hasLocalLinkage())
+      continue;
+
+    // If any argument to this function is a by-value vector type, then that's
+    // bad! The compiler didn't generate any functions that looked like this,
+    // and we try to rely on LLVM to not do this! Argument promotion may,
+    // however, promote arguments from behind references. In any case, figure
+    // out if we're interested in demoting this argument.
+    if (any_of(F.args(), [](Argument &arg) { return arg.getType()->isVectorTy(); }))
+      Ret.push_back(&F);
+  }
+
+  return Ret;
+}
+
+extern "C" void
+LLVMRustDemoteSimdArguments(LLVMModuleRef Mod) {
+  Module *M = unwrap(Mod);
+
+  auto Functions = GetFunctionsWithSimdArgs(M);
+
+  for (auto F : Functions) {
+    // Build up our list of new parameters and new argument attributes.
+    // We're only changing those arguments which are vector types.
+    SmallVector<Type*, 8> Params;
+    SmallVector<AttributeSet, 8> ArgAttrVec;
+    auto PAL = F->getAttributes();
+    for (auto &Arg : F->args()) {
+      auto *Ty = Arg.getType();
+      if (Ty->isVectorTy()) {
+        Params.push_back(PointerType::get(Ty, 0));
+        ArgAttrVec.push_back(AttributeSet());
+      } else {
+        Params.push_back(Ty);
+        ArgAttrVec.push_back(PAL.getParamAttributes(Arg.getArgNo()));
+      }
+    }
+
+    // Replace `F` with a new function with our new signature. I'm... not really
+    // sure how this works, but this is all the steps `ArgumentPromotion` does
+    // to replace a signature as well.
+    assert(!F->isVarArg()); // ArgumentPromotion should skip these fns
+    FunctionType *NFTy = FunctionType::get(F->getReturnType(), Params, false);
+    Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+    NF->copyAttributesFrom(F);
+    NF->setSubprogram(F->getSubprogram());
+    F->setSubprogram(nullptr);
+    NF->setAttributes(AttributeList::get(F->getContext(),
+                                         PAL.getFnAttributes(),
+                                         PAL.getRetAttributes(),
+                                         ArgAttrVec));
+    ArgAttrVec.clear();
+    F->getParent()->getFunctionList().insert(F->getIterator(), NF);
+    NF->takeName(F);
+
+    // Iterate over all invocations of `F`, updating all `call` instructions to
+    // store immediate vector types in a local `alloc` instead of a by-value
+    // vector.
+    //
+    // Like before, much of this is copied from the `ArgumentPromotion` pass in
+    // LLVM.
+    SmallVector<Value*, 16> Args;
+    while (!F->use_empty()) {
+      CallSite CS(F->user_back());
+      assert(CS.getCalledFunction() == F);
+      Instruction *Call = CS.getInstruction();
+      const AttributeList &CallPAL = CS.getAttributes();
+
+      // Loop over the operands, inserting an `alloca` and a store for any
+      // argument we're demoting to be by reference
+      //
+      // FIXME: we probably want to figure out an LLVM pass to run and clean up
+      // this function and instructions we're generating, we should in theory
+      // only generate a maximum number of `alloca` instructions rather than
+      // one-per-variable unconditionally.
+      CallSite::arg_iterator AI = CS.arg_begin();
+      size_t ArgNo = 0;
+      for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+           ++I, ++AI, ++ArgNo) {
+        if (I->getType()->isVectorTy()) {
+          AllocaInst *AllocA = new AllocaInst(I->getType(), 0, nullptr, "", Call);
+          new StoreInst(*AI, AllocA, Call);
+          Args.push_back(AllocA);
+          ArgAttrVec.push_back(AttributeSet());
+        } else {
+          Args.push_back(*AI);
+          ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
+        }
+      }
+      assert(AI == CS.arg_end());
+
+      // Create a new call instructions which we'll use to replace the old call
+      // instruction, copying over as many attributes and such as possible.
+      SmallVector<OperandBundleDef, 1> OpBundles;
+      CS.getOperandBundlesAsDefs(OpBundles);
+
+      CallSite NewCS;
+      if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+        InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+                           Args, OpBundles, "", Call);
+      } else {
+        auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call);
+        NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
+        NewCS = NewCall;
+      }
+      NewCS.setCallingConv(CS.getCallingConv());
+      NewCS.setAttributes(
+          AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
+                             CallPAL.getRetAttributes(), ArgAttrVec));
+      NewCS->setDebugLoc(Call->getDebugLoc());
+      Args.clear();
+      ArgAttrVec.clear();
+      Call->replaceAllUsesWith(NewCS.getInstruction());
+      NewCS->takeName(Call);
+      Call->eraseFromParent();
+    }
+
+    // Splice the body of the old function right into the new function.
+    NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+
+    // Update our new function to replace all uses of the by-value argument with
+    // loads of the pointer argument we've generated.
+    //
+    // FIXME: we probably want to only generate one load instruction per
+    // function? Or maybe run an LLVM pass to clean up this function?
+    for (Function::arg_iterator I = F->arg_begin(),
+                                E = F->arg_end(),
+                                I2 = NF->arg_begin();
+         I != E;
+         ++I, ++I2) {
+      if (I->getType()->isVectorTy()) {
+        I->replaceAllUsesWith(new LoadInst(&*I2, "", &NF->begin()->front()));
+      } else {
+        I->replaceAllUsesWith(&*I2);
+      }
+      I2->takeName(&*I);
+    }
+
+    // Delete all references to the old function, it should be entirely dead
+    // now.
+    M->getFunctionList().remove(F);
+  }
+}
+
+#else // LLVM_VERSION_GE(8, 0)
+extern "C" void
+LLVMRustDemoteSimdArguments(LLVMModuleRef Mod) {
+}
+#endif // LLVM_VERSION_GE(8, 0)
diff --git a/src/test/run-make/simd-argument-promotion-thwarted/Makefile b/src/test/run-make/simd-argument-promotion-thwarted/Makefile
new file mode 100644
index 0000000000000..3095432d0fe69
--- /dev/null
+++ b/src/test/run-make/simd-argument-promotion-thwarted/Makefile
@@ -0,0 +1,13 @@
+-include ../../run-make-fulldeps/tools.mk
+
+ifeq ($(TARGET),x86_64-unknown-linux-gnu)
+all:
+	$(RUSTC) t1.rs -C opt-level=3
+	$(TMPDIR)/t1
+	$(RUSTC) t2.rs -C opt-level=3
+	$(TMPDIR)/t2
+	$(RUSTC) t3.rs -C opt-level=3
+	$(TMPDIR)/t3
+else
+all:
+endif
diff --git a/src/test/run-make/simd-argument-promotion-thwarted/t1.rs b/src/test/run-make/simd-argument-promotion-thwarted/t1.rs
new file mode 100644
index 0000000000000..cb4a3dd7d4a7c
--- /dev/null
+++ b/src/test/run-make/simd-argument-promotion-thwarted/t1.rs
@@ -0,0 +1,21 @@
+use std::arch::x86_64;
+
+fn main() {
+    if !is_x86_feature_detected!("avx2") {
+        return println!("AVX2 is not supported on this machine/build.");
+    }
+    let load_bytes: [u8; 32] = [0x0f; 32];
+    let lb_ptr = load_bytes.as_ptr();
+    let reg_load = unsafe {
+        x86_64::_mm256_loadu_si256(
+            lb_ptr as *const x86_64::__m256i
+        )
+    };
+    println!("{:?}", reg_load);
+    let mut store_bytes: [u8; 32] = [0; 32];
+    let sb_ptr = store_bytes.as_mut_ptr();
+    unsafe {
+        x86_64::_mm256_storeu_si256(sb_ptr as *mut x86_64::__m256i, reg_load);
+    }
+    assert_eq!(load_bytes, store_bytes);
+}
diff --git a/src/test/run-make/simd-argument-promotion-thwarted/t2.rs b/src/test/run-make/simd-argument-promotion-thwarted/t2.rs
new file mode 100644
index 0000000000000..0e42b82a223d0
--- /dev/null
+++ b/src/test/run-make/simd-argument-promotion-thwarted/t2.rs
@@ -0,0 +1,14 @@
+use std::arch::x86_64::*;
+
+fn main() {
+    if !is_x86_feature_detected!("avx") {
+        return println!("AVX is not supported on this machine/build.");
+    }
+    unsafe {
+        let f = _mm256_set_pd(2.0, 2.0, 2.0, 2.0);
+        let r = _mm256_mul_pd(f, f);
+
+        union A { a: __m256d, b: [f64; 4] }
+        assert_eq!(A { a: r }.b, [4.0, 4.0, 4.0, 4.0]);
+    }
+}
diff --git a/src/test/run-make/simd-argument-promotion-thwarted/t3.rs b/src/test/run-make/simd-argument-promotion-thwarted/t3.rs
new file mode 100644
index 0000000000000..10062ab3e4643
--- /dev/null
+++ b/src/test/run-make/simd-argument-promotion-thwarted/t3.rs
@@ -0,0 +1,52 @@
+use std::arch::x86_64::*;
+
+#[target_feature(enable = "avx")]
+unsafe fn avx_mul(a: __m256, b: __m256) -> __m256 {
+    _mm256_mul_ps(a, b)
+}
+
+#[target_feature(enable = "avx")]
+unsafe fn avx_store(p: *mut f32, a: __m256) {
+    _mm256_storeu_ps(p, a)
+}
+
+#[target_feature(enable = "avx")]
+unsafe fn avx_setr(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
+    _mm256_setr_ps(a, b, c, d, e, f, g, h)
+}
+
+#[target_feature(enable = "avx")]
+unsafe fn avx_set1(a: f32) -> __m256 {
+    _mm256_set1_ps(a)
+}
+
+struct Avx(__m256);
+
+fn mul(a: Avx, b: Avx) -> Avx {
+    unsafe { Avx(avx_mul(a.0, b.0)) }
+}
+
+fn set1(a: f32) -> Avx {
+    unsafe { Avx(avx_set1(a)) }
+}
+
+fn setr(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> Avx {
+    unsafe { Avx(avx_setr(a, b, c, d, e, f, g, h)) }
+}
+
+unsafe fn store(p: *mut f32, a: Avx) {
+    avx_store(p, a.0);
+}
+
+fn main() {
+    if !is_x86_feature_detected!("avx") {
+        return println!("AVX is not supported on this machine/build.");
+    }
+    let mut result = [0.0f32; 8];
+    let a = mul(setr(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0), set1(0.25));
+    unsafe {
+        store(result.as_mut_ptr(), a);
+    }
+
+    assert_eq!(result, [0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.50, 1.75]);
+}
diff --git a/src/tools/tidy/src/lib.rs b/src/tools/tidy/src/lib.rs
index c4bd0bbd03ca3..c5f5896d286c3 100644
--- a/src/tools/tidy/src/lib.rs
+++ b/src/tools/tidy/src/lib.rs
@@ -78,6 +78,7 @@ fn filter_dirs(path: &Path) -> bool {
         "src/tools/lldb",
         "src/target",
         "src/stdsimd",
+        "target",
     ];
     skip.iter().any(|p| path.ends_with(p))
 }