From f49126e3d612ec5f7e571a7ccfb3e4447cfa427c Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Sun, 8 Jan 2023 18:23:13 -0800 Subject: [PATCH 1/3] Document wf constraints on control flow in cleanup blocks Also fixes a bug in dominator computation --- .../src/transform/validate.rs | 62 +++++++++++++++++-- .../src/graph/dominators/mod.rs | 5 +- compiler/rustc_middle/src/mir/syntax.rs | 7 +++ 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/compiler/rustc_const_eval/src/transform/validate.rs b/compiler/rustc_const_eval/src/transform/validate.rs index 94e1b95a0eb3c..9f429d3a7d984 100644 --- a/compiler/rustc_const_eval/src/transform/validate.rs +++ b/compiler/rustc_const_eval/src/transform/validate.rs @@ -1,6 +1,8 @@ //! Validates the MIR to ensure that invariants are upheld. -use rustc_data_structures::fx::FxHashSet; +use std::collections::hash_map::Entry; + +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_index::bit_set::BitSet; use rustc_infer::traits::Reveal; use rustc_middle::mir::interpret::Scalar; @@ -18,7 +20,7 @@ use rustc_mir_dataflow::storage::always_storage_live_locals; use rustc_mir_dataflow::{Analysis, ResultsCursor}; use rustc_target::abi::{Size, VariantIdx}; -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] enum EdgeKind { Unwind, Normal, @@ -57,7 +59,7 @@ impl<'tcx> MirPass<'tcx> for Validator { .iterate_to_fixpoint() .into_results_cursor(body); - TypeChecker { + let mut checker = TypeChecker { when: &self.when, body, tcx, @@ -67,8 +69,9 @@ impl<'tcx> MirPass<'tcx> for Validator { storage_liveness, place_cache: Vec::new(), value_cache: Vec::new(), - } - .visit_body(body); + }; + checker.visit_body(body); + checker.check_cleanup_control_flow(); } } @@ -134,6 +137,55 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> { } } + fn check_cleanup_control_flow(&self) { + let doms = self.body.basic_blocks.dominators(); + let mut post_contract_node = FxHashMap::default(); + let mut get_post_contract_node = |mut bb| { + if let Some(res) = post_contract_node.get(&bb) { + return *res; + } + let mut dom_path = vec![]; + while self.body.basic_blocks[bb].is_cleanup { + dom_path.push(bb); + bb = doms.immediate_dominator(bb); + } + let root = *dom_path.last().unwrap(); + for bb in dom_path { + post_contract_node.insert(bb, root); + } + root + }; + + let mut parent = FxHashMap::default(); + for (bb, bb_data) in self.body.basic_blocks.iter_enumerated() { + if !bb_data.is_cleanup || !self.reachable_blocks.contains(bb) { + continue; + } + let bb = get_post_contract_node(bb); + for s in bb_data.terminator().successors() { + let s = get_post_contract_node(s); + if s == bb { + continue; + } + match parent.entry(bb) { + Entry::Vacant(e) => { + e.insert(s); + } + Entry::Occupied(e) if s != *e.get() => self.fail( + Location { block: bb, statement_index: 0 }, + format!( + "Cleanup control flow violation: The blocks dominated by {:?} have edges to both {:?} and {:?}", + bb, + s, + *e.get() + ) + ), + Entry::Occupied(_) => (), + } + } + } + } + /// Check if src can be assigned into dest. /// This is not precise, it will accept some incorrect assignments. fn mir_assign_valid_types(&self, src: Ty<'tcx>, dest: Ty<'tcx>) -> bool { diff --git a/compiler/rustc_data_structures/src/graph/dominators/mod.rs b/compiler/rustc_data_structures/src/graph/dominators/mod.rs index ea2a4388b92f0..07b1ace218945 100644 --- a/compiler/rustc_data_structures/src/graph/dominators/mod.rs +++ b/compiler/rustc_data_structures/src/graph/dominators/mod.rs @@ -135,7 +135,10 @@ pub fn dominators(graph: G) -> Dominators { // This loop computes the semi[w] for w. semi[w] = w; for v in graph.predecessors(pre_order_to_real[w]) { - let v = real_to_pre_order[v].unwrap(); + // Reachable vertices may have unreachable predecessors, so ignore any of them + let Some(v) = real_to_pre_order[v] else { + continue + }; // eval returns a vertex x from which semi[x] is minimum among // vertices semi[v] +> x *> v. diff --git a/compiler/rustc_middle/src/mir/syntax.rs b/compiler/rustc_middle/src/mir/syntax.rs index 6b4489026d3d3..0c395cae5665c 100644 --- a/compiler/rustc_middle/src/mir/syntax.rs +++ b/compiler/rustc_middle/src/mir/syntax.rs @@ -512,6 +512,13 @@ pub struct CopyNonOverlapping<'tcx> { /// must also be `cleanup`. This is a part of the type system and checked statically, so it is /// still an error to have such an edge in the CFG even if it's known that it won't be taken at /// runtime. +/// 4. The induced subgraph on cleanup blocks must look roughly like an upside down tree. This is +/// necessary to ensure that landing pad information can be correctly codegened. More precisely: +/// +/// Begin with the standard control flow graph `G`. Modify `G` as follows: for any two cleanup +/// vertices `u` and `v` such that `u` dominates `v`, contract `u` and `v` into a single vertex, +/// deleting self edges and duplicate edges in the process. The cleanup blocks of the resulting +/// graph must form an inverted forest. #[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq, TypeFoldable, TypeVisitable)] pub enum TerminatorKind<'tcx> { /// Block has one successor; we continue execution there. From ec3d9934103ae33d2116bb5791b38921902c8539 Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Fri, 13 Jan 2023 08:32:54 -0800 Subject: [PATCH 2/3] Add cycle checking to cleanup control flow validation --- .../src/transform/validate.rs | 65 ++++++++++++++----- compiler/rustc_middle/src/mir/syntax.rs | 11 ++-- 2 files changed, 54 insertions(+), 22 deletions(-) diff --git a/compiler/rustc_const_eval/src/transform/validate.rs b/compiler/rustc_const_eval/src/transform/validate.rs index 9f429d3a7d984..b4f1ab6226714 100644 --- a/compiler/rustc_const_eval/src/transform/validate.rs +++ b/compiler/rustc_const_eval/src/transform/validate.rs @@ -1,9 +1,8 @@ //! Validates the MIR to ensure that invariants are upheld. -use std::collections::hash_map::Entry; - use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_index::bit_set::BitSet; +use rustc_index::vec::IndexVec; use rustc_infer::traits::Reveal; use rustc_middle::mir::interpret::Scalar; use rustc_middle::mir::visit::NonUseContext::VarDebugInfo; @@ -140,23 +139,27 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> { fn check_cleanup_control_flow(&self) { let doms = self.body.basic_blocks.dominators(); let mut post_contract_node = FxHashMap::default(); + // Reusing the allocation across invocations of the closure + let mut dom_path = vec![]; let mut get_post_contract_node = |mut bb| { - if let Some(res) = post_contract_node.get(&bb) { - return *res; - } - let mut dom_path = vec![]; - while self.body.basic_blocks[bb].is_cleanup { + let root = loop { + if let Some(root) = post_contract_node.get(&bb) { + break *root; + } + let parent = doms.immediate_dominator(bb); dom_path.push(bb); - bb = doms.immediate_dominator(bb); - } - let root = *dom_path.last().unwrap(); - for bb in dom_path { + if !self.body.basic_blocks[parent].is_cleanup { + break bb; + } + bb = parent; + }; + for bb in dom_path.drain(..) { post_contract_node.insert(bb, root); } root }; - let mut parent = FxHashMap::default(); + let mut parent = IndexVec::from_elem(None, &self.body.basic_blocks); for (bb, bb_data) in self.body.basic_blocks.iter_enumerated() { if !bb_data.is_cleanup || !self.reachable_blocks.contains(bb) { continue; @@ -167,23 +170,49 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> { if s == bb { continue; } - match parent.entry(bb) { - Entry::Vacant(e) => { - e.insert(s); + let parent = &mut parent[bb]; + match parent { + None => { + *parent = Some(s); } - Entry::Occupied(e) if s != *e.get() => self.fail( + Some(e) if *e == s => (), + Some(e) => self.fail( Location { block: bb, statement_index: 0 }, format!( "Cleanup control flow violation: The blocks dominated by {:?} have edges to both {:?} and {:?}", bb, s, - *e.get() + *e ) ), - Entry::Occupied(_) => (), } } } + + // Check for cycles + let mut stack = FxHashSet::default(); + for i in 0..parent.len() { + let mut bb = BasicBlock::from_usize(i); + stack.clear(); + stack.insert(bb); + loop { + let Some(parent )= parent[bb].take() else { + break + }; + let no_cycle = stack.insert(parent); + if !no_cycle { + self.fail( + Location { block: bb, statement_index: 0 }, + format!( + "Cleanup control flow violation: Cycle involving edge {:?} -> {:?}", + bb, parent, + ), + ); + break; + } + bb = parent; + } + } } /// Check if src can be assigned into dest. diff --git a/compiler/rustc_middle/src/mir/syntax.rs b/compiler/rustc_middle/src/mir/syntax.rs index 0c395cae5665c..52c2b10cbbea9 100644 --- a/compiler/rustc_middle/src/mir/syntax.rs +++ b/compiler/rustc_middle/src/mir/syntax.rs @@ -512,13 +512,16 @@ pub struct CopyNonOverlapping<'tcx> { /// must also be `cleanup`. This is a part of the type system and checked statically, so it is /// still an error to have such an edge in the CFG even if it's known that it won't be taken at /// runtime. -/// 4. The induced subgraph on cleanup blocks must look roughly like an upside down tree. This is -/// necessary to ensure that landing pad information can be correctly codegened. More precisely: +/// 4. The control flow between cleanup blocks must look like an upside down tree. Roughly +/// speaking, this means that control flow that looks like a V is allowed, while control flow +/// that looks like a W is not. This is necessary to ensure that landing pad information can be +/// correctly codegened on MSVC. More precisely: /// /// Begin with the standard control flow graph `G`. Modify `G` as follows: for any two cleanup /// vertices `u` and `v` such that `u` dominates `v`, contract `u` and `v` into a single vertex, -/// deleting self edges and duplicate edges in the process. The cleanup blocks of the resulting -/// graph must form an inverted forest. +/// deleting self edges and duplicate edges in the process. Now remove all vertices from `G` +/// that are not cleanup vertices or are not reachable. The resulting graph must be an inverted +/// tree, that is each vertex may have at most one successor and there may be no cycles. #[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq, TypeFoldable, TypeVisitable)] pub enum TerminatorKind<'tcx> { /// Block has one successor; we continue execution there. From 4bc963eba67e61507d2069edf10cfec1d7f8ec0a Mon Sep 17 00:00:00 2001 From: Jakob Degen Date: Mon, 16 Jan 2023 15:01:16 -0800 Subject: [PATCH 3/3] Avoid trivial checks on cleanup control flow in MIR validator --- .../rustc_const_eval/src/transform/validate.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_const_eval/src/transform/validate.rs b/compiler/rustc_const_eval/src/transform/validate.rs index b4f1ab6226714..dd168a9ac3cd3 100644 --- a/compiler/rustc_const_eval/src/transform/validate.rs +++ b/compiler/rustc_const_eval/src/transform/validate.rs @@ -64,6 +64,7 @@ impl<'tcx> MirPass<'tcx> for Validator { tcx, param_env, mir_phase, + unwind_edge_count: 0, reachable_blocks: traversal::reachable_as_bitset(body), storage_liveness, place_cache: Vec::new(), @@ -80,6 +81,7 @@ struct TypeChecker<'a, 'tcx> { tcx: TyCtxt<'tcx>, param_env: ParamEnv<'tcx>, mir_phase: MirPhase, + unwind_edge_count: usize, reachable_blocks: BitSet, storage_liveness: ResultsCursor<'a, 'tcx, MaybeStorageLive<'static>>, place_cache: Vec>, @@ -104,7 +106,7 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> { ); } - fn check_edge(&self, location: Location, bb: BasicBlock, edge_kind: EdgeKind) { + fn check_edge(&mut self, location: Location, bb: BasicBlock, edge_kind: EdgeKind) { if bb == START_BLOCK { self.fail(location, "start block must not have predecessors") } @@ -113,10 +115,12 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> { match (src.is_cleanup, bb.is_cleanup, edge_kind) { // Non-cleanup blocks can jump to non-cleanup blocks along non-unwind edges (false, false, EdgeKind::Normal) - // Non-cleanup blocks can jump to cleanup blocks along unwind edges - | (false, true, EdgeKind::Unwind) // Cleanup blocks can jump to cleanup blocks along non-unwind edges | (true, true, EdgeKind::Normal) => {} + // Non-cleanup blocks can jump to cleanup blocks along unwind edges + (false, true, EdgeKind::Unwind) => { + self.unwind_edge_count += 1; + } // All other jumps are invalid _ => { self.fail( @@ -137,6 +141,9 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> { } fn check_cleanup_control_flow(&self) { + if self.unwind_edge_count <= 1 { + return; + } let doms = self.body.basic_blocks.dominators(); let mut post_contract_node = FxHashMap::default(); // Reusing the allocation across invocations of the closure @@ -196,7 +203,7 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> { stack.clear(); stack.insert(bb); loop { - let Some(parent )= parent[bb].take() else { + let Some(parent)= parent[bb].take() else { break }; let no_cycle = stack.insert(parent);