Skip to content

Commit c2d44bd

Browse files
committed
[X86] Lower calls with clang.arc.attachedcall bundle
This patch adds support for lowering function calls with the `clang.arc.attachedcall` bundle. The goal is to expand such calls to the following sequence of instructions:

    callq @fn
    movq %rax, %rdi
    callq _objc_retainAutoreleasedReturnValue / _objc_unsafeClaimAutoreleasedReturnValue

This sequence of instructions triggers Objective-C runtime optimizations, hence we want to ensure no instructions get moved in between them. This patch achieves that by adding a new CALL_RVMARKER ISD node, which gets turned into the CALL64_RVMARKER pseudo, which eventually gets expanded into the sequence mentioned above. The ObjC runtime function to call is determined by the argument in the bundle, which is passed through as a target constant to the pseudo. @ahatanak is working on using this attribute in the front- & middle-end. Together with the front- & middle-end changes, this should address PR31925 for X86. This is the X86 version of 46bc40e, which added similar support for AArch64. Reviewed By: ab. Differential Revision: https://reviews.llvm.org/D94597
1 parent d54712a commit c2d44bd

8 files changed

+341
-13
lines changed

llvm/lib/Target/X86/X86ExpandPseudo.cpp

+79-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ class X86ExpandPseudo : public MachineFunctionPass {
6262
private:
6363
void ExpandICallBranchFunnel(MachineBasicBlock *MBB,
6464
MachineBasicBlock::iterator MBBI);
65-
65+
void expandCALL_RVMARKER(MachineBasicBlock &MBB,
66+
MachineBasicBlock::iterator MBBI);
6667
bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
6768
bool ExpandMBB(MachineBasicBlock &MBB);
6869

@@ -186,6 +187,78 @@ void X86ExpandPseudo::ExpandICallBranchFunnel(
186187
JTMBB->erase(JTInst);
187188
}
188189

190+
void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  // Expand a CALL_RVMARKER pseudo to the plain call instruction, directly
  // followed by the special "movq %rax, %rdi" marker and a call to the ObjC
  // runtime function selected by the pseudo's first (immediate) operand.
  // TODO: Mark the sequence as a bundle, to avoid passes moving other code
  // in between.
  MachineInstr &MI = *MBBI;
  assert((MI.getOperand(1).isGlobal() || MI.getOperand(1).isReg()) &&
         "invalid operand for regular call");

  // Map the RVMARKER pseudo opcode to the corresponding real call opcode.
  unsigned Opc;
  switch (MI.getOpcode()) {
  case X86::CALL64m_RVMARKER:
    Opc = X86::CALL64m;
    break;
  case X86::CALL64r_RVMARKER:
    Opc = X86::CALL64r;
    break;
  case X86::CALL64pcrel32_RVMARKER:
    Opc = X86::CALL64pcrel32;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  // Re-create the original call, copying every operand except the runtime
  // call selector (operand 0).
  MachineInstr *OriginalCall =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  bool RAXImplicitDead = false;
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    MachineOperand &Op = MI.getOperand(I);
    // RAX may be 'implicit dead', if there are no other users of the return
    // value. We introduce a new use (the marker mov and the runtime call
    // below), so change it to 'implicit def' here and mark RAX dead on the
    // final runtime call instead.
    if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
        TRI->regsOverlap(Op.getReg(), X86::RAX)) {
      Op.setIsDead(false);
      Op.setIsDef(true);
      RAXImplicitDead = true;
    }
    OriginalCall->addOperand(Op);
  }

  // Emit marker "movq %rax, %rdi". %rdi is not callee-saved, so it cannot be
  // live across the earlier call. The call to the ObjC runtime function
  // returns the first argument, so the value of %rax is unchanged after the
  // ObjC runtime call.
  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::MOV64rr))
                     .addReg(X86::RDI, RegState::Define)
                     .addReg(X86::RAX)
                     .getInstr();
  // Keep call-site info attached to the marker, which now carries %rdi.
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);

  // Emit the call to the ObjC runtime. Selector 0 picks
  // objc_retainAutoreleasedReturnValue, selector 1 picks
  // objc_unsafeClaimAutoreleasedReturnValue.
  const int64_t RuntimeCallType = MI.getOperand(0).getImm();
  assert((RuntimeCallType == 0 || RuntimeCallType == 1) &&
         "objc runtime call type must be 0 or 1");
  Module *M = MBB.getParent()->getFunction().getParent();
  auto &Context = M->getContext();
  auto *I8PtrTy = PointerType::get(IntegerType::get(Context, 8), 0);
  FunctionCallee Fn = M->getOrInsertFunction(
      RuntimeCallType == 0 ? "objc_retainAutoreleasedReturnValue"
                           : "objc_unsafeClaimAutoreleasedReturnValue",
      FunctionType::get(I8PtrTy, {I8PtrTy}, false));
  const uint32_t *RegMask =
      TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
      .addGlobalAddress(cast<GlobalValue>(Fn.getCallee()), 0, 0)
      .addRegMask(RegMask)
      .addReg(X86::RAX,
              RegState::Implicit |
                  (RAXImplicitDead ? (RegState::Dead | RegState::Define)
                                   : RegState::Define))
      .getInstr();
  MI.eraseFromParent();
}
261+
189262
/// If \p MBBI is a pseudo instruction, this method expands
190263
/// it to the corresponding (sequence of) actual instruction(s).
191264
/// \returns true if \p MBBI has been expanded.
@@ -521,6 +594,11 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
521594
MI.setDesc(TII->get(X86::TILEZERO));
522595
return true;
523596
}
597+
case X86::CALL64pcrel32_RVMARKER:
598+
case X86::CALL64r_RVMARKER:
599+
case X86::CALL64m_RVMARKER:
600+
expandCALL_RVMARKER(MBB, MBBI);
601+
return true;
524602
}
525603
llvm_unreachable("Previous switch has a fallthrough?");
526604
}

llvm/lib/Target/X86/X86ISelLowering.cpp

+20
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "llvm/ADT/StringSwitch.h"
2929
#include "llvm/Analysis/BlockFrequencyInfo.h"
3030
#include "llvm/Analysis/EHPersonalities.h"
31+
#include "llvm/Analysis/ObjCARCUtil.h"
3132
#include "llvm/Analysis/ProfileSummaryInfo.h"
3233
#include "llvm/Analysis/VectorUtils.h"
3334
#include "llvm/CodeGen/IntrinsicLowering.h"
@@ -4430,9 +4431,27 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
44304431

44314432
if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
44324433
Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4434+
} else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4435+
// Calls with a "clang.arc.attachedcall" bundle are special. They should be
4436+
// expanded to the call, directly followed by a special marker sequence and
4437+
// a call to an ObjC library function. Use the CALL_RVMARKER to do that.
4438+
assert(!isTailCall &&
4439+
"tail calls cannot be marked with clang.arc.attachedcall");
4440+
assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
4441+
4442+
// Add target constant to select ObjC runtime call just before the call
4443+
// target. RuntimeCallType == 0 selects objc_retainAutoreleasedReturnValue,
4444+
// RuntimeCallType == 1 selects objc_unsafeClaimAutoreleasedReturnValue when
4445+
// expanding the pseudo.
4446+
unsigned RuntimeCallType =
4447+
objcarc::hasAttachedCallOpBundle(CLI.CB, true) ? 0 : 1;
4448+
Ops.insert(Ops.begin() + 1,
4449+
DAG.getTargetConstant(RuntimeCallType, dl, MVT::i32));
4450+
Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
44334451
} else {
44344452
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
44354453
}
4454+
44364455
InFlag = Chain.getValue(1);
44374456
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
44384457
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
@@ -31285,6 +31304,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3128531304
NODE_NAME_CASE(FLD)
3128631305
NODE_NAME_CASE(FST)
3128731306
NODE_NAME_CASE(CALL)
31307+
NODE_NAME_CASE(CALL_RVMARKER)
3128831308
NODE_NAME_CASE(BT)
3128931309
NODE_NAME_CASE(CMP)
3129031310
NODE_NAME_CASE(FCMP)

llvm/lib/Target/X86/X86ISelLowering.h

+4
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ namespace llvm {
7676
/// Same as call except it adds the NoTrack prefix.
7777
NT_CALL,
7878

79+
// Pseudo for a OBJC call that gets emitted together with a special
80+
// marker instruction.
81+
CALL_RVMARKER,
82+
7983
/// X86 compare and logical compare instructions.
8084
CMP,
8185
FCMP,

llvm/lib/Target/X86/X86InstrCompiler.td

+6
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,12 @@ def : Pat<(X86call (i64 tglobaladdr:$dst)),
11971197
def : Pat<(X86call (i64 texternalsym:$dst)),
11981198
(CALL64pcrel32 texternalsym:$dst)>;
11991199

1200+
// Select direct X86call_rvmarker calls into the CALL64pcrel32_RVMARKER
// pseudo. The $sel immediate is carried through so pseudo expansion can pick
// the matching ObjC runtime function.
def : Pat<(X86call_rvmarker (timm:$sel), (i64 texternalsym:$dst)),
          (CALL64pcrel32_RVMARKER timm:$sel, texternalsym:$dst)>;
def : Pat<(X86call_rvmarker (timm:$sel), (i64 tglobaladdr:$dst)),
          (CALL64pcrel32_RVMARKER timm:$sel, tglobaladdr:$dst)>;
1204+
1205+
12001206
// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
12011207
// can never use callee-saved registers. That is the purpose of the GR64_TC
12021208
// register classes.

llvm/lib/Target/X86/X86InstrControl.td

+16
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,22 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
415415
}
416416
}
417417

418+
// Pseudos for calls lowered from the X86ISD::CALL_RVMARKER node (calls with a
// "clang.arc.attachedcall" bundle). Each takes an extra i32imm selector
// operand ($sel) choosing the ObjC runtime function; X86ExpandPseudo later
// expands them into the real call plus the marker/runtime-call sequence.
let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
    Uses = [RSP, SSP],
    SchedRW = [WriteJump] in {
  // Memory-indirect call.
  def CALL64m_RVMARKER :
     PseudoI<(outs), (ins i32imm:$sel, i64mem:$dst), [(X86call_rvmarker timm:$sel, (loadi64 addr:$dst))]>,
             Requires<[In64BitMode]>;

  // Register-indirect call.
  def CALL64r_RVMARKER :
    PseudoI<(outs), (ins i32imm:$sel, GR64:$dst), [(X86call_rvmarker timm:$sel, GR64:$dst)]>,
            Requires<[In64BitMode]>;

  // Direct (pc-relative) call; matched via explicit Pats rather than a
  // pattern here.
  def CALL64pcrel32_RVMARKER :
    PseudoI<(outs), (ins i32imm:$sel, i64i32imm_brtarget:$dst), []>,
            Requires<[In64BitMode]>;
}
433+
418434
// Conditional tail calls are similar to the above, but they are branches
419435
// rather than barriers, and they use EFLAGS.
420436
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,

llvm/lib/Target/X86/X86InstrInfo.td

+5
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,11 @@ def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
204204
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
205205
SDNPVariadic]>;
206206

207+
// Call node that must be expanded together with the special ObjC
// retainRV/claimRV marker sequence (see CALL64*_RVMARKER pseudos); same
// profile and chain/glue behavior as a regular X86call.
def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call,
                        [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
                         SDNPVariadic]>;
210+
211+
207212
def X86NoTrackCall : SDNode<"X86ISD::NT_CALL", SDT_X86Call,
208213
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
209214
SDNPVariadic]>;

llvm/test/CodeGen/X86/call-rv-marker.ll

+56-12
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc -mtriple=x86_64-apple-macosx -o - %s | FileCheck --check-prefix=CHECK %s
1+
; RUN: llc -mtriple=x86_64-apple-macosx -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK %s
22

33
; TODO: support marker generation with GlobalISel
44
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -23,16 +23,33 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
2323
@g = global i8* null, align 8
2424
@fptr = global i8* ()* null, align 8
2525

26-
define i8* @rv_marker_1() {
27-
; CHECK-LABEL: rv_marker_1:
26+
define i8* @rv_marker_1_retain() {
27+
; CHECK-LABEL: rv_marker_1_retain:
2828
; CHECK: pushq %rax
2929
; CHECK-NEXT: .cfi_def_cfa_offset 16
3030
; CHECK-NEXT: callq _foo1
31+
; CHECK-NEXT: movq %rax, %rdi
32+
; CHECK-NEXT: callq _objc_retainAutoreleasedReturnValue
33+
; CHECK-NEXT: popq %rcx
34+
; CHECK-NEXT: retq
35+
;
36+
entry:
37+
%call = call i8* @foo1() [ "clang.arc.attachedcall"(i64 0) ]
38+
ret i8* %call
39+
}
40+
41+
define i8* @rv_marker_1_claim() {
42+
; CHECK-LABEL: rv_marker_1_claim:
43+
; CHECK: pushq %rax
44+
; CHECK-NEXT: .cfi_def_cfa_offset 16
45+
; CHECK-NEXT: callq _foo1
46+
; CHECK-NEXT: movq %rax, %rdi
47+
; CHECK-NEXT: callq _objc_unsafeClaimAutoreleasedReturnValue
3148
; CHECK-NEXT: popq %rcx
3249
; CHECK-NEXT: retq
3350
;
3451
entry:
35-
%call = call "rv_marker" i8* @foo1()
52+
%call = call i8* @foo1() [ "clang.arc.attachedcall"(i64 1) ]
3653
ret i8* %call
3754
}
3855

@@ -45,13 +62,15 @@ define void @rv_marker_2_select(i32 %c) {
4562
; CHECK-NEXT: adcl $0, %edi
4663
; CHECK-NEXT: callq _foo0
4764
; CHECK-NEXT: movq %rax, %rdi
65+
; CHECK-NEXT: callq _objc_retainAutoreleasedReturnValue
66+
; CHECK-NEXT: movq %rax, %rdi
4867
; CHECK-NEXT: popq %rax
4968
; CHECK-NEXT: jmp _foo2
5069
;
5170
entry:
5271
%tobool.not = icmp eq i32 %c, 0
5372
%.sink = select i1 %tobool.not, i32 2, i32 1
54-
%call1 = call "rv_marker" i8* @foo0(i32 %.sink)
73+
%call1 = call i8* @foo0(i32 %.sink) [ "clang.arc.attachedcall"(i64 0) ]
5574
tail call void @foo2(i8* %call1)
5675
ret void
5776
}
@@ -67,11 +86,13 @@ define void @rv_marker_3() personality i8* bitcast (i32 (...)* @__gxx_personalit
6786
; CHECK-NEXT: .cfi_offset %rbx, -24
6887
; CHECK-NEXT: .cfi_offset %r14, -16
6988
; CHECK-NEXT: callq _foo1
89+
; CHECK-NEXT: movq %rax, %rdi
90+
; CHECK-NEXT: callq _objc_retainAutoreleasedReturnValue
7091
; CHECK-NEXT: movq %rax, %rbx
7192
; CHECK-NEXT: Ltmp0:
7293
;
7394
entry:
74-
%call = call "rv_marker" i8* @foo1()
95+
%call = call i8* @foo1() [ "clang.arc.attachedcall"(i64 0) ]
7596
invoke void @objc_object(i8* %call) #5
7697
to label %invoke.cont unwind label %lpad
7798

@@ -98,13 +119,15 @@ define void @rv_marker_4() personality i8* bitcast (i32 (...)* @__gxx_personalit
98119
; CHECK-NEXT: .cfi_offset %r14, -16
99120
; CHECK-NEXT: Ltmp3:
100121
; CHECK-NEXT: callq _foo1
122+
; CHECK-NEXT: movq %rax, %rdi
123+
; CHECK-NEXT: callq _objc_retainAutoreleasedReturnValue
101124
; CHECK-NEXT: Ltmp4:
102125
;
103126
entry:
104127
%s = alloca %struct.S, align 1
105128
%0 = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 0
106129
call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0) #2
107-
%call = invoke "rv_marker" i8* @foo1()
130+
%call = invoke i8* @foo1() [ "clang.arc.attachedcall"(i64 0) ]
108131
to label %invoke.cont unwind label %lpad
109132

110133
invoke.cont: ; preds = %entry
@@ -135,12 +158,16 @@ ehcleanup: ; preds = %lpad1, %lpad
135158
resume { i8*, i32 } %.pn
136159
}
137160

161+
; TODO: This should use "callq *_fptr(%rip)".
138162
define i8* @rv_marker_5_indirect_call() {
139163
; CHECK-LABEL: rv_marker_5_indirect_call
140164
; CHECK: pushq %rbx
141165
; CHECK-NEXT: .cfi_def_cfa_offset 16
142166
; CHECK-NEXT: .cfi_offset %rbx, -16
143-
; CHECK-NEXT: callq *_fptr(%rip)
167+
; CHECK-NEXT: movq _fptr(%rip), %rax
168+
; CHECK-NEXT: callq *%rax
169+
; CHECK-NEXT: movq %rax, %rdi
170+
; CHECK-NEXT: callq _objc_retainAutoreleasedReturnValue
144171
; CHECK-NEXT: movq %rax, %rbx
145172
; CHECK-NEXT: movq %rax, %rdi
146173
; CHECK-NEXT: callq _foo2
@@ -149,13 +176,13 @@ define i8* @rv_marker_5_indirect_call() {
149176
; CHECK-NEXT: retq
150177
;
151178
entry:
152-
%0 = load i8* ()*, i8* ()** @fptr, align 8
153-
%call = call "rv_marker" i8* %0()
179+
%lv = load i8* ()*, i8* ()** @fptr, align 8
180+
%call = call i8* %lv() [ "clang.arc.attachedcall"(i64 0) ]
154181
tail call void @foo2(i8* %call)
155182
ret i8* %call
156183
}
157184

158-
declare void @foo(i64, i64, i64)
185+
declare i8* @foo(i64, i64, i64)
159186

160187
define void @rv_marker_multiarg(i64 %a, i64 %b, i64 %c) {
161188
; CHECK-LABEL: rv_marker_multiarg
@@ -165,11 +192,28 @@ define void @rv_marker_multiarg(i64 %a, i64 %b, i64 %c) {
165192
; CHECK-NEXT: movq %rdx, %rdi
166193
; CHECK-NEXT: movq %rax, %rdx
167194
; CHECK-NEXT: callq _foo
195+
; CHECK-NEXT: movq %rax, %rdi
196+
; CHECK-NEXT: callq _objc_retainAutoreleasedReturnValue
168197
; CHECK-NEXT: popq %rax
169198
; CHECK-NEXT: retq
170199
;
171-
call "rv_marker" void @foo(i64 %c, i64 %b, i64 %a)
200+
%r = call i8* @foo(i64 %c, i64 %b, i64 %a) [ "clang.arc.attachedcall"(i64 0) ]
172201
ret void
173202
}
174203

204+
define void @test_nonlazybind() {
205+
; CHECK-LABEL: _test_nonlazybind:
206+
; CHECK: bb.0:
207+
; CHECK-NEXT: pushq %rax
208+
; CHECK-NEXT: .cfi_def_cfa_offset 16
209+
; CHECK-NEXT: callq *_foo_nonlazybind@GOTPCREL(%rip)
210+
; CHECK-NEXT: movq %rax, %rdi
211+
; CHECK-NEXT: callq _objc_retainAutoreleasedReturnValue
212+
;
213+
%call1 = notail call i8* @foo_nonlazybind() [ "clang.arc.attachedcall"(i64 0) ]
214+
ret void
215+
}
216+
217+
declare i8* @foo_nonlazybind() nonlazybind
218+
175219
declare i32 @__gxx_personality_v0(...)

0 commit comments

Comments
 (0)