Skip to content

Commit 24c8fef

Browse files
authored
[AMDGPU] Add target hook to isGlobalMemoryObject (llvm#112781) (#74)
We want special handling for IGLP instructions in the scheduler, but they should still be treated as having side effects by other passes. This change adds a target hook to the ScheduleDAGInstrs DAG builder so that we have more control over this behavior. Fixes: SWDEV-447200 (cherry picked from commit 657fb44)
1 parent 6183d22 commit 24c8fef

10 files changed

+86
-49
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

+4
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ class TargetInstrInfo : public MCInstrInfo {
135135
const TargetRegisterInfo *TRI,
136136
const MachineFunction &MF) const;
137137

138+
/// Returns true if MI is an instruction we are unable to reason about
139+
/// (like a call or something with unmodeled side effects).
140+
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const;
141+
138142
/// Return true if the instruction is trivially rematerializable, meaning it
139143
/// has no side effects and requires no operands that aren't always available.
140144
/// This means the only allowed uses are constants and unallocatable physical

llvm/lib/CodeGen/ScheduleDAGInstrs.cpp

+3-7
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "llvm/CodeGen/ScheduleDAG.h"
3636
#include "llvm/CodeGen/ScheduleDFS.h"
3737
#include "llvm/CodeGen/SlotIndexes.h"
38+
#include "llvm/CodeGen/TargetInstrInfo.h"
3839
#include "llvm/CodeGen/TargetRegisterInfo.h"
3940
#include "llvm/CodeGen/TargetSubtargetInfo.h"
4041
#include "llvm/Config/llvm-config.h"
@@ -547,12 +548,6 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
547548
}
548549
}
549550

550-
/// Returns true if MI is an instruction we are unable to reason about
551-
/// (like a call or something with unmodeled side effects).
552-
static inline bool isGlobalMemoryObject(MachineInstr *MI) {
553-
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
554-
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
555-
}
556551

557552
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
558553
unsigned Latency) {
@@ -901,8 +896,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
901896
// isLoadFromStackSLot are not usable after stack slots are lowered to
902897
// actual addresses).
903898

899+
const TargetInstrInfo *TII = ST.getInstrInfo();
904900
// This is a barrier event that acts as a pivotal node in the DAG.
905-
if (isGlobalMemoryObject(&MI)) {
901+
if (TII->isGlobalMemoryObject(&MI)) {
906902

907903
// Become the barrier chain.
908904
if (BarrierChain)

llvm/lib/CodeGen/TargetInstrInfo.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -1913,3 +1913,8 @@ bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
19131913
}
19141914
return true;
19151915
}
1916+
1917+
bool TargetInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const {
1918+
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
1919+
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
1920+
}

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

-21
Original file line numberDiff line numberDiff line change
@@ -240,23 +240,6 @@ class SchedGroup {
240240
}
241241
};
242242

243-
// Remove all existing edges from a SCHED_BARRIER or SCHED_GROUP_BARRIER.
244-
static void resetEdges(SUnit &SU, ScheduleDAGInstrs *DAG) {
245-
assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER ||
246-
SU.getInstr()->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
247-
SU.getInstr()->getOpcode() == AMDGPU::IGLP_OPT);
248-
249-
while (!SU.Preds.empty())
250-
for (auto &P : SU.Preds)
251-
SU.removePred(P);
252-
253-
while (!SU.Succs.empty())
254-
for (auto &S : SU.Succs)
255-
for (auto &SP : S.getSUnit()->Preds)
256-
if (SP.getSUnit() == &SU)
257-
S.getSUnit()->removePred(SP);
258-
}
259-
260243
using SUToCandSGsPair = std::pair<SUnit *, SmallVector<int, 4>>;
261244
using SUsToCandSGsVec = SmallVector<SUToCandSGsPair, 4>;
262245

@@ -460,7 +443,6 @@ void PipelineSolver::makePipeline() {
460443
// Command line requested IGroupLP doesn't have SGBarr
461444
if (!SGBarr)
462445
continue;
463-
resetEdges(*SGBarr, DAG);
464446
SG.link(*SGBarr, false);
465447
}
466448
}
@@ -2576,7 +2558,6 @@ void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
25762558
initSchedGroupBarrierPipelineStage(R);
25772559
FoundSB = true;
25782560
} else if (Opc == AMDGPU::IGLP_OPT) {
2579-
resetEdges(*R, DAG);
25802561
if (!FoundSB && !FoundIGLP) {
25812562
FoundIGLP = true;
25822563
ShouldApplyIGLP = initIGLPOpt(*R);
@@ -2598,7 +2579,6 @@ void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
25982579
assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER);
25992580
// Remove all existing edges from the SCHED_BARRIER that were added due to the
26002581
// instruction having side effects.
2601-
resetEdges(SchedBarrier, DAG);
26022582
LLVM_DEBUG(dbgs() << "Building SchedGroup for SchedBarrier with Mask: "
26032583
<< MI.getOperand(0).getImm() << "\n");
26042584
auto InvertedMask =
@@ -2656,7 +2636,6 @@ void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
26562636
std::vector<SUnit>::reverse_iterator RIter) {
26572637
// Remove all existing edges from the SCHED_GROUP_BARRIER that were added due
26582638
// to the instruction having side effects.
2659-
resetEdges(*RIter, DAG);
26602639
MachineInstr &SGB = *RIter->getInstr();
26612640
assert(SGB.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER);
26622641
int32_t SGMask = SGB.getOperand(0).getImm();

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

+10-7
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,12 @@ static void getRegisterPressures(
188188
Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
189189
}
190190

191+
// Return true if the instruction is mutually exclusive with all non-IGLP DAG
192+
// mutations, requiring all other mutations to be disabled.
193+
static bool isIGLPMutationOnly(unsigned Opcode) {
194+
return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
195+
}
196+
191197
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
192198
bool AtTop,
193199
const RegPressureTracker &RPTracker,
@@ -1014,8 +1020,7 @@ bool GCNSchedStage::initGCNRegion() {
10141020
StageID == GCNSchedStageID::ILPInitialSchedule) {
10151021
for (auto &I : DAG) {
10161022
Unsched.push_back(&I);
1017-
if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
1018-
I.getOpcode() == AMDGPU::IGLP_OPT)
1023+
if (isIGLPMutationOnly(I.getOpcode()))
10191024
DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
10201025
}
10211026
} else {
@@ -1749,11 +1754,9 @@ void GCNScheduleDAGMILive::updateRegionBoundaries(
17491754
}
17501755

17511756
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1752-
return std::any_of(
1753-
DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1754-
unsigned Opc = MI->getOpcode();
1755-
return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1756-
});
1757+
return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
1758+
return isIGLPMutationOnly(MI->getOpcode());
1759+
});
17571760
}
17581761

17591762
GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -10013,3 +10013,10 @@ void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI,
1001310013
Op.setSubReg(AMDGPU::sub0);
1001410014
MI.addOperand(MachineOperand::CreateReg(NewVR, false, true));
1001510015
}
10016+
10017+
bool SIInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const {
10018+
if (isIGLP(*MI))
10019+
return false;
10020+
10021+
return TargetInstrInfo::isGlobalMemoryObject(MI);
10022+
}

llvm/lib/Target/AMDGPU/SIInstrInfo.h

+9
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
237237
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
238238
int64_t &Offset1) const override;
239239

240+
bool isGlobalMemoryObject(const MachineInstr *MI) const override;
241+
240242
bool getMemOperandsWithOffsetWidth(
241243
const MachineInstr &LdSt,
242244
SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
@@ -977,6 +979,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
977979
return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
978980
}
979981

982+
bool isIGLP(unsigned Opcode) const {
983+
return Opcode == AMDGPU::SCHED_BARRIER ||
984+
Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
985+
}
986+
987+
bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }
988+
980989
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
981990
switch (Opcode) {
982991
case AMDGPU::S_WAITCNT_soft:

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir

+3-3
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,6 @@
2525
; GCN-NEXT: ; implicit-def: $vgpr79
2626
; GCN-NEXT: ; implicit-def: $vgpr80
2727
; GCN-NEXT: ; implicit-def: $vgpr91
28-
; GCN-NEXT: ;;#ASMSTART
29-
; GCN-NEXT: s_waitcnt vmcnt(8)
30-
; GCN-NEXT: ;;#ASMEND
3128
; GCN-NEXT: ; kill: killed $sgpr16_sgpr17_sgpr18_sgpr19
3229
; GCN-NEXT: ; iglp_opt mask(0x00000002)
3330
; GCN-NEXT: s_nop 1
@@ -477,6 +474,9 @@
477474
; GCN-NEXT: s_waitcnt lgkmcnt(0)
478475
; GCN-NEXT: buffer_inv sc0 sc1
479476
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[4:5], v[8:9], v[32:47]
477+
; GCN-NEXT: ;;#ASMSTART
478+
; GCN-NEXT: s_waitcnt vmcnt(8)
479+
; GCN-NEXT: ;;#ASMEND
480480
; GCN-NEXT: v_mov_b32_e32 v4, 0
481481
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[6:7], v[0:1], v[32:47]
482482
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[2:3], v[0:1], v[48:63]

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll

+34
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,40 @@ entry:
283283
ret void
284284
}
285285

286+
define amdgpu_kernel void @test_iglp_opt_asm_sideeffect(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
287+
; GCN-LABEL: test_iglp_opt_asm_sideeffect:
288+
; GCN: ; %bb.0: ; %entry
289+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
290+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
291+
; GCN-NEXT: ; iglp_opt mask(0x00000000)
292+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
293+
; GCN-NEXT: v_add_u32_e32 v1, s0, v0
294+
; GCN-NEXT: ds_read_b32 v1, v1
295+
; GCN-NEXT: v_add_u32_e32 v0, s1, v0
296+
; GCN-NEXT: v_mov_b32_e32 v2, s0
297+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
298+
; GCN-NEXT: ds_write_b32 v0, v1
299+
; GCN-NEXT: ;;#ASMSTART
300+
; GCN-NEXT: ;;#ASMEND
301+
; GCN-NEXT: ds_read_b32 v0, v2 offset:256
302+
; GCN-NEXT: v_mov_b32_e32 v1, s1
303+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
304+
; GCN-NEXT: ds_write_b32 v1, v0 offset:256
305+
; GCN-NEXT: s_endpgm
306+
entry:
307+
%idx = call i32 @llvm.amdgcn.workitem.id.x()
308+
%load.0.addr = getelementptr float, ptr addrspace(3) %in, i32 %idx
309+
%load.0 = load float, ptr addrspace(3) %load.0.addr
310+
%store.0.addr = getelementptr float, ptr addrspace(3) %out, i32 %idx
311+
store float %load.0, ptr addrspace(3) %store.0.addr
312+
call void asm sideeffect "", ""() #1
313+
call void @llvm.amdgcn.iglp.opt(i32 0) #1
314+
%load.1.addr = getelementptr float, ptr addrspace(3) %in, i32 64
315+
%load.1 = load float, ptr addrspace(3) %load.1.addr
316+
%store.1.addr = getelementptr float, ptr addrspace(3) %out, i32 64
317+
store float %load.1, ptr addrspace(3) %store.1.addr
318+
ret void
319+
}
286320

287321
declare void @llvm.amdgcn.iglp.opt(i32) #1
288322
declare i32 @llvm.amdgcn.workitem.id.x() #1

0 commit comments

Comments
 (0)