Skip to content

Commit 2d92f7d

Browse files
authored
[X86] Support lowering for APX promoted BMI instructions. (#77433)
R16-R31 were added to the GPRs in #70958. This patch supports lowering for the promoted BMI instructions in EVEX space; encoding/decoding support was added in #73899. RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
1 parent f661709 commit 2d92f7d

11 files changed

+1091
-57
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

+26-16
Original file line numberDiff line numberDiff line change
@@ -4086,14 +4086,17 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
40864086
SDValue Control;
40874087
unsigned ROpc, MOpc;
40884088

4089+
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
40894090
if (!PreferBEXTR) {
40904091
assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
40914092
// If we can't make use of BEXTR then we can't fuse shift+mask stages.
40924093
// Let's perform the mask first, and apply shift later. Note that we need to
40934094
// widen the mask to account for the fact that we'll apply shift afterwards!
40944095
Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
4095-
ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
4096-
MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
4096+
ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rr)
4097+
: GET_EGPR_IF_ENABLED(X86::BZHI32rr);
4098+
MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rm)
4099+
: GET_EGPR_IF_ENABLED(X86::BZHI32rm);
40974100
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
40984101
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
40994102
} else {
@@ -4108,8 +4111,10 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
41084111
} else {
41094112
assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
41104113
// BMI requires the immediate to be placed in a register.
4111-
ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
4112-
MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
4114+
ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rr)
4115+
: GET_EGPR_IF_ENABLED(X86::BEXTR32rr);
4116+
MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rm)
4117+
: GET_EGPR_IF_ENABLED(X86::BEXTR32rm);
41134118
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
41144119
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
41154120
}
@@ -5482,25 +5487,30 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
54825487
switch (NVT.SimpleTy) {
54835488
default: llvm_unreachable("Unsupported VT!");
54845489
case MVT::i32:
5485-
Opc = UseMULXHi ? X86::MULX32Hrr :
5486-
UseMULX ? X86::MULX32rr :
5487-
IsSigned ? X86::IMUL32r : X86::MUL32r;
5488-
MOpc = UseMULXHi ? X86::MULX32Hrm :
5489-
UseMULX ? X86::MULX32rm :
5490-
IsSigned ? X86::IMUL32m : X86::MUL32m;
5490+
Opc = UseMULXHi ? X86::MULX32Hrr
5491+
: UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
5492+
: IsSigned ? X86::IMUL32r
5493+
: X86::MUL32r;
5494+
MOpc = UseMULXHi ? X86::MULX32Hrm
5495+
: UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rm)
5496+
: IsSigned ? X86::IMUL32m
5497+
: X86::MUL32m;
54915498
LoReg = UseMULX ? X86::EDX : X86::EAX;
54925499
HiReg = X86::EDX;
54935500
break;
54945501
case MVT::i64:
5495-
Opc = UseMULXHi ? X86::MULX64Hrr :
5496-
UseMULX ? X86::MULX64rr :
5497-
IsSigned ? X86::IMUL64r : X86::MUL64r;
5498-
MOpc = UseMULXHi ? X86::MULX64Hrm :
5499-
UseMULX ? X86::MULX64rm :
5500-
IsSigned ? X86::IMUL64m : X86::MUL64m;
5502+
Opc = UseMULXHi ? X86::MULX64Hrr
5503+
: UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rr)
5504+
: IsSigned ? X86::IMUL64r
5505+
: X86::MUL64r;
5506+
MOpc = UseMULXHi ? X86::MULX64Hrm
5507+
: UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rm)
5508+
: IsSigned ? X86::IMUL64m
5509+
: X86::MUL64m;
55015510
LoReg = UseMULX ? X86::RDX : X86::RAX;
55025511
HiReg = X86::RDX;
55035512
break;
5513+
#undef GET_EGPR_IF_ENABLED
55045514
}
55055515

55065516
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;

llvm/lib/Target/X86/X86InstrArithmetic.td

+11-5
Original file line numberDiff line numberDiff line change
@@ -1338,17 +1338,23 @@ defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]
13381338
defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>;
13391339
}
13401340

1341-
let Predicates = [HasBMI], AddedComplexity = -6 in {
1341+
multiclass Andn_Pats<string suffix> {
13421342
def : Pat<(and (not GR32:$src1), GR32:$src2),
1343-
(ANDN32rr GR32:$src1, GR32:$src2)>;
1343+
(!cast<Instruction>(ANDN32rr#suffix) GR32:$src1, GR32:$src2)>;
13441344
def : Pat<(and (not GR64:$src1), GR64:$src2),
1345-
(ANDN64rr GR64:$src1, GR64:$src2)>;
1345+
(!cast<Instruction>(ANDN64rr#suffix) GR64:$src1, GR64:$src2)>;
13461346
def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
1347-
(ANDN32rm GR32:$src1, addr:$src2)>;
1347+
(!cast<Instruction>(ANDN32rm#suffix) GR32:$src1, addr:$src2)>;
13481348
def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
1349-
(ANDN64rm GR64:$src1, addr:$src2)>;
1349+
(!cast<Instruction>(ANDN64rm#suffix) GR64:$src1, addr:$src2)>;
13501350
}
13511351

1352+
let Predicates = [HasBMI, NoEGPR], AddedComplexity = -6 in
1353+
defm : Andn_Pats<"">;
1354+
1355+
let Predicates = [HasBMI, HasEGPR], AddedComplexity = -6 in
1356+
defm : Andn_Pats<"_EVEX">;
1357+
13521358
//===----------------------------------------------------------------------===//
13531359
// MULX Instruction
13541360
//

llvm/lib/Target/X86/X86InstrMisc.td

+43-15
Original file line numberDiff line numberDiff line change
@@ -1241,43 +1241,49 @@ let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in {
12411241
defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX;
12421242
}
12431243

1244-
let Predicates = [HasBMI] in {
1244+
multiclass Bls_Pats<string suffix> {
12451245
// FIXME(1): patterns for the load versions are not implemented
12461246
// FIXME(2): By only matching `add_su` and `ineg_su` we may emit
12471247
// extra `mov` instructions if `src` has future uses. It may be better
12481248
// to always match if `src` has more users.
12491249
def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
1250-
(BLSR32rr GR32:$src)>;
1250+
(!cast<Instruction>(BLSR32rr#suffix) GR32:$src)>;
12511251
def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
1252-
(BLSR64rr GR64:$src)>;
1252+
(!cast<Instruction>(BLSR64rr#suffix) GR64:$src)>;
12531253

12541254
def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
1255-
(BLSMSK32rr GR32:$src)>;
1255+
(!cast<Instruction>(BLSMSK32rr#suffix) GR32:$src)>;
12561256
def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
1257-
(BLSMSK64rr GR64:$src)>;
1257+
(!cast<Instruction>(BLSMSK64rr#suffix) GR64:$src)>;
12581258

12591259
def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
1260-
(BLSI32rr GR32:$src)>;
1260+
(!cast<Instruction>(BLSI32rr#suffix) GR32:$src)>;
12611261
def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
1262-
(BLSI64rr GR64:$src)>;
1262+
(!cast<Instruction>(BLSI64rr#suffix) GR64:$src)>;
12631263

12641264
// Versions to match flag producing ops.
12651265
def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
1266-
(BLSR32rr GR32:$src)>;
1266+
(!cast<Instruction>(BLSR32rr#suffix) GR32:$src)>;
12671267
def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
1268-
(BLSR64rr GR64:$src)>;
1268+
(!cast<Instruction>(BLSR64rr#suffix) GR64:$src)>;
12691269

12701270
def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
1271-
(BLSMSK32rr GR32:$src)>;
1271+
(!cast<Instruction>(BLSMSK32rr#suffix) GR32:$src)>;
12721272
def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
1273-
(BLSMSK64rr GR64:$src)>;
1273+
(!cast<Instruction>(BLSMSK64rr#suffix) GR64:$src)>;
12741274

12751275
def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
1276-
(BLSI32rr GR32:$src)>;
1276+
(!cast<Instruction>(BLSI32rr#suffix) GR32:$src)>;
12771277
def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
1278-
(BLSI64rr GR64:$src)>;
1278+
(!cast<Instruction>(BLSI64rr#suffix) GR64:$src)>;
12791279
}
12801280

1281+
let Predicates = [HasBMI, NoEGPR] in
1282+
defm : Bls_Pats<"">;
1283+
1284+
let Predicates = [HasBMI, HasEGPR] in
1285+
defm : Bls_Pats<"_EVEX">;
1286+
12811287
multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
12821288
X86FoldableSchedWrite sched, string Suffix = ""> {
12831289
let SchedRW = [sched], Form = MRMSrcReg4VOp3 in
@@ -1324,7 +1330,7 @@ def AndMask64 : ImmLeaf<i64, [{
13241330
}]>;
13251331

13261332
// Use BEXTR for 64-bit 'and' with large immediate 'mask'.
1327-
let Predicates = [HasBMI, NoBMI2, NoTBM] in {
1333+
let Predicates = [HasBMI, NoBMI2, NoTBM, NoEGPR] in {
13281334
def : Pat<(and GR64:$src, AndMask64:$mask),
13291335
(BEXTR64rr GR64:$src,
13301336
(SUBREG_TO_REG (i64 0),
@@ -1335,8 +1341,19 @@ let Predicates = [HasBMI, NoBMI2, NoTBM] in {
13351341
(MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
13361342
}
13371343

1344+
let Predicates = [HasBMI, NoBMI2, NoTBM, HasEGPR] in {
1345+
def : Pat<(and GR64:$src, AndMask64:$mask),
1346+
(BEXTR64rr_EVEX GR64:$src,
1347+
(SUBREG_TO_REG (i64 0),
1348+
(MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
1349+
def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
1350+
(BEXTR64rm_EVEX addr:$src,
1351+
(SUBREG_TO_REG (i64 0),
1352+
(MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
1353+
}
1354+
13381355
// Use BZHI for 64-bit 'and' with large immediate 'mask'.
1339-
let Predicates = [HasBMI2, NoTBM] in {
1356+
let Predicates = [HasBMI2, NoTBM, NoEGPR] in {
13401357
def : Pat<(and GR64:$src, AndMask64:$mask),
13411358
(BZHI64rr GR64:$src,
13421359
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -1347,6 +1364,17 @@ let Predicates = [HasBMI2, NoTBM] in {
13471364
(MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
13481365
}
13491366

1367+
let Predicates = [HasBMI2, NoTBM, HasEGPR] in {
1368+
def : Pat<(and GR64:$src, AndMask64:$mask),
1369+
(BZHI64rr_EVEX GR64:$src,
1370+
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
1371+
(MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
1372+
def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
1373+
(BZHI64rm_EVEX addr:$src,
1374+
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
1375+
(MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
1376+
}
1377+
13501378
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
13511379
X86MemOperand x86memop, SDPatternOperator OpNode,
13521380
PatFrag ld_frag, string Suffix = ""> {

llvm/lib/Target/X86/X86InstrShiftRotate.td

+27-20
Original file line numberDiff line numberDiff line change
@@ -284,32 +284,32 @@ defm SHRX64: ShiftX<"shrx", Xi64>, XD;
284284
defm SHLX32: ShiftX<"shlx", Xi32>, PD;
285285
defm SHLX64: ShiftX<"shlx", Xi64>, PD;
286286

287-
multiclass RORX_Pats {
287+
multiclass RORX_Pats<string suffix> {
288288
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
289289
let AddedComplexity = 10 in {
290290
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
291-
(RORX32ri GR32:$src, imm:$shamt)>;
291+
(!cast<Instruction>(RORX32ri#suffix) GR32:$src, imm:$shamt)>;
292292
def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
293-
(RORX64ri GR64:$src, imm:$shamt)>;
293+
(!cast<Instruction>(RORX64ri#suffix) GR64:$src, imm:$shamt)>;
294294

295295
def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
296-
(RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
296+
(!cast<Instruction>(RORX32ri#suffix) GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
297297
def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
298-
(RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
298+
(!cast<Instruction>(RORX64ri#suffix) GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
299299
}
300300

301301
def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
302-
(RORX32mi addr:$src, imm:$shamt)>;
302+
(!cast<Instruction>(RORX32mi#suffix) addr:$src, imm:$shamt)>;
303303
def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
304-
(RORX64mi addr:$src, imm:$shamt)>;
304+
(!cast<Instruction>(RORX64mi#suffix) addr:$src, imm:$shamt)>;
305305

306306
def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
307-
(RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
307+
(!cast<Instruction>(RORX32mi#suffix) addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
308308
def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
309-
(RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
309+
(!cast<Instruction>(RORX64mi#suffix) addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
310310
}
311311

312-
multiclass ShiftX_Pats<SDNode op> {
312+
multiclass ShiftX_Pats<SDNode op, string suffix = ""> {
313313
// Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
314314
// immediate shift, i.e. the following code is considered better
315315
//
@@ -325,16 +325,16 @@ multiclass ShiftX_Pats<SDNode op> {
325325
//
326326
let AddedComplexity = 1 in {
327327
def : Pat<(op GR32:$src1, GR8:$src2),
328-
(!cast<Instruction>(NAME#"32rr") GR32:$src1,
328+
(!cast<Instruction>(NAME#"32rr"#suffix) GR32:$src1,
329329
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
330330
def : Pat<(op GR64:$src1, GR8:$src2),
331-
(!cast<Instruction>(NAME#"64rr") GR64:$src1,
331+
(!cast<Instruction>(NAME#"64rr"#suffix) GR64:$src1,
332332
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
333333
def : Pat<(op GR32:$src1, (shiftMask32 GR8:$src2)),
334-
(!cast<Instruction>(NAME#"32rr") GR32:$src1,
334+
(!cast<Instruction>(NAME#"32rr"#suffix) GR32:$src1,
335335
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
336336
def : Pat<(op GR64:$src1, (shiftMask64 GR8:$src2)),
337-
(!cast<Instruction>(NAME#"64rr") GR64:$src1,
337+
(!cast<Instruction>(NAME#"64rr"#suffix) GR64:$src1,
338338
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
339339
}
340340
// We prefer to use
@@ -348,22 +348,29 @@ multiclass ShiftX_Pats<SDNode op> {
348348
//
349349
// This priority is enforced by IsProfitableToFoldLoad.
350350
def : Pat<(op (loadi32 addr:$src1), GR8:$src2),
351-
(!cast<Instruction>(NAME#"32rm") addr:$src1,
351+
(!cast<Instruction>(NAME#"32rm"#suffix) addr:$src1,
352352
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
353353
def : Pat<(op (loadi64 addr:$src1), GR8:$src2),
354-
(!cast<Instruction>(NAME#"64rm") addr:$src1,
354+
(!cast<Instruction>(NAME#"64rm"#suffix) addr:$src1,
355355
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
356356
def : Pat<(op (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
357-
(!cast<Instruction>(NAME#"32rm") addr:$src1,
357+
(!cast<Instruction>(NAME#"32rm"#suffix) addr:$src1,
358358
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
359359
def : Pat<(op (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
360-
(!cast<Instruction>(NAME#"64rm") addr:$src1,
360+
(!cast<Instruction>(NAME#"64rm"#suffix) addr:$src1,
361361
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
362362
}
363363

364-
let Predicates = [HasBMI2] in {
365-
defm : RORX_Pats;
364+
let Predicates = [HasBMI2, NoEGPR] in {
365+
defm : RORX_Pats<"">;
366366
defm SARX : ShiftX_Pats<sra>;
367367
defm SHRX : ShiftX_Pats<srl>;
368368
defm SHLX : ShiftX_Pats<shl>;
369369
}
370+
371+
let Predicates = [HasBMI2, HasEGPR] in {
372+
defm : RORX_Pats<"_EVEX">;
373+
defm SARX : ShiftX_Pats<sra, "_EVEX">;
374+
defm SHRX : ShiftX_Pats<srl, "_EVEX">;
375+
defm SHLX : ShiftX_Pats<shl, "_EVEX">;
376+
}

0 commit comments

Comments
 (0)