Skip to content

Commit 67cce1c

Browse files
committed
[X86] Adjust some IceLake fp shuffle schedule classes (PR48110)
The IceLake scheduler model is still mainly a copy of the SkylakeServer model. This patch adjusts the fp shuffle classes to account for most instructions now being able to execute on Port 1 as well as Port 5. This is based on Agner and uops.info data as well as the PR48110 report.

Differential Revision: https://reviews.llvm.org/D115752
1 parent c1340b9 commit 67cce1c

File tree

6 files changed

+97
-84
lines changed

6 files changed

+97
-84
lines changed

llvm/lib/Target/X86/X86SchedIceLake.td

+24-11
Original file line numberDiff line numberDiff line change
@@ -331,12 +331,12 @@ defm : ICXWriteResPair<WriteFLogicZ, [ICXPort05], 1, [1], 1, 7>;
331331
defm : ICXWriteResPair<WriteFTest, [ICXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
332332
defm : ICXWriteResPair<WriteFTestY, [ICXPort0], 2, [1], 1, 7>;
333333
defm : ICXWriteResPair<WriteFTestZ, [ICXPort0], 2, [1], 1, 7>;
334-
defm : ICXWriteResPair<WriteFShuffle, [ICXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
335-
defm : ICXWriteResPair<WriteFShuffleY, [ICXPort5], 1, [1], 1, 7>;
336-
defm : ICXWriteResPair<WriteFShuffleZ, [ICXPort5], 1, [1], 1, 7>;
337-
defm : ICXWriteResPair<WriteFVarShuffle, [ICXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
338-
defm : ICXWriteResPair<WriteFVarShuffleY, [ICXPort5], 1, [1], 1, 7>;
339-
defm : ICXWriteResPair<WriteFVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
334+
defm : ICXWriteResPair<WriteFShuffle, [ICXPort15], 1, [1], 1, 6>; // Floating point vector shuffles.
335+
defm : ICXWriteResPair<WriteFShuffleY, [ICXPort15], 1, [1], 1, 7>;
336+
defm : ICXWriteResPair<WriteFShuffleZ, [ICXPort5], 1, [1], 1, 7>;
337+
defm : ICXWriteResPair<WriteFVarShuffle, [ICXPort15], 1, [1], 1, 6>; // Floating point vector variable shuffles.
338+
defm : ICXWriteResPair<WriteFVarShuffleY, [ICXPort15], 1, [1], 1, 7>;
339+
defm : ICXWriteResPair<WriteFVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
340340
defm : ICXWriteResPair<WriteFBlend, [ICXPort015], 1, [1], 1, 6>; // Floating point vector blends.
341341
defm : ICXWriteResPair<WriteFBlendY,[ICXPort015], 1, [1], 1, 7>;
342342
defm : ICXWriteResPair<WriteFBlendZ,[ICXPort015], 1, [1], 1, 7>;
@@ -665,8 +665,14 @@ def: InstRW<[ICXWriteResGroup3], (instregex "COM(P?)_FST0r",
665665
"KMOV(B|D|Q|W)kr",
666666
"UCOM_F(P?)r",
667667
"VPBROADCAST(D|Q)rr",
668+
"(V?)INSERTPS(Z?)rr",
669+
"(V?)MOV(HL|LH)PS(Z?)rr",
670+
"(V?)MOVDDUP(Y|Z|Z128|Z256)?rr",
668671
"(V?)PALIGNR(Y|Z|Z128|Z256)?rri",
669-
"(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr")>;
672+
"(V?)PERMIL(PD|PS)(Y|Z|Z128|Z256)?ri",
673+
"(V?)PERMIL(PD|PS)(Y|Z|Z128|Z256)?rr",
674+
"(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr",
675+
"(V?)UNPCK(L|H)(PD|PS)(Y|Z|Z128|Z256)?rr")>;
670676

671677
def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> {
672678
let Latency = 1;
@@ -705,6 +711,7 @@ def: InstRW<[ICXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
705711
"VBLENDMPS(Z128|Z256)rr",
706712
"VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
707713
"(V?)PADD(B|D|Q|W)rr",
714+
"(V?)MOV(SD|SS)(Z?)rr",
708715
"VPBLENDD(Y?)rri",
709716
"VPBLENDMB(Z128|Z256)rr",
710717
"VPBLENDMD(Z128|Z256)rr",
@@ -1298,11 +1305,14 @@ def ICXWriteResGroup92 : SchedWriteRes<[ICXPort5,ICXPort23]> {
12981305
let NumMicroOps = 2;
12991306
let ResourceCycles = [1,1];
13001307
}
1301-
def: InstRW<[ICXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
1302-
"VMOVSSZrm(b?)",
1308+
def: InstRW<[ICXWriteResGroup92], (instregex "VMOV(SD|SS)Zrm(b?)",
13031309
"VPBROADCAST(B|W)(Z128)?rm",
1310+
"(V?)INSERTPS(Z?)rm",
13041311
"(V?)PALIGNR(Z128)?rmi",
1305-
"(V?)PACK(U|S)S(DW|WB)(Z128)?rm")>;
1312+
"(V?)PERMIL(PD|PS)(Z128)?m(b?)i",
1313+
"(V?)PERMIL(PD|PS)(Z128)?rm",
1314+
"(V?)PACK(U|S)S(DW|WB)(Z128)?rm",
1315+
"(V?)UNPCK(L|H)(PD|PS)(Z128)?rm")>;
13061316

13071317
def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort015]> {
13081318
let Latency = 7;
@@ -1542,7 +1552,10 @@ def: InstRW<[ICXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
15421552
"VPBROADCASTB(Z|Z256)rm(b?)",
15431553
"VPBROADCASTW(Z|Z256)rm(b?)",
15441554
"(V?)PALIGNR(Y|Z|Z256)rmi",
1545-
"(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm")>;
1555+
"(V?)PERMIL(PD|PS)(Y|Z|Z256)m(b?)i",
1556+
"(V?)PERMIL(PD|PS)(Y|Z|Z256)rm",
1557+
"(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm",
1558+
"(V?)UNPCK(L|H)(PD|PS)(Y|Z|Z256)rm")>;
15461559
def: InstRW<[ICXWriteResGroup119], (instrs VPBROADCASTBYrm,
15471560
VPBROADCASTWYrm,
15481561
VPMOVSXBDYrm,

llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s

+29-29
Original file line numberDiff line numberDiff line change
@@ -1311,18 +1311,18 @@ vzeroupper
13111311
# CHECK-NEXT: 1 5 0.50 * vmovq (%rax), %xmm2
13121312
# CHECK-NEXT: 1 2 1.00 vmovq %xmm0, %rcx
13131313
# CHECK-NEXT: 2 1 1.00 * vmovq %xmm0, (%rax)
1314-
# CHECK-NEXT: 1 1 1.00 vmovsd %xmm0, %xmm1, %xmm2
1314+
# CHECK-NEXT: 1 1 0.33 vmovsd %xmm0, %xmm1, %xmm2
13151315
# CHECK-NEXT: 2 1 1.00 * vmovsd %xmm0, (%rax)
13161316
# CHECK-NEXT: 1 5 0.50 * vmovsd (%rax), %xmm2
1317-
# CHECK-NEXT: 1 1 1.00 vmovshdup %xmm0, %xmm2
1317+
# CHECK-NEXT: 1 1 0.50 vmovshdup %xmm0, %xmm2
13181318
# CHECK-NEXT: 1 6 0.50 * vmovshdup (%rax), %xmm2
1319-
# CHECK-NEXT: 1 1 1.00 vmovshdup %ymm0, %ymm2
1319+
# CHECK-NEXT: 1 1 0.50 vmovshdup %ymm0, %ymm2
13201320
# CHECK-NEXT: 1 7 0.50 * vmovshdup (%rax), %ymm2
1321-
# CHECK-NEXT: 1 1 1.00 vmovsldup %xmm0, %xmm2
1321+
# CHECK-NEXT: 1 1 0.50 vmovsldup %xmm0, %xmm2
13221322
# CHECK-NEXT: 1 6 0.50 * vmovsldup (%rax), %xmm2
1323-
# CHECK-NEXT: 1 1 1.00 vmovsldup %ymm0, %ymm2
1323+
# CHECK-NEXT: 1 1 0.50 vmovsldup %ymm0, %ymm2
13241324
# CHECK-NEXT: 1 7 0.50 * vmovsldup (%rax), %ymm2
1325-
# CHECK-NEXT: 1 1 1.00 vmovss %xmm0, %xmm1, %xmm2
1325+
# CHECK-NEXT: 1 1 0.33 vmovss %xmm0, %xmm1, %xmm2
13261326
# CHECK-NEXT: 2 1 1.00 * vmovss %xmm0, (%rax)
13271327
# CHECK-NEXT: 1 5 0.50 * vmovss (%rax), %xmm2
13281328
# CHECK-NEXT: 1 1 0.33 vmovupd %xmm0, %xmm2
@@ -1650,14 +1650,14 @@ vzeroupper
16501650
# CHECK-NEXT: 2 11 1.00 * vrsqrtps (%rax), %ymm2
16511651
# CHECK-NEXT: 1 4 1.00 vrsqrtss %xmm0, %xmm1, %xmm2
16521652
# CHECK-NEXT: 2 9 1.00 * vrsqrtss (%rax), %xmm1, %xmm2
1653-
# CHECK-NEXT: 1 1 1.00 vshufpd $1, %xmm0, %xmm1, %xmm2
1654-
# CHECK-NEXT: 2 7 1.00 * vshufpd $1, (%rax), %xmm1, %xmm2
1655-
# CHECK-NEXT: 1 1 1.00 vshufpd $1, %ymm0, %ymm1, %ymm2
1656-
# CHECK-NEXT: 2 8 1.00 * vshufpd $1, (%rax), %ymm1, %ymm2
1657-
# CHECK-NEXT: 1 1 1.00 vshufps $1, %xmm0, %xmm1, %xmm2
1658-
# CHECK-NEXT: 2 7 1.00 * vshufps $1, (%rax), %xmm1, %xmm2
1659-
# CHECK-NEXT: 1 1 1.00 vshufps $1, %ymm0, %ymm1, %ymm2
1660-
# CHECK-NEXT: 2 8 1.00 * vshufps $1, (%rax), %ymm1, %ymm2
1653+
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %xmm0, %xmm1, %xmm2
1654+
# CHECK-NEXT: 2 7 0.50 * vshufpd $1, (%rax), %xmm1, %xmm2
1655+
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %ymm0, %ymm1, %ymm2
1656+
# CHECK-NEXT: 2 8 0.50 * vshufpd $1, (%rax), %ymm1, %ymm2
1657+
# CHECK-NEXT: 1 1 0.50 vshufps $1, %xmm0, %xmm1, %xmm2
1658+
# CHECK-NEXT: 2 7 0.50 * vshufps $1, (%rax), %xmm1, %xmm2
1659+
# CHECK-NEXT: 1 1 0.50 vshufps $1, %ymm0, %ymm1, %ymm2
1660+
# CHECK-NEXT: 2 8 0.50 * vshufps $1, (%rax), %ymm1, %ymm2
16611661
# CHECK-NEXT: 1 18 6.00 vsqrtpd %xmm0, %xmm2
16621662
# CHECK-NEXT: 2 24 6.00 * vsqrtpd (%rax), %xmm2
16631663
# CHECK-NEXT: 1 18 12.00 vsqrtpd %ymm0, %ymm2
@@ -1738,7 +1738,7 @@ vzeroupper
17381738

17391739
# CHECK: Resource pressure per iteration:
17401740
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
1741-
# CHECK-NEXT: - 123.00 317.42 221.42 173.17 173.17 34.00 312.92 6.25 12.67 - -
1741+
# CHECK-NEXT: - 123.00 318.08 228.08 173.17 173.17 34.00 305.58 6.25 12.67 - -
17421742

17431743
# CHECK: Resource pressure by instruction:
17441744
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -2023,18 +2023,18 @@ vzeroupper
20232023
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - vmovq (%rax), %xmm2
20242024
# CHECK-NEXT: - - 1.00 - - - - - - - - - vmovq %xmm0, %rcx
20252025
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovq %xmm0, (%rax)
2026-
# CHECK-NEXT: - - - - - - - 1.00 - - - - vmovsd %xmm0, %xmm1, %xmm2
2026+
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovsd %xmm0, %xmm1, %xmm2
20272027
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovsd %xmm0, (%rax)
20282028
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - vmovsd (%rax), %xmm2
2029-
# CHECK-NEXT: - - - - - - - 1.00 - - - - vmovshdup %xmm0, %xmm2
2029+
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vmovshdup %xmm0, %xmm2
20302030
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - vmovshdup (%rax), %xmm2
2031-
# CHECK-NEXT: - - - - - - - 1.00 - - - - vmovshdup %ymm0, %ymm2
2031+
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vmovshdup %ymm0, %ymm2
20322032
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - vmovshdup (%rax), %ymm2
2033-
# CHECK-NEXT: - - - - - - - 1.00 - - - - vmovsldup %xmm0, %xmm2
2033+
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vmovsldup %xmm0, %xmm2
20342034
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - vmovsldup (%rax), %xmm2
2035-
# CHECK-NEXT: - - - - - - - 1.00 - - - - vmovsldup %ymm0, %ymm2
2035+
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vmovsldup %ymm0, %ymm2
20362036
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - vmovsldup (%rax), %ymm2
2037-
# CHECK-NEXT: - - - - - - - 1.00 - - - - vmovss %xmm0, %xmm1, %xmm2
2037+
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovss %xmm0, %xmm1, %xmm2
20382038
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 - - vmovss %xmm0, (%rax)
20392039
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - vmovss (%rax), %xmm2
20402040
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - - - vmovupd %xmm0, %xmm2
@@ -2362,14 +2362,14 @@ vzeroupper
23622362
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vrsqrtps (%rax), %ymm2
23632363
# CHECK-NEXT: - - 1.00 - - - - - - - - - vrsqrtss %xmm0, %xmm1, %xmm2
23642364
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - - - vrsqrtss (%rax), %xmm1, %xmm2
2365-
# CHECK-NEXT: - - - - - - - 1.00 - - - - vshufpd $1, %xmm0, %xmm1, %xmm2
2366-
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshufpd $1, (%rax), %xmm1, %xmm2
2367-
# CHECK-NEXT: - - - - - - - 1.00 - - - - vshufpd $1, %ymm0, %ymm1, %ymm2
2368-
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshufpd $1, (%rax), %ymm1, %ymm2
2369-
# CHECK-NEXT: - - - - - - - 1.00 - - - - vshufps $1, %xmm0, %xmm1, %xmm2
2370-
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshufps $1, (%rax), %xmm1, %xmm2
2371-
# CHECK-NEXT: - - - - - - - 1.00 - - - - vshufps $1, %ymm0, %ymm1, %ymm2
2372-
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - - - vshufps $1, (%rax), %ymm1, %ymm2
2365+
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vshufpd $1, %xmm0, %xmm1, %xmm2
2366+
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vshufpd $1, (%rax), %xmm1, %xmm2
2367+
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vshufpd $1, %ymm0, %ymm1, %ymm2
2368+
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vshufpd $1, (%rax), %ymm1, %ymm2
2369+
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vshufps $1, %xmm0, %xmm1, %xmm2
2370+
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vshufps $1, (%rax), %xmm1, %xmm2
2371+
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - - - vshufps $1, %ymm0, %ymm1, %ymm2
2372+
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - - - vshufps $1, (%rax), %ymm1, %ymm2
23732373
# CHECK-NEXT: - 6.00 1.00 - - - - - - - - - vsqrtpd %xmm0, %xmm2
23742374
# CHECK-NEXT: - 6.00 1.00 - 0.50 0.50 - - - - - - vsqrtpd (%rax), %xmm2
23752375
# CHECK-NEXT: - 12.00 1.00 - - - - - - - - - vsqrtpd %ymm0, %ymm2

0 commit comments

Comments
 (0)