Skip to content

Commit 96a7660

Browse files
committed
[CostModel][X86] Update baseline CTTZ/CTLZ costs for x86_64
Follow-up to llvm#123623: now that the CMOV has been removed, the throughput has improved, which reduces the benefit of vectorization on pre-x86-64-v3 CPUs.
1 parent a94226f commit 96a7660

File tree

10 files changed

+200
-98
lines changed

10 files changed

+200
-98
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -4329,9 +4329,15 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
43294329
{ ISD::ABS, MVT::i64, { 1, 2, 3, 3 } }, // SUB+CMOV
43304330
{ ISD::BITREVERSE, MVT::i64, { 10, 12, 20, 22 } },
43314331
{ ISD::BSWAP, MVT::i64, { 1, 2, 1, 2 } },
4332-
{ ISD::CTLZ, MVT::i64, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
4332+
{ ISD::CTLZ, MVT::i64, { 1, 2, 3, 3 } }, // MOV+BSR+XOR
4333+
{ ISD::CTLZ, MVT::i32, { 1, 2, 3, 3 } }, // MOV+BSR+XOR
4334+
{ ISD::CTLZ, MVT::i16, { 2, 2, 3, 3 } }, // MOV+BSR+XOR
4335+
{ ISD::CTLZ, MVT::i8, { 2, 2, 4, 3 } }, // MOV+BSR+XOR
43334336
{ ISD::CTLZ_ZERO_UNDEF, MVT::i64,{ 1, 2, 2, 2 } }, // BSR+XOR
4334-
{ ISD::CTTZ, MVT::i64, { 2, 2, 3, 4 } }, // TEST+BSF+CMOV/BRANCH
4337+
{ ISD::CTTZ, MVT::i64, { 1, 2, 2, 2 } }, // MOV+BSF
4338+
{ ISD::CTTZ, MVT::i32, { 1, 2, 2, 2 } }, // MOV+BSF
4339+
{ ISD::CTTZ, MVT::i16, { 2, 2, 2, 2 } }, // MOV+BSF
4340+
{ ISD::CTTZ, MVT::i8, { 2, 2, 2, 2 } }, // MOV+BSF
43354341
{ ISD::CTTZ_ZERO_UNDEF, MVT::i64,{ 1, 2, 1, 2 } }, // BSF
43364342
{ ISD::CTPOP, MVT::i64, { 10, 6, 19, 19 } },
43374343
{ ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },

llvm/test/Analysis/CostModel/X86/ctlz-codesize.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)
1717

1818
define i64 @var_ctlz_i64(i64 %a) {
1919
; NOLZCNT-LABEL: 'var_ctlz_i64'
20-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
20+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
2121
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctlz
2222
;
2323
; LZCNT-LABEL: 'var_ctlz_i64'
@@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {
4343

4444
define i32 @var_ctlz_i32(i32 %a) {
4545
; NOLZCNT-LABEL: 'var_ctlz_i32'
46-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
46+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
4747
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctlz
4848
;
4949
; LZCNT-LABEL: 'var_ctlz_i32'
@@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {
6969

7070
define i16 @var_ctlz_i16(i16 %a) {
7171
; NOLZCNT-LABEL: 'var_ctlz_i16'
72-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
72+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
7373
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctlz
7474
;
7575
; LZCNT-LABEL: 'var_ctlz_i16'
@@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {
9595

9696
define i8 @var_ctlz_i8(i8 %a) {
9797
; NOLZCNT-LABEL: 'var_ctlz_i8'
98-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
98+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
9999
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctlz
100100
;
101101
; LZCNT-LABEL: 'var_ctlz_i8'

llvm/test/Analysis/CostModel/X86/ctlz-sizelatency.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)
1717

1818
define i64 @var_ctlz_i64(i64 %a) {
1919
; NOLZCNT-LABEL: 'var_ctlz_i64'
20-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
20+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
2121
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %ctlz
2222
;
2323
; LZCNT-LABEL: 'var_ctlz_i64'
@@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {
4343

4444
define i32 @var_ctlz_i32(i32 %a) {
4545
; NOLZCNT-LABEL: 'var_ctlz_i32'
46-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
46+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
4747
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %ctlz
4848
;
4949
; LZCNT-LABEL: 'var_ctlz_i32'
@@ -69,7 +69,7 @@ define i32 @var_ctlz_i32u(i32 %a) {
6969

7070
define i16 @var_ctlz_i16(i16 %a) {
7171
; NOLZCNT-LABEL: 'var_ctlz_i16'
72-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
72+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
7373
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %ctlz
7474
;
7575
; LZCNT-LABEL: 'var_ctlz_i16'
@@ -95,7 +95,7 @@ define i16 @var_ctlz_i16u(i16 %a) {
9595

9696
define i8 @var_ctlz_i8(i8 %a) {
9797
; NOLZCNT-LABEL: 'var_ctlz_i8'
98-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
98+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
9999
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %ctlz
100100
;
101101
; LZCNT-LABEL: 'var_ctlz_i8'

llvm/test/Analysis/CostModel/X86/ctlz.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)
1717

1818
define i64 @var_ctlz_i64(i64 %a) {
1919
; NOLZCNT-LABEL: 'var_ctlz_i64'
20-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
20+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
2121
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz
2222
;
2323
; LZCNT-LABEL: 'var_ctlz_i64'
@@ -43,7 +43,7 @@ define i64 @var_ctlz_i64u(i64 %a) {
4343

4444
define i32 @var_ctlz_i32(i32 %a) {
4545
; NOLZCNT-LABEL: 'var_ctlz_i32'
46-
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
46+
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
4747
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz
4848
;
4949
; LZCNT-LABEL: 'var_ctlz_i32'

llvm/test/Analysis/CostModel/X86/cttz-codesize.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ declare i8 @llvm.cttz.i8(i8, i1)
1818

1919
define i64 @var_cttz_i64(i64 %a) {
2020
; NOBMI-LABEL: 'var_cttz_i64'
21-
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
21+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
2222
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %cttz
2323
;
2424
; BMI-LABEL: 'var_cttz_i64'
@@ -40,7 +40,7 @@ define i64 @var_cttz_i64u(i64 %a) {
4040

4141
define i32 @var_cttz_i32(i32 %a) {
4242
; NOBMI-LABEL: 'var_cttz_i32'
43-
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
43+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
4444
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
4545
;
4646
; BMI-LABEL: 'var_cttz_i32'

llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ declare i8 @llvm.cttz.i8(i8, i1)
1818

1919
define i64 @var_cttz_i64(i64 %a) {
2020
; NOBMI-LABEL: 'var_cttz_i64'
21-
; NOBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
21+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
2222
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %cttz
2323
;
2424
; BMI-LABEL: 'var_cttz_i64'
@@ -44,7 +44,7 @@ define i64 @var_cttz_i64u(i64 %a) {
4444

4545
define i32 @var_cttz_i32(i32 %a) {
4646
; NOBMI-LABEL: 'var_cttz_i32'
47-
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
47+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
4848
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz
4949
;
5050
; BMI-LABEL: 'var_cttz_i32'
@@ -70,7 +70,7 @@ define i32 @var_cttz_i32u(i32 %a) {
7070

7171
define i16 @var_cttz_i16(i16 %a) {
7272
; NOBMI-LABEL: 'var_cttz_i16'
73-
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 false)
73+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 false)
7474
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %cttz
7575
;
7676
; BMI-LABEL: 'var_cttz_i16'
@@ -96,7 +96,7 @@ define i16 @var_cttz_i16u(i16 %a) {
9696

9797
define i8 @var_cttz_i8(i8 %a) {
9898
; NOBMI-LABEL: 'var_cttz_i8'
99-
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 false)
99+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 false)
100100
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %cttz
101101
;
102102
; BMI-LABEL: 'var_cttz_i8'

llvm/test/Analysis/CostModel/X86/cttz.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ declare i8 @llvm.cttz.i8(i8, i1)
1818

1919
define i64 @var_cttz_i64(i64 %a) {
2020
; NOBMI-LABEL: 'var_cttz_i64'
21-
; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
21+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 false)
2222
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %cttz
2323
;
2424
; BMI-LABEL: 'var_cttz_i64'
@@ -44,7 +44,7 @@ define i64 @var_cttz_i64u(i64 %a) {
4444

4545
define i32 @var_cttz_i32(i32 %a) {
4646
; NOBMI-LABEL: 'var_cttz_i32'
47-
; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
47+
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false)
4848
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %cttz
4949
;
5050
; BMI-LABEL: 'var_cttz_i32'

llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
232232

233233
define void @cttz(i32 %a, <16 x i32> %va) {
234234
; THRU-LABEL: 'cttz'
235-
; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
235+
; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
236236
; THRU-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
237237
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
238238
;
@@ -242,12 +242,12 @@ define void @cttz(i32 %a, <16 x i32> %va) {
242242
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
243243
;
244244
; SIZE-LABEL: 'cttz'
245-
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
245+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
246246
; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
247247
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
248248
;
249249
; SIZE_LATE-LABEL: 'cttz'
250-
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
250+
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
251251
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
252252
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
253253
;

0 commit comments

Comments
 (0)