Skip to content

Commit 1697ede

Browse files
davemgreenNoumanAmir657
authored andcommitted
[ValueTracking] Compute KnownFP state from recursive select/phi. (llvm#113686)
Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from llvm#83200.
1 parent 9fd9308 commit 1697ede

File tree

4 files changed

+104
-14
lines changed

4 files changed

+104
-14
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6003,6 +6003,13 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
60036003
if (IncValue == P)
60046004
continue;
60056005

6006+
// If the Use is a select of this phi, use the fp class of the other
6007+
// operand to break the recursion.
6008+
Value *V;
6009+
if (match(IncValue, m_Select(m_Value(), m_Specific(P), m_Value(V))) ||
6010+
match(IncValue, m_Select(m_Value(), m_Value(V), m_Specific(P))))
6011+
IncValue = V;
6012+
60066013
KnownFPClass KnownSrc;
60076014
// Recurse, but cap the recursion to two levels, because we don't want
60086015
// to waste time spinning around in loops. We need at least depth 2 to

llvm/test/Transforms/Attributor/nofpclass-phiselect.ll

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -S < %s | FileCheck %s
33

44
define float @phi_select(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) {
5-
; CHECK-LABEL: define float @phi_select
5+
; CHECK-LABEL: define nofpclass(inf) float @phi_select
66
; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -25,6 +25,30 @@ exit:
2525
ret float %select
2626
}
2727

28+
define float @phi_select_c(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) {
29+
; CHECK-LABEL: define nofpclass(inf) float @phi_select_c
30+
; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] {
31+
; CHECK-NEXT: entry:
32+
; CHECK-NEXT: br label [[LOOP:%.*]]
33+
; CHECK: loop:
34+
; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ]
35+
; CHECK-NEXT: [[SELECT]] = select i1 [[C]], float [[ARG]], float [[PHI]]
36+
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
37+
; CHECK: exit:
38+
; CHECK-NEXT: ret float [[SELECT]]
39+
;
40+
entry:
41+
br label %loop
42+
43+
loop:
44+
%phi = phi float [ %base, %entry ], [ %select, %loop ]
45+
%select = select i1 %c, float %arg, float %phi
46+
br i1 %c, label %loop, label %exit
47+
48+
exit:
49+
ret float %select
50+
}
51+
2852
define float @phi_select_onlybase(i1 %c, float nofpclass(inf) %base, float %arg) {
2953
; CHECK-LABEL: define float @phi_select_onlybase
3054
; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float [[ARG:%.*]]) #[[ATTR0]] {

llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 %
4545
; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD7]]
4646
; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]])
4747
; CHECK-NEXT: [[TMP9:%.*]] = fdiv fast <4 x float> [[TMP8]], [[TMP6]]
48-
; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[TMP9]], [[VEC_PHI]]
49-
; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[DOTNOT9]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP10]]
48+
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[DOTNOT9]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[TMP9]]
49+
; CHECK-NEXT: [[PREDPHI]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP10]]
5050
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5151
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5252
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll

Lines changed: 70 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,79 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
1212
; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
1313
; CHECK: [[FOR_BODY_PREHEADER]]:
1414
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
15+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[RAND_BLOCK_LENGTH]], 4
16+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER23:.*]], label %[[VECTOR_PH:.*]]
17+
; CHECK: [[VECTOR_PH]]:
18+
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
19+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Y]], i64 0
20+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
21+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <2 x double> poison, double [[Z]], i64 0
22+
; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT19]], <2 x double> poison, <2 x i32> zeroinitializer
23+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
24+
; CHECK: [[VECTOR_BODY]]:
25+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
26+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
27+
; CHECK-NEXT: [[VEC_PHI15:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
28+
; CHECK-NEXT: [[VEC_PHI16:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
29+
; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
30+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
31+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX]], i64 8
32+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 4
33+
; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <2 x float>, ptr [[TMP23]], align 4
34+
; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
35+
; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[WIDE_LOAD18]] to <2 x double>
36+
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[BROADCAST_SPLAT]], [[TMP2]]
37+
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[BROADCAST_SPLAT]], [[TMP3]]
38+
; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x double> [[TMP4]], [[BROADCAST_SPLAT20]]
39+
; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <2 x double> [[TMP5]], [[BROADCAST_SPLAT20]]
40+
; CHECK-NEXT: [[TMP8:%.*]] = fcmp fast ogt <2 x double> [[TMP6]], zeroinitializer
41+
; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt <2 x double> [[TMP7]], zeroinitializer
42+
; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP6]], [[TMP6]]
43+
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[TMP7]], [[TMP7]]
44+
; CHECK-NEXT: [[TMP12:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP6]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
45+
; CHECK-NEXT: [[TMP13:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
46+
; CHECK-NEXT: [[TMP14]] = fadd fast <2 x double> [[TMP12]], [[VEC_PHI16]]
47+
; CHECK-NEXT: [[TMP15]] = fadd fast <2 x double> [[TMP13]], [[VEC_PHI17]]
48+
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP8]], <2 x double> [[TMP10]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
49+
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x double> [[TMP11]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
50+
; CHECK-NEXT: [[TMP18]] = fadd fast <2 x double> [[TMP16]], [[VEC_PHI]]
51+
; CHECK-NEXT: [[TMP19]] = fadd fast <2 x double> [[TMP17]], [[VEC_PHI15]]
52+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV]], 4
53+
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
54+
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
55+
; CHECK: [[MIDDLE_BLOCK]]:
56+
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x double> [[TMP19]], [[TMP18]]
57+
; CHECK-NEXT: [[TMP21:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX]])
58+
; CHECK-NEXT: [[BIN_RDX21:%.*]] = fadd fast <2 x double> [[TMP15]], [[TMP14]]
59+
; CHECK-NEXT: [[TMP22:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX21]])
60+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
61+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER23]]
62+
; CHECK: [[FOR_BODY_PREHEADER23]]:
63+
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
64+
; CHECK-NEXT: [[V1_012_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP21]], %[[MIDDLE_BLOCK]] ]
65+
; CHECK-NEXT: [[V0_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ]
1566
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
1667
; CHECK: [[FOR_BODY]]:
17-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
18-
; CHECK-NEXT: [[V1_011:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V1_1:%.*]], %[[FOR_BODY]] ]
19-
; CHECK-NEXT: [[V0_010:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V0_1:%.*]], %[[FOR_BODY]] ]
20-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
21-
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
68+
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER23]] ]
69+
; CHECK-NEXT: [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER23]] ]
70+
; CHECK-NEXT: [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER23]] ]
71+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]]
72+
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
2273
; CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
2374
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[Y]], [[CONV]]
2475
; CHECK-NEXT: [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]]
2576
; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00
26-
; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[SUB]], [[V0_010]]
2777
; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]]
28-
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast double [[MUL3]], [[V1_011]]
29-
; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], double [[ADD]], double [[V0_010]]
30-
; CHECK-NEXT: [[V1_1]] = select i1 [[CMP1]], double [[ADD4]], double [[V1_011]]
31-
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
78+
; CHECK-NEXT: [[ADD8:%.*]] = tail call fast double @llvm.maxnum.f64(double [[SUB]], double -0.000000e+00)
79+
; CHECK-NEXT: [[V0_2]] = fadd fast double [[ADD8]], [[V0_011]]
80+
; CHECK-NEXT: [[ADD4:%.*]] = select i1 [[CMP1]], double [[MUL3]], double -0.000000e+00
81+
; CHECK-NEXT: [[V1_2]] = fadd fast double [[ADD4]], [[V1_012]]
82+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
3283
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
33-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
84+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
3485
; CHECK: [[FOR_END_LOOPEXIT]]:
86+
; CHECK-NEXT: [[V0_1:%.*]] = phi double [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ [[V0_2]], %[[FOR_BODY]] ]
87+
; CHECK-NEXT: [[V1_1:%.*]] = phi double [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[V1_2]], %[[FOR_BODY]] ]
3588
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]]
3689
; CHECK-NEXT: br label %[[FOR_END]]
3790
; CHECK: [[FOR_END]]:
@@ -292,3 +345,9 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
292345
declare void @resample(i32 noundef, ptr noundef)
293346
declare double @llvm.exp2.f64(double)
294347
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
348+
;.
349+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
350+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
351+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
352+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
353+
;.

0 commit comments

Comments
 (0)