-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[ValueTracking] Compute known bits from recursive select/phi #113707
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-analysis Author: Yingwei Zheng (dtcxzyw) ChangesThis patch is inspired by #113686. I found that it removes a lot of unnecessary "and X, 1" in some applications that represent boolean values with int. Full diff: https://github.com/llvm/llvm-project/pull/113707.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e9ed8b3c862b55..e443488a56b3cb 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1566,6 +1566,13 @@ static void computeKnownBitsFromOperator(const Operator *I,
// Skip direct self references.
if (IncValue == P) continue;
+ // If the Use is a select of this phi, use the knownbit of the other
+ // operand to break the recursion.
+ Value *V;
+ if (match(IncValue, m_Select(m_Value(), m_Specific(P), m_Value(V))) ||
+ match(IncValue, m_Select(m_Value(), m_Value(V), m_Specific(P))))
+ IncValue = V;
+
// Change the context instruction to the "edge" that flows into the
// phi. This is important because that is where the value is actually
// "evaluated" even though it is used later somewhere else. (see also
diff --git a/llvm/test/Transforms/InstCombine/known-phi-recurse.ll b/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
index c2007d16ae93be..fd3728324b8ea8 100644
--- a/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
+++ b/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
@@ -142,3 +142,118 @@ end:
ret i32 %res
}
+define i32 @knownbits_phi_select_test1(ptr %p1, ptr %p2, i8 %x) {
+; CHECK-LABEL: @knownbits_phi_select_test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVAR1:%.*]] = phi i8 [ [[LOAD2:%.*]], [[BB2:%.*]] ], [ [[X:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDVAR3:%.*]] = phi ptr [ [[INDVAR3_NEXT:%.*]], [[BB2]] ], [ [[P1:%.*]], [[ENTRY]] ]
+; CHECK-NEXT: [[INDVAR4:%.*]] = phi i32 [ [[INDVAR4_NEXT:%.*]], [[BB2]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[INDVAR5:%.*]] = phi i32 [ [[INDVAR5_NEXT:%.*]], [[BB2]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: switch i8 [[INDVAR1]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT: i8 0, label [[EXIT:%.*]]
+; CHECK-NEXT: i8 59, label [[BB1:%.*]]
+; CHECK-NEXT: i8 35, label [[BB1]]
+; CHECK-NEXT: ]
+; CHECK: default:
+; CHECK-NEXT: [[EXT:%.*]] = sext i8 [[INDVAR1]] to i64
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, ptr [[P2:%.*]], i64 [[EXT]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load i16, ptr [[GEP1]], align 2
+; CHECK-NEXT: [[MASK:%.*]] = and i16 [[LOAD1]], 8192
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i16 [[MASK]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[BB2]], label [[BB1]]
+; CHECK: bb1:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[INDVAR4]], 0
+; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[INDVAR5]], 0
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP3]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[BB2]], label [[EXIT]]
+; CHECK: bb2:
+; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i8 [[INDVAR1]], 39
+; CHECK-NEXT: [[EXT2:%.*]] = zext i1 [[CMP4]] to i32
+; CHECK-NEXT: [[INDVAR4_NEXT]] = xor i32 [[INDVAR4]], [[EXT2]]
+; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i8 [[INDVAR1]], 34
+; CHECK-NEXT: [[EXT3:%.*]] = zext i1 [[CMP6]] to i32
+; CHECK-NEXT: [[INDVAR5_NEXT]] = xor i32 [[INDVAR5]], [[EXT3]]
+; CHECK-NEXT: [[INDVAR3_NEXT]] = getelementptr inbounds i8, ptr [[INDVAR3]], i64 1
+; CHECK-NEXT: [[LOAD2]] = load i8, ptr [[INDVAR3_NEXT]], align 1
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 [[INDVAR5]]
+;
+entry:
+ br label %loop
+
+loop:
+ %indvar1 = phi i8 [ %load2, %bb2 ], [ %x, %entry ]
+ %indvar2 = phi i64 [ %indvar2.next, %bb2 ], [ 0, %entry ]
+ %indvar3 = phi ptr [ %indvar3.next, %bb2 ], [ %p1, %entry ]
+ %indvar4 = phi i32 [ %indvar4.next, %bb2 ], [ 0, %entry ]
+ %indvar5 = phi i32 [ %indvar5.next, %bb2 ], [ 0, %entry ]
+ switch i8 %indvar1, label %default [
+ i8 0, label %exit
+ i8 59, label %bb1
+ i8 35, label %bb1
+ ]
+
+default:
+ %ext = sext i8 %indvar1 to i64
+ %gep1 = getelementptr inbounds i16, ptr %p2, i64 %ext
+ %load1 = load i16, ptr %gep1, align 2
+ %mask = and i16 %load1, 8192
+ %cmp1 = icmp eq i16 %mask, 0
+ br i1 %cmp1, label %bb2, label %bb1
+
+bb1:
+ %cmp2 = icmp ne i32 %indvar4, 0
+ %cmp3 = icmp ne i32 %indvar5, 0
+ %or.cond = select i1 %cmp2, i1 true, i1 %cmp3
+ br i1 %or.cond, label %bb2, label %exit
+
+bb2:
+ %cmp4 = icmp eq i8 %indvar1, 39
+ %cmp5 = icmp eq i32 %indvar4, 0
+ %ext2 = zext i1 %cmp5 to i32
+ %indvar4.next = select i1 %cmp4, i32 %ext2, i32 %indvar4
+ %cmp6 = icmp eq i8 %indvar1, 34
+ %cmp7 = icmp eq i32 %indvar5, 0
+ %ext3 = zext i1 %cmp7 to i32
+ %indvar5.next = select i1 %cmp6, i32 %ext3, i32 %indvar5
+ %indvar3.next = getelementptr inbounds i8, ptr %indvar3, i64 1
+ %indvar2.next = add i64 %indvar2, 1
+ %load2 = load i8, ptr %indvar3.next, align 1
+ br label %loop
+
+exit:
+ ret i32 %indvar5
+}
+
+define i8 @knownbits_phi_select_test2() {
+; CHECK-LABEL: @knownbits_phi_select_test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[CONTAIN:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[COND0:%.*]] = call i1 @cond()
+; CHECK-NEXT: [[CONTAIN]] = select i1 [[COND0]], i8 1, i8 [[INDVAR]]
+; CHECK-NEXT: [[COND1:%.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 [[COND1]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret i8 [[CONTAIN]]
+;
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i8 [ 0, %entry ], [ %contain, %loop ]
+ %cond0 = call i1 @cond()
+ %contain = select i1 %cond0, i8 1, i8 %indvar
+ %cond1 = call i1 @cond()
+ br i1 %cond1, label %exit, label %loop
+
+exit:
+ %bool = and i8 %contain, 1
+ ret i8 %bool
+}
+
+declare i1 @cond()
|
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesThis patch is inspired by #113686. I found that it removes a lot of unnecessary "and X, 1" in some applications that represent boolean values with int. Full diff: https://github.com/llvm/llvm-project/pull/113707.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e9ed8b3c862b55..e443488a56b3cb 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1566,6 +1566,13 @@ static void computeKnownBitsFromOperator(const Operator *I,
// Skip direct self references.
if (IncValue == P) continue;
+ // If the Use is a select of this phi, use the knownbit of the other
+ // operand to break the recursion.
+ Value *V;
+ if (match(IncValue, m_Select(m_Value(), m_Specific(P), m_Value(V))) ||
+ match(IncValue, m_Select(m_Value(), m_Value(V), m_Specific(P))))
+ IncValue = V;
+
// Change the context instruction to the "edge" that flows into the
// phi. This is important because that is where the value is actually
// "evaluated" even though it is used later somewhere else. (see also
diff --git a/llvm/test/Transforms/InstCombine/known-phi-recurse.ll b/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
index c2007d16ae93be..fd3728324b8ea8 100644
--- a/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
+++ b/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
@@ -142,3 +142,118 @@ end:
ret i32 %res
}
+define i32 @knownbits_phi_select_test1(ptr %p1, ptr %p2, i8 %x) {
+; CHECK-LABEL: @knownbits_phi_select_test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVAR1:%.*]] = phi i8 [ [[LOAD2:%.*]], [[BB2:%.*]] ], [ [[X:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDVAR3:%.*]] = phi ptr [ [[INDVAR3_NEXT:%.*]], [[BB2]] ], [ [[P1:%.*]], [[ENTRY]] ]
+; CHECK-NEXT: [[INDVAR4:%.*]] = phi i32 [ [[INDVAR4_NEXT:%.*]], [[BB2]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[INDVAR5:%.*]] = phi i32 [ [[INDVAR5_NEXT:%.*]], [[BB2]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: switch i8 [[INDVAR1]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT: i8 0, label [[EXIT:%.*]]
+; CHECK-NEXT: i8 59, label [[BB1:%.*]]
+; CHECK-NEXT: i8 35, label [[BB1]]
+; CHECK-NEXT: ]
+; CHECK: default:
+; CHECK-NEXT: [[EXT:%.*]] = sext i8 [[INDVAR1]] to i64
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, ptr [[P2:%.*]], i64 [[EXT]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load i16, ptr [[GEP1]], align 2
+; CHECK-NEXT: [[MASK:%.*]] = and i16 [[LOAD1]], 8192
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i16 [[MASK]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[BB2]], label [[BB1]]
+; CHECK: bb1:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[INDVAR4]], 0
+; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[INDVAR5]], 0
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP3]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[BB2]], label [[EXIT]]
+; CHECK: bb2:
+; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i8 [[INDVAR1]], 39
+; CHECK-NEXT: [[EXT2:%.*]] = zext i1 [[CMP4]] to i32
+; CHECK-NEXT: [[INDVAR4_NEXT]] = xor i32 [[INDVAR4]], [[EXT2]]
+; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i8 [[INDVAR1]], 34
+; CHECK-NEXT: [[EXT3:%.*]] = zext i1 [[CMP6]] to i32
+; CHECK-NEXT: [[INDVAR5_NEXT]] = xor i32 [[INDVAR5]], [[EXT3]]
+; CHECK-NEXT: [[INDVAR3_NEXT]] = getelementptr inbounds i8, ptr [[INDVAR3]], i64 1
+; CHECK-NEXT: [[LOAD2]] = load i8, ptr [[INDVAR3_NEXT]], align 1
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 [[INDVAR5]]
+;
+entry:
+ br label %loop
+
+loop:
+ %indvar1 = phi i8 [ %load2, %bb2 ], [ %x, %entry ]
+ %indvar2 = phi i64 [ %indvar2.next, %bb2 ], [ 0, %entry ]
+ %indvar3 = phi ptr [ %indvar3.next, %bb2 ], [ %p1, %entry ]
+ %indvar4 = phi i32 [ %indvar4.next, %bb2 ], [ 0, %entry ]
+ %indvar5 = phi i32 [ %indvar5.next, %bb2 ], [ 0, %entry ]
+ switch i8 %indvar1, label %default [
+ i8 0, label %exit
+ i8 59, label %bb1
+ i8 35, label %bb1
+ ]
+
+default:
+ %ext = sext i8 %indvar1 to i64
+ %gep1 = getelementptr inbounds i16, ptr %p2, i64 %ext
+ %load1 = load i16, ptr %gep1, align 2
+ %mask = and i16 %load1, 8192
+ %cmp1 = icmp eq i16 %mask, 0
+ br i1 %cmp1, label %bb2, label %bb1
+
+bb1:
+ %cmp2 = icmp ne i32 %indvar4, 0
+ %cmp3 = icmp ne i32 %indvar5, 0
+ %or.cond = select i1 %cmp2, i1 true, i1 %cmp3
+ br i1 %or.cond, label %bb2, label %exit
+
+bb2:
+ %cmp4 = icmp eq i8 %indvar1, 39
+ %cmp5 = icmp eq i32 %indvar4, 0
+ %ext2 = zext i1 %cmp5 to i32
+ %indvar4.next = select i1 %cmp4, i32 %ext2, i32 %indvar4
+ %cmp6 = icmp eq i8 %indvar1, 34
+ %cmp7 = icmp eq i32 %indvar5, 0
+ %ext3 = zext i1 %cmp7 to i32
+ %indvar5.next = select i1 %cmp6, i32 %ext3, i32 %indvar5
+ %indvar3.next = getelementptr inbounds i8, ptr %indvar3, i64 1
+ %indvar2.next = add i64 %indvar2, 1
+ %load2 = load i8, ptr %indvar3.next, align 1
+ br label %loop
+
+exit:
+ ret i32 %indvar5
+}
+
+define i8 @knownbits_phi_select_test2() {
+; CHECK-LABEL: @knownbits_phi_select_test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[CONTAIN:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[COND0:%.*]] = call i1 @cond()
+; CHECK-NEXT: [[CONTAIN]] = select i1 [[COND0]], i8 1, i8 [[INDVAR]]
+; CHECK-NEXT: [[COND1:%.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 [[COND1]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret i8 [[CONTAIN]]
+;
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i8 [ 0, %entry ], [ %contain, %loop ]
+ %cond0 = call i1 @cond()
+ %contain = select i1 %cond0, i8 1, i8 %indvar
+ %cond1 = call i1 @cond()
+ br i1 %cond1, label %exit, label %loop
+
+exit:
+ %bool = and i8 %contain, 1
+ ret i8 %bool
+}
+
+declare i1 @cond()
|
This looks fine although I have two notes.
|
Looks like it breaks the stage2 build. |
It doesn't improve anything (with +0.01% compile-time overhead). Should I drop it? |
If it doesn't improve anything Edit: Typically I prefer completeness, although I can't really argue on behalf of the change if there are |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM like this or if you want to revert to prior version.
Are you going to update to handle #114008. |
No, feel free to do this :) |
Finish porting llvm#114008 to `KnownBits` (Follow up to llvm#113707).
Finish porting llvm#114008 to `KnownBits` (Follow up to llvm#113707).
…3707) This patch is inspired by llvm#113686. I found that it removes a lot of unnecessary "and X, 1" in some applications that represent boolean values with int.
Finish porting llvm#114008 to `KnownBits` (Follow up to llvm#113707).
if (SI->getTrueValue() == P || SI->getFalseValue() == P) { | ||
IncValue = SI->getTrueValue() == P ? SI->getFalseValue() | ||
: SI->getTrueValue(); | ||
IncDepth = Depth + 1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Forwarding the question from #114689 (review) because the issue was introduced here: Why do we use the full depth for the select case?
As far as I can tell, a) there may be multiple incoming values in this form, resulting in a fan-out problem and b) we are not actually guaranteed that this breaks recursion (because the other select operand may also be based on the phi indirectly) so the fan-out may be recursive as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Answered in #114689 (comment), lets keep the conversion in one place.
…3707) This patch is inspired by llvm#113686. I found that it removes a lot of unnecessary "and X, 1" in some applications that represent boolean values with int.
As discussed on llvm#114689 (review) and following, there is no principled reason why the phi of select case should have a different recursion limit than the general case. There may still be fan-out, and there may still be indirect recursion. Revert that part of llvm#113707.
As discussed on #114689 (review) and following, there is no principled reason why the phi of select case should have a different recursion limit than the general case. There may still be fan-out, and there may still be indirect recursion. Revert that part of #113707.
As discussed on llvm#114689 (review) and following, there is no principled reason why the phi of select case should have a different recursion limit than the general case. There may still be fan-out, and there may still be indirect recursion. Revert that part of llvm#113707.
Finish porting llvm#114008 to `KnownBits` (Follow up to llvm#113707).
This patch is inspired by #113686. I found that it removes a lot of unnecessary "and X, 1" in some applications that represent boolean values with int.