-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[CodeGenPrepare] Convert `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1`
#111284
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
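@llvm/pr-subscribers-backend-aarch64 Author: Yingwei Zheng (dtcxzyw) Changes: Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After #100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix #95255. Full diff: https://github.com/llvm/llvm-project/pull/111284.diff 5 Files Affected: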
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 631cc26d6022fe..7953c0d09f2a86 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2111,6 +2111,32 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
return false;
}
+/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
+/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
+/// result cannot be zero.
+static bool adjustIsPower2Test(CmpInst *Cmp) {
+ ICmpInst::Predicate Pred;
+ if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
+ return false;
+ if (!ICmpInst::isEquality(Pred))
+ return false;
+ auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
+ if (auto Range = II->getRange()) {
+ Type *Ty = II->getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ if (Range->contains(APInt::getZero(BitWidth)))
+ return false;
+
+ if (Pred == ICmpInst::ICMP_EQ) {
+ Cmp->setPredicate(ICmpInst::ICMP_ULT);
+ Cmp->setOperand(1, ConstantInt::get(Ty, 2));
+ } else
+ Cmp->setPredicate(ICmpInst::ICMP_UGT);
+ return true;
+ }
+ return false;
+}
+
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
@@ -2130,6 +2156,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
return true;
+ if (adjustIsPower2Test(Cmp))
+ return true;
+
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index f5ce73a366125b..0030e9ce80abb4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -15,7 +15,7 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
; CHECK-NONEON-LABEL: cnt32_advsimd:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr w9, w0, #1
-; CHECK-NONEON-NEXT: mov w8, #16843009
+; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
; CHECK-NONEON-NEXT: sub w9, w0, w9
; CHECK-NONEON-NEXT: lsr w10, w9, #2
@@ -50,7 +50,7 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
; CHECK-NONEON-LABEL: cnt32_advsimd_2:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr w9, w0, #1
-; CHECK-NONEON-NEXT: mov w8, #16843009
+; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
; CHECK-NONEON-NEXT: sub w9, w0, w9
; CHECK-NONEON-NEXT: lsr w10, w9, #2
@@ -86,7 +86,7 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
; CHECK-NONEON-LABEL: cnt64_advsimd:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr x9, x0, #1
-; CHECK-NONEON-NEXT: mov x8, #72340172838076673
+; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT: sub x9, x0, x9
; CHECK-NONEON-NEXT: lsr x10, x9, #2
@@ -114,7 +114,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
; CHECK-LABEL: cnt32:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w9, w0, #1
-; CHECK-NEXT: mov w8, #16843009
+; CHECK-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NEXT: and w9, w9, #0x55555555
; CHECK-NEXT: sub w9, w0, w9
; CHECK-NEXT: lsr w10, w9, #2
@@ -130,7 +130,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
; CHECK-NONEON-LABEL: cnt32:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr w9, w0, #1
-; CHECK-NONEON-NEXT: mov w8, #16843009
+; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
; CHECK-NONEON-NEXT: sub w9, w0, w9
; CHECK-NONEON-NEXT: lsr w10, w9, #2
@@ -155,7 +155,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
; CHECK-LABEL: cnt64:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x9, x0, #1
-; CHECK-NEXT: mov x8, #72340172838076673
+; CHECK-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NEXT: sub x9, x0, x9
; CHECK-NEXT: lsr x10, x9, #2
@@ -171,7 +171,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
; CHECK-NONEON-LABEL: cnt64:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr x9, x0, #1
-; CHECK-NONEON-NEXT: mov x8, #72340172838076673
+; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT: sub x9, x0, x9
; CHECK-NONEON-NEXT: lsr x10, x9, #2
@@ -278,5 +278,59 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
ret i1 %cmp
}
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_eq_one_nonzero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, #1
+; CHECK-NEXT: tst w0, w8
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-NONEON: // %bb.0: // %entry
+; CHECK-NONEON-NEXT: sub w8, w0, #1
+; CHECK-NONEON-NEXT: tst w0, w8
+; CHECK-NONEON-NEXT: cset w0, eq
+; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-CSSC: // %bb.0: // %entry
+; CHECK-CSSC-NEXT: sub w8, w0, #1
+; CHECK-CSSC-NEXT: tst w0, w8
+; CHECK-CSSC-NEXT: cset w0, eq
+; CHECK-CSSC-NEXT: ret
+entry:
+ %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_ne_one_nonzero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, #1
+; CHECK-NEXT: tst w0, w8
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-NONEON: // %bb.0: // %entry
+; CHECK-NONEON-NEXT: sub w8, w0, #1
+; CHECK-NONEON-NEXT: tst w0, w8
+; CHECK-NONEON-NEXT: cset w0, ne
+; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-CSSC: // %bb.0: // %entry
+; CHECK-CSSC-NEXT: sub w8, w0, #1
+; CHECK-CSSC-NEXT: tst w0, w8
+; CHECK-CSSC-NEXT: cset w0, ne
+; CHECK-CSSC-NEXT: ret
+entry:
+ %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ne i32 %popcnt, 1
+ ret i1 %cmp
+}
+
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index e24b1b41645cdf..4c52047b928f4d 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1441,3 +1441,42 @@ define i32 @srai_slli2(i16 signext %0) {
%3 = sext i16 %sext to i32
ret i32 %3
}
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; RV32I-LABEL: ctpop32_eq_one_nonzero:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi a1, a0, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: ctpop32_eq_one_nonzero:
+; RV32ZBB: # %bb.0: # %entry
+; RV32ZBB-NEXT: cpop a0, a0
+; RV32ZBB-NEXT: sltiu a0, a0, 2
+; RV32ZBB-NEXT: ret
+entry:
+ %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; RV32I-LABEL: ctpop32_ne_one_nonzero:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi a1, a0, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: ctpop32_ne_one_nonzero:
+; RV32ZBB: # %bb.0: # %entry
+; RV32ZBB-NEXT: cpop a0, a0
+; RV32ZBB-NEXT: sltiu a0, a0, 2
+; RV32ZBB-NEXT: xori a0, a0, 1
+; RV32ZBB-NEXT: ret
+entry:
+ %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ne i32 %popcnt, 1
+ ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 43a499806ab5ae..1e7814d588e4c0 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1618,3 +1618,84 @@ entry:
%5 = add nsw i32 %4, %0
ret i32 %5
}
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; RV64I-LABEL: ctpop32_eq_one_nonzero:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a1, a0, -1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: ctpop32_eq_one_nonzero:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: cpopw a0, a0
+; RV64ZBB-NEXT: sltiu a0, a0, 2
+; RV64ZBB-NEXT: ret
+entry:
+ %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; RV64I-LABEL: ctpop32_ne_one_nonzero:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a1, a0, -1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: ctpop32_ne_one_nonzero:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: cpopw a0, a0
+; RV64ZBB-NEXT: sltiu a0, a0, 2
+; RV64ZBB-NEXT: xori a0, a0, 1
+; RV64ZBB-NEXT: ret
+entry:
+ %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ne i32 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop64_eq_one_nonzero(i64 %x) {
+; RV64I-LABEL: ctpop64_eq_one_nonzero:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a1, a0, -1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: ctpop64_eq_one_nonzero:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: cpop a0, a0
+; RV64ZBB-NEXT: sltiu a0, a0, 2
+; RV64ZBB-NEXT: ret
+entry:
+ %popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x)
+ %cmp = icmp eq i64 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop32_eq_one_maybezero(i32 %x) {
+; RV64I-LABEL: ctpop32_eq_one_maybezero:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addiw a1, a0, -1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: ctpop32_eq_one_maybezero:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: cpopw a0, a0
+; RV64ZBB-NEXT: addi a0, a0, -1
+; RV64ZBB-NEXT: seqz a0, a0
+; RV64ZBB-NEXT: ret
+entry:
+ %popcnt = call range(i32 0, 16) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %popcnt, 1
+ ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index 8723432de8b6b0..96e33e1dafdc4a 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -220,3 +220,44 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
%r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i1> %r
}
+
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; CHECK-NOBMI-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-NOBMI: # %bb.0: # %entry
+; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
+; CHECK-NOBMI-NEXT: testl %eax, %edi
+; CHECK-NOBMI-NEXT: sete %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-BMI2: # %bb.0: # %entry
+; CHECK-BMI2-NEXT: blsrl %edi, %eax
+; CHECK-BMI2-NEXT: sete %al
+; CHECK-BMI2-NEXT: retq
+entry:
+ %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; CHECK-NOBMI-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-NOBMI: # %bb.0: # %entry
+; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
+; CHECK-NOBMI-NEXT: testl %eax, %edi
+; CHECK-NOBMI-NEXT: setne %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-BMI2: # %bb.0: # %entry
+; CHECK-BMI2-NEXT: blsrl %edi, %eax
+; CHECK-BMI2-NEXT: setne %al
+; CHECK-BMI2-NEXT: retq
+entry:
+ %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ne i32 %popcnt, 1
+ ret i1 %cmp
+}
|
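@llvm/pr-subscribers-backend-x86 Author: Yingwei Zheng (dtcxzyw) Changes: Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After #100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix #95255. Full diff: https://github.com/llvm/llvm-project/pull/111284.diff 5 Files Affected: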
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 631cc26d6022fe..7953c0d09f2a86 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2111,6 +2111,32 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
return false;
}
+/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
+/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
+/// result cannot be zero.
+static bool adjustIsPower2Test(CmpInst *Cmp) {
+ ICmpInst::Predicate Pred;
+ if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
+ return false;
+ if (!ICmpInst::isEquality(Pred))
+ return false;
+ auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
+ if (auto Range = II->getRange()) {
+ Type *Ty = II->getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ if (Range->contains(APInt::getZero(BitWidth)))
+ return false;
+
+ if (Pred == ICmpInst::ICMP_EQ) {
+ Cmp->setPredicate(ICmpInst::ICMP_ULT);
+ Cmp->setOperand(1, ConstantInt::get(Ty, 2));
+ } else
+ Cmp->setPredicate(ICmpInst::ICMP_UGT);
+ return true;
+ }
+ return false;
+}
+
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
@@ -2130,6 +2156,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
return true;
+ if (adjustIsPower2Test(Cmp))
+ return true;
+
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index f5ce73a366125b..0030e9ce80abb4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -15,7 +15,7 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
; CHECK-NONEON-LABEL: cnt32_advsimd:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr w9, w0, #1
-; CHECK-NONEON-NEXT: mov w8, #16843009
+; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
; CHECK-NONEON-NEXT: sub w9, w0, w9
; CHECK-NONEON-NEXT: lsr w10, w9, #2
@@ -50,7 +50,7 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
; CHECK-NONEON-LABEL: cnt32_advsimd_2:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr w9, w0, #1
-; CHECK-NONEON-NEXT: mov w8, #16843009
+; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
; CHECK-NONEON-NEXT: sub w9, w0, w9
; CHECK-NONEON-NEXT: lsr w10, w9, #2
@@ -86,7 +86,7 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
; CHECK-NONEON-LABEL: cnt64_advsimd:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr x9, x0, #1
-; CHECK-NONEON-NEXT: mov x8, #72340172838076673
+; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT: sub x9, x0, x9
; CHECK-NONEON-NEXT: lsr x10, x9, #2
@@ -114,7 +114,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
; CHECK-LABEL: cnt32:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w9, w0, #1
-; CHECK-NEXT: mov w8, #16843009
+; CHECK-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NEXT: and w9, w9, #0x55555555
; CHECK-NEXT: sub w9, w0, w9
; CHECK-NEXT: lsr w10, w9, #2
@@ -130,7 +130,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
; CHECK-NONEON-LABEL: cnt32:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr w9, w0, #1
-; CHECK-NONEON-NEXT: mov w8, #16843009
+; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
; CHECK-NONEON-NEXT: sub w9, w0, w9
; CHECK-NONEON-NEXT: lsr w10, w9, #2
@@ -155,7 +155,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
; CHECK-LABEL: cnt64:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x9, x0, #1
-; CHECK-NEXT: mov x8, #72340172838076673
+; CHECK-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NEXT: sub x9, x0, x9
; CHECK-NEXT: lsr x10, x9, #2
@@ -171,7 +171,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
; CHECK-NONEON-LABEL: cnt64:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: lsr x9, x0, #1
-; CHECK-NONEON-NEXT: mov x8, #72340172838076673
+; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT: sub x9, x0, x9
; CHECK-NONEON-NEXT: lsr x10, x9, #2
@@ -278,5 +278,59 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
ret i1 %cmp
}
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_eq_one_nonzero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, #1
+; CHECK-NEXT: tst w0, w8
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-NONEON: // %bb.0: // %entry
+; CHECK-NONEON-NEXT: sub w8, w0, #1
+; CHECK-NONEON-NEXT: tst w0, w8
+; CHECK-NONEON-NEXT: cset w0, eq
+; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-CSSC: // %bb.0: // %entry
+; CHECK-CSSC-NEXT: sub w8, w0, #1
+; CHECK-CSSC-NEXT: tst w0, w8
+; CHECK-CSSC-NEXT: cset w0, eq
+; CHECK-CSSC-NEXT: ret
+entry:
+ %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_ne_one_nonzero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, #1
+; CHECK-NEXT: tst w0, w8
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-NONEON: // %bb.0: // %entry
+; CHECK-NONEON-NEXT: sub w8, w0, #1
+; CHECK-NONEON-NEXT: tst w0, w8
+; CHECK-NONEON-NEXT: cset w0, ne
+; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-CSSC: // %bb.0: // %entry
+; CHECK-CSSC-NEXT: sub w8, w0, #1
+; CHECK-CSSC-NEXT: tst w0, w8
+; CHECK-CSSC-NEXT: cset w0, ne
+; CHECK-CSSC-NEXT: ret
+entry:
+ %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ne i32 %popcnt, 1
+ ret i1 %cmp
+}
+
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index e24b1b41645cdf..4c52047b928f4d 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1441,3 +1441,42 @@ define i32 @srai_slli2(i16 signext %0) {
%3 = sext i16 %sext to i32
ret i32 %3
}
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; RV32I-LABEL: ctpop32_eq_one_nonzero:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi a1, a0, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: ctpop32_eq_one_nonzero:
+; RV32ZBB: # %bb.0: # %entry
+; RV32ZBB-NEXT: cpop a0, a0
+; RV32ZBB-NEXT: sltiu a0, a0, 2
+; RV32ZBB-NEXT: ret
+entry:
+ %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; RV32I-LABEL: ctpop32_ne_one_nonzero:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi a1, a0, -1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: ctpop32_ne_one_nonzero:
+; RV32ZBB: # %bb.0: # %entry
+; RV32ZBB-NEXT: cpop a0, a0
+; RV32ZBB-NEXT: sltiu a0, a0, 2
+; RV32ZBB-NEXT: xori a0, a0, 1
+; RV32ZBB-NEXT: ret
+entry:
+ %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ne i32 %popcnt, 1
+ ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 43a499806ab5ae..1e7814d588e4c0 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1618,3 +1618,84 @@ entry:
%5 = add nsw i32 %4, %0
ret i32 %5
}
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; RV64I-LABEL: ctpop32_eq_one_nonzero:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a1, a0, -1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: ctpop32_eq_one_nonzero:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: cpopw a0, a0
+; RV64ZBB-NEXT: sltiu a0, a0, 2
+; RV64ZBB-NEXT: ret
+entry:
+ %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; RV64I-LABEL: ctpop32_ne_one_nonzero:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a1, a0, -1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: ctpop32_ne_one_nonzero:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: cpopw a0, a0
+; RV64ZBB-NEXT: sltiu a0, a0, 2
+; RV64ZBB-NEXT: xori a0, a0, 1
+; RV64ZBB-NEXT: ret
+entry:
+ %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ne i32 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop64_eq_one_nonzero(i64 %x) {
+; RV64I-LABEL: ctpop64_eq_one_nonzero:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a1, a0, -1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: ctpop64_eq_one_nonzero:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: cpop a0, a0
+; RV64ZBB-NEXT: sltiu a0, a0, 2
+; RV64ZBB-NEXT: ret
+entry:
+ %popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x)
+ %cmp = icmp eq i64 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop32_eq_one_maybezero(i32 %x) {
+; RV64I-LABEL: ctpop32_eq_one_maybezero:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addiw a1, a0, -1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: ctpop32_eq_one_maybezero:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: cpopw a0, a0
+; RV64ZBB-NEXT: addi a0, a0, -1
+; RV64ZBB-NEXT: seqz a0, a0
+; RV64ZBB-NEXT: ret
+entry:
+ %popcnt = call range(i32 0, 16) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %popcnt, 1
+ ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index 8723432de8b6b0..96e33e1dafdc4a 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -220,3 +220,44 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
%r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i1> %r
}
+
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; CHECK-NOBMI-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-NOBMI: # %bb.0: # %entry
+; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
+; CHECK-NOBMI-NEXT: testl %eax, %edi
+; CHECK-NOBMI-NEXT: sete %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-BMI2: # %bb.0: # %entry
+; CHECK-BMI2-NEXT: blsrl %edi, %eax
+; CHECK-BMI2-NEXT: sete %al
+; CHECK-BMI2-NEXT: retq
+entry:
+ %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %popcnt, 1
+ ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; CHECK-NOBMI-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-NOBMI: # %bb.0: # %entry
+; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
+; CHECK-NOBMI-NEXT: testl %eax, %edi
+; CHECK-NOBMI-NEXT: setne %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-BMI2: # %bb.0: # %entry
+; CHECK-BMI2-NEXT: blsrl %edi, %eax
+; CHECK-BMI2-NEXT: setne %al
+; CHECK-BMI2-NEXT: retq
+entry:
+ %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ne i32 %popcnt, 1
+ ret i1 %cmp
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
return false; | ||
if (!ICmpInst::isEquality(Pred)) | ||
return false; | ||
auto *II = cast<IntrinsicInst>(Cmp->getOperand(0)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check if this is profitable based on the target?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have an example for a target where this is not profitable or at least neutral?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not really, it should be neutral for amdgpu. It's just weird to do something unconditional in CGP without any kind of target information. I guess the only reason to do it here is to use the better IR version of isKnownNonZero (which we could skip if there's no plus to doing this)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When you say "here" you mean as opposed to InstCombine or DAGCombine? I think the reason to do this in IR is that the range attribute gets lost otherwise. Also, there's another PR open that moves the is pow2 idiom recognition to CGP because it can end up split across blocks otherwise, so we'd end up with this in CGP anyway (#102731).
As for doing it in InstCombine instead, I think it's problematic there because it goes against the usual canonicalization direction and may lead to infinite loops.
llvm/lib/CodeGen/CodeGenPrepare.cpp
Outdated
// Check if it is profitable for the target | ||
ICmpInst::Predicate NewPred = | ||
Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGT; | ||
if (TLI.isCtpopFast(TLI.getValueType(DL, II->getType())) && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't depend on ctpop speed? This is only whether the predicate and/or constant is cheaper?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If ctpop is slow, we always convert it into `x & (x - 1)`. See also `simplifySetCCWithCTPOP`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer it if this cost model check were dropped again.
1533329
to
fb33033
Compare
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/168/builds/4308. Here is the relevant piece of the build log for reference:
|
…/u> 2/1`" (#111932) Reverts #111284 to fix clang stage2 builds. Investigating... Failed buildbots: https://lab.llvm.org/buildbot/#/builders/76/builds/3576 https://lab.llvm.org/buildbot/#/builders/168/builds/4308 https://lab.llvm.org/buildbot/#/builders/127/builds/1087
…llvm#111284) Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix llvm#95255.
…/u> 2/1` (#111284)` (#111998) Relands #111284. Test failure with stage2 build has been fixed by #111946. Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After #100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix #95255.
…llvm#111284) Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix llvm#95255.
…/u> 2/1`" (llvm#111932) Reverts llvm#111284 to fix clang stage2 builds. Investigating... Failed buildbots: https://lab.llvm.org/buildbot/#/builders/76/builds/3576 https://lab.llvm.org/buildbot/#/builders/168/builds/4308 https://lab.llvm.org/buildbot/#/builders/127/builds/1087
…/u> 2/1` (llvm#111284)` (llvm#111998) Relands llvm#111284. Test failure with stage2 build has been fixed by llvm#111946. Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix llvm#95255.
Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After #100899, we set the range of ctpop's return value to indicate the argument/result is non-zero.
This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` in CGP to fix #95255.