[CodeGenPrepare] Convert `ctpop(X) ==/!= 1` into `ctpop(X) u 2/1` #111284

dtcxzyw · 2024-10-06T08:55:57Z

Some targets have better codegen for ctpop(X) u< 2 than ctpop(X) == 1. After #100899, we set the range of ctpop's return value to indicate the argument/result is non-zero.

This patch converts ctpop(X) ==/!= 1 into ctpop(X) u 2/1 in CGP to fix #95255.

llvmbot · 2024-10-06T08:56:27Z

@llvm/pr-subscribers-backend-aarch64

Author: Yingwei Zheng (dtcxzyw)

Changes

Some targets have better codegen for ctpop(X) u< 2 than ctpop(X) == 1. After #100899, we set the range of ctpop's return value to indicate the argument/result is non-zero.

This patch converts ctpop(X) ==/!= 1 into ctpop(X) u 2/1 in CGP to fix #95255.

Full diff: https://github.com/llvm/llvm-project/pull/111284.diff

5 Files Affected:

(modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+29)
(modified) llvm/test/CodeGen/AArch64/arm64-popcnt.ll (+61-7)
(modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+39)
(modified) llvm/test/CodeGen/RISCV/rv64zbb.ll (+81)
(modified) llvm/test/CodeGen/X86/ispow2.ll (+41)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 631cc26d6022fe..7953c0d09f2a86 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2111,6 +2111,32 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
   return false;
 }
 
+/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
+/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
+/// result cannot be zero.
+static bool adjustIsPower2Test(CmpInst *Cmp) {
+  ICmpInst::Predicate Pred;
+  if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
+    return false;
+  if (!ICmpInst::isEquality(Pred))
+    return false;
+  auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
+  if (auto Range = II->getRange()) {
+    Type *Ty = II->getType();
+    unsigned BitWidth = Ty->getScalarSizeInBits();
+    if (Range->contains(APInt::getZero(BitWidth)))
+      return false;
+
+    if (Pred == ICmpInst::ICMP_EQ) {
+      Cmp->setPredicate(ICmpInst::ICMP_ULT);
+      Cmp->setOperand(1, ConstantInt::get(Ty, 2));
+    } else
+      Cmp->setPredicate(ICmpInst::ICMP_UGT);
+    return true;
+  }
+  return false;
+}
+
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (sinkCmpExpression(Cmp, *TLI))
     return true;
@@ -2130,6 +2156,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
     return true;
 
+  if (adjustIsPower2Test(Cmp))
+    return true;
+
   return false;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index f5ce73a366125b..0030e9ce80abb4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -15,7 +15,7 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-NONEON-LABEL: cnt32_advsimd:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr w9, w0, #1
-; CHECK-NONEON-NEXT:    mov w8, #16843009
+; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NONEON-NEXT:    sub w9, w0, w9
 ; CHECK-NONEON-NEXT:    lsr w10, w9, #2
@@ -50,7 +50,7 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ; CHECK-NONEON-LABEL: cnt32_advsimd_2:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr w9, w0, #1
-; CHECK-NONEON-NEXT:    mov w8, #16843009
+; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NONEON-NEXT:    sub w9, w0, w9
 ; CHECK-NONEON-NEXT:    lsr w10, w9, #2
@@ -86,7 +86,7 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
 ; CHECK-NONEON-LABEL: cnt64_advsimd:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr x9, x0, #1
-; CHECK-NONEON-NEXT:    mov x8, #72340172838076673
+; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
 ; CHECK-NONEON-NEXT:    sub x9, x0, x9
 ; CHECK-NONEON-NEXT:    lsr x10, x9, #2
@@ -114,7 +114,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
 ; CHECK-LABEL: cnt32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr w9, w0, #1
-; CHECK-NEXT:    mov w8, #16843009
+; CHECK-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NEXT:    sub w9, w0, w9
 ; CHECK-NEXT:    lsr w10, w9, #2
@@ -130,7 +130,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
 ; CHECK-NONEON-LABEL: cnt32:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr w9, w0, #1
-; CHECK-NONEON-NEXT:    mov w8, #16843009
+; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NONEON-NEXT:    sub w9, w0, w9
 ; CHECK-NONEON-NEXT:    lsr w10, w9, #2
@@ -155,7 +155,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
 ; CHECK-LABEL: cnt64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr x9, x0, #1
-; CHECK-NEXT:    mov x8, #72340172838076673
+; CHECK-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; CHECK-NEXT:    and x9, x9, #0x5555555555555555
 ; CHECK-NEXT:    sub x9, x0, x9
 ; CHECK-NEXT:    lsr x10, x9, #2
@@ -171,7 +171,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
 ; CHECK-NONEON-LABEL: cnt64:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr x9, x0, #1
-; CHECK-NONEON-NEXT:    mov x8, #72340172838076673
+; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
 ; CHECK-NONEON-NEXT:    sub x9, x0, x9
 ; CHECK-NONEON-NEXT:    lsr x10, x9, #2
@@ -278,5 +278,59 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
   ret i1 %cmp
 }
 
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_eq_one_nonzero:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w0, #1
+; CHECK-NEXT:    tst w0, w8
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+;
+; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-NONEON:       // %bb.0: // %entry
+; CHECK-NONEON-NEXT:    sub w8, w0, #1
+; CHECK-NONEON-NEXT:    tst w0, w8
+; CHECK-NONEON-NEXT:    cset w0, eq
+; CHECK-NONEON-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-CSSC:       // %bb.0: // %entry
+; CHECK-CSSC-NEXT:    sub w8, w0, #1
+; CHECK-CSSC-NEXT:    tst w0, w8
+; CHECK-CSSC-NEXT:    cset w0, eq
+; CHECK-CSSC-NEXT:    ret
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_ne_one_nonzero:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w0, #1
+; CHECK-NEXT:    tst w0, w8
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+;
+; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-NONEON:       // %bb.0: // %entry
+; CHECK-NONEON-NEXT:    sub w8, w0, #1
+; CHECK-NONEON-NEXT:    tst w0, w8
+; CHECK-NONEON-NEXT:    cset w0, ne
+; CHECK-NONEON-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-CSSC:       // %bb.0: // %entry
+; CHECK-CSSC-NEXT:    sub w8, w0, #1
+; CHECK-CSSC-NEXT:    tst w0, w8
+; CHECK-CSSC-NEXT:    cset w0, ne
+; CHECK-CSSC-NEXT:    ret
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}
+
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index e24b1b41645cdf..4c52047b928f4d 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1441,3 +1441,42 @@ define i32 @srai_slli2(i16 signext %0) {
   %3 = sext i16 %sext to i32
   ret i32 %3
 }
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; RV32I-LABEL: ctpop32_eq_one_nonzero:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi a1, a0, -1
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    seqz a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: ctpop32_eq_one_nonzero:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    cpop a0, a0
+; RV32ZBB-NEXT:    sltiu a0, a0, 2
+; RV32ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; RV32I-LABEL: ctpop32_ne_one_nonzero:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi a1, a0, -1
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    snez a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: ctpop32_ne_one_nonzero:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    cpop a0, a0
+; RV32ZBB-NEXT:    sltiu a0, a0, 2
+; RV32ZBB-NEXT:    xori a0, a0, 1
+; RV32ZBB-NEXT:    ret
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 43a499806ab5ae..1e7814d588e4c0 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1618,3 +1618,84 @@ entry:
   %5 = add nsw i32 %4, %0
   ret i32 %5
 }
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; RV64I-LABEL: ctpop32_eq_one_nonzero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a1, a0, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop32_eq_one_nonzero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    sltiu a0, a0, 2
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; RV64I-LABEL: ctpop32_ne_one_nonzero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a1, a0, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop32_ne_one_nonzero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    sltiu a0, a0, 2
+; RV64ZBB-NEXT:    xori a0, a0, 1
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop64_eq_one_nonzero(i64 %x) {
+; RV64I-LABEL: ctpop64_eq_one_nonzero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a1, a0, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop64_eq_one_nonzero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpop a0, a0
+; RV64ZBB-NEXT:    sltiu a0, a0, 2
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x)
+  %cmp = icmp eq i64 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_eq_one_maybezero(i32 %x) {
+; RV64I-LABEL: ctpop32_eq_one_maybezero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addiw a1, a0, -1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop32_eq_one_maybezero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    addi a0, a0, -1
+; RV64ZBB-NEXT:    seqz a0, a0
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i32 0, 16) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index 8723432de8b6b0..96e33e1dafdc4a 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -220,3 +220,44 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
   %r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
   ret <4 x i1> %r
 }
+
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; CHECK-NOBMI-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-NOBMI:       # %bb.0: # %entry
+; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
+; CHECK-NOBMI-NEXT:    testl %eax, %edi
+; CHECK-NOBMI-NEXT:    sete %al
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-BMI2:       # %bb.0: # %entry
+; CHECK-BMI2-NEXT:    blsrl %edi, %eax
+; CHECK-BMI2-NEXT:    sete %al
+; CHECK-BMI2-NEXT:    retq
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; CHECK-NOBMI-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-NOBMI:       # %bb.0: # %entry
+; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
+; CHECK-NOBMI-NEXT:    testl %eax, %edi
+; CHECK-NOBMI-NEXT:    setne %al
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-BMI2:       # %bb.0: # %entry
+; CHECK-BMI2-NEXT:    blsrl %edi, %eax
+; CHECK-BMI2-NEXT:    setne %al
+; CHECK-BMI2-NEXT:    retq
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}

llvmbot · 2024-10-06T08:56:27Z

@llvm/pr-subscribers-backend-x86

Author: Yingwei Zheng (dtcxzyw)

Changes

Some targets have better codegen for ctpop(X) u< 2 than ctpop(X) == 1. After #100899, we set the range of ctpop's return value to indicate the argument/result is non-zero.

This patch converts ctpop(X) ==/!= 1 into ctpop(X) u 2/1 in CGP to fix #95255.

Full diff: https://github.com/llvm/llvm-project/pull/111284.diff

5 Files Affected:

(modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+29)
(modified) llvm/test/CodeGen/AArch64/arm64-popcnt.ll (+61-7)
(modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+39)
(modified) llvm/test/CodeGen/RISCV/rv64zbb.ll (+81)
(modified) llvm/test/CodeGen/X86/ispow2.ll (+41)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 631cc26d6022fe..7953c0d09f2a86 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2111,6 +2111,32 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
   return false;
 }
 
+/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
+/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
+/// result cannot be zero.
+static bool adjustIsPower2Test(CmpInst *Cmp) {
+  ICmpInst::Predicate Pred;
+  if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
+    return false;
+  if (!ICmpInst::isEquality(Pred))
+    return false;
+  auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
+  if (auto Range = II->getRange()) {
+    Type *Ty = II->getType();
+    unsigned BitWidth = Ty->getScalarSizeInBits();
+    if (Range->contains(APInt::getZero(BitWidth)))
+      return false;
+
+    if (Pred == ICmpInst::ICMP_EQ) {
+      Cmp->setPredicate(ICmpInst::ICMP_ULT);
+      Cmp->setOperand(1, ConstantInt::get(Ty, 2));
+    } else
+      Cmp->setPredicate(ICmpInst::ICMP_UGT);
+    return true;
+  }
+  return false;
+}
+
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (sinkCmpExpression(Cmp, *TLI))
     return true;
@@ -2130,6 +2156,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
     return true;
 
+  if (adjustIsPower2Test(Cmp))
+    return true;
+
   return false;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index f5ce73a366125b..0030e9ce80abb4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -15,7 +15,7 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-NONEON-LABEL: cnt32_advsimd:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr w9, w0, #1
-; CHECK-NONEON-NEXT:    mov w8, #16843009
+; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NONEON-NEXT:    sub w9, w0, w9
 ; CHECK-NONEON-NEXT:    lsr w10, w9, #2
@@ -50,7 +50,7 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ; CHECK-NONEON-LABEL: cnt32_advsimd_2:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr w9, w0, #1
-; CHECK-NONEON-NEXT:    mov w8, #16843009
+; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NONEON-NEXT:    sub w9, w0, w9
 ; CHECK-NONEON-NEXT:    lsr w10, w9, #2
@@ -86,7 +86,7 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
 ; CHECK-NONEON-LABEL: cnt64_advsimd:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr x9, x0, #1
-; CHECK-NONEON-NEXT:    mov x8, #72340172838076673
+; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
 ; CHECK-NONEON-NEXT:    sub x9, x0, x9
 ; CHECK-NONEON-NEXT:    lsr x10, x9, #2
@@ -114,7 +114,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
 ; CHECK-LABEL: cnt32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr w9, w0, #1
-; CHECK-NEXT:    mov w8, #16843009
+; CHECK-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NEXT:    sub w9, w0, w9
 ; CHECK-NEXT:    lsr w10, w9, #2
@@ -130,7 +130,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
 ; CHECK-NONEON-LABEL: cnt32:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr w9, w0, #1
-; CHECK-NONEON-NEXT:    mov w8, #16843009
+; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NONEON-NEXT:    sub w9, w0, w9
 ; CHECK-NONEON-NEXT:    lsr w10, w9, #2
@@ -155,7 +155,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
 ; CHECK-LABEL: cnt64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr x9, x0, #1
-; CHECK-NEXT:    mov x8, #72340172838076673
+; CHECK-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; CHECK-NEXT:    and x9, x9, #0x5555555555555555
 ; CHECK-NEXT:    sub x9, x0, x9
 ; CHECK-NEXT:    lsr x10, x9, #2
@@ -171,7 +171,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
 ; CHECK-NONEON-LABEL: cnt64:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr x9, x0, #1
-; CHECK-NONEON-NEXT:    mov x8, #72340172838076673
+; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
 ; CHECK-NONEON-NEXT:    sub x9, x0, x9
 ; CHECK-NONEON-NEXT:    lsr x10, x9, #2
@@ -278,5 +278,59 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
   ret i1 %cmp
 }
 
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_eq_one_nonzero:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w0, #1
+; CHECK-NEXT:    tst w0, w8
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+;
+; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-NONEON:       // %bb.0: // %entry
+; CHECK-NONEON-NEXT:    sub w8, w0, #1
+; CHECK-NONEON-NEXT:    tst w0, w8
+; CHECK-NONEON-NEXT:    cset w0, eq
+; CHECK-NONEON-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-CSSC:       // %bb.0: // %entry
+; CHECK-CSSC-NEXT:    sub w8, w0, #1
+; CHECK-CSSC-NEXT:    tst w0, w8
+; CHECK-CSSC-NEXT:    cset w0, eq
+; CHECK-CSSC-NEXT:    ret
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_ne_one_nonzero:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w0, #1
+; CHECK-NEXT:    tst w0, w8
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+;
+; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-NONEON:       // %bb.0: // %entry
+; CHECK-NONEON-NEXT:    sub w8, w0, #1
+; CHECK-NONEON-NEXT:    tst w0, w8
+; CHECK-NONEON-NEXT:    cset w0, ne
+; CHECK-NONEON-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-CSSC:       // %bb.0: // %entry
+; CHECK-CSSC-NEXT:    sub w8, w0, #1
+; CHECK-CSSC-NEXT:    tst w0, w8
+; CHECK-CSSC-NEXT:    cset w0, ne
+; CHECK-CSSC-NEXT:    ret
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}
+
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index e24b1b41645cdf..4c52047b928f4d 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1441,3 +1441,42 @@ define i32 @srai_slli2(i16 signext %0) {
   %3 = sext i16 %sext to i32
   ret i32 %3
 }
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; RV32I-LABEL: ctpop32_eq_one_nonzero:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi a1, a0, -1
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    seqz a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: ctpop32_eq_one_nonzero:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    cpop a0, a0
+; RV32ZBB-NEXT:    sltiu a0, a0, 2
+; RV32ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; RV32I-LABEL: ctpop32_ne_one_nonzero:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi a1, a0, -1
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    snez a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: ctpop32_ne_one_nonzero:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    cpop a0, a0
+; RV32ZBB-NEXT:    sltiu a0, a0, 2
+; RV32ZBB-NEXT:    xori a0, a0, 1
+; RV32ZBB-NEXT:    ret
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 43a499806ab5ae..1e7814d588e4c0 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1618,3 +1618,84 @@ entry:
   %5 = add nsw i32 %4, %0
   ret i32 %5
 }
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; RV64I-LABEL: ctpop32_eq_one_nonzero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a1, a0, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop32_eq_one_nonzero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    sltiu a0, a0, 2
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; RV64I-LABEL: ctpop32_ne_one_nonzero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a1, a0, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop32_ne_one_nonzero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    sltiu a0, a0, 2
+; RV64ZBB-NEXT:    xori a0, a0, 1
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop64_eq_one_nonzero(i64 %x) {
+; RV64I-LABEL: ctpop64_eq_one_nonzero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a1, a0, -1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop64_eq_one_nonzero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpop a0, a0
+; RV64ZBB-NEXT:    sltiu a0, a0, 2
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x)
+  %cmp = icmp eq i64 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_eq_one_maybezero(i32 %x) {
+; RV64I-LABEL: ctpop32_eq_one_maybezero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addiw a1, a0, -1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop32_eq_one_maybezero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    addi a0, a0, -1
+; RV64ZBB-NEXT:    seqz a0, a0
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i32 0, 16) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index 8723432de8b6b0..96e33e1dafdc4a 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -220,3 +220,44 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
   %r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
   ret <4 x i1> %r
 }
+
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; CHECK-NOBMI-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-NOBMI:       # %bb.0: # %entry
+; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
+; CHECK-NOBMI-NEXT:    testl %eax, %edi
+; CHECK-NOBMI-NEXT:    sete %al
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-BMI2:       # %bb.0: # %entry
+; CHECK-BMI2-NEXT:    blsrl %edi, %eax
+; CHECK-BMI2-NEXT:    sete %al
+; CHECK-BMI2-NEXT:    retq
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; CHECK-NOBMI-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-NOBMI:       # %bb.0: # %entry
+; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
+; CHECK-NOBMI-NEXT:    testl %eax, %edi
+; CHECK-NOBMI-NEXT:    setne %al
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-BMI2:       # %bb.0: # %entry
+; CHECK-BMI2-NEXT:    blsrl %edi, %eax
+; CHECK-BMI2-NEXT:    setne %al
+; CHECK-BMI2-NEXT:    retq
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}

llvm/lib/CodeGen/CodeGenPrepare.cpp

goldsteinn

LGTM

arsenm · 2024-10-07T08:28:10Z

llvm/lib/CodeGen/CodeGenPrepare.cpp

+    return false;
+  if (!ICmpInst::isEquality(Pred))
+    return false;
+  auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));


Check if this is profitable based on the target?

Do you have an example for a target where this is not profitable or at least neutral?

Not really, it should be neutral for amdgpu. It's just weird to do something unconditional in CGP without any kind of target information. I guess the only reason to do it here is to use the better IR version of isKnownNonZero (which we could skip if there's no plus to doing this)

When you say "here" you mean as opposed to InstCombine or DAGCombine? I think the reason to do this in IR is that the range attribute gets lost otherwise. Also, there's another PR open that moves the is pow2 idiom recognition to CGP because it can end up split across blocks otherwise, so we'd end up with this in CGP anyway (#102731).

As for doing it in InstCombine instead, I think it's problematic there because it goes against the usual canonicalization direction and may lead to infinite loops.

arsenm · 2024-10-07T09:56:11Z

llvm/lib/CodeGen/CodeGenPrepare.cpp

+  // Check if it is profitable for the target
+  ICmpInst::Predicate NewPred =
+      Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGT;
+  if (TLI.isCtpopFast(TLI.getValueType(DL, II->getType())) &&


Doesn't depend on ctpop speed? This is only whether the predicate and/or constant is cheaper?

If ctpop is slow, we always convert it into x & (x - 1). See also simplifySetCCWithCTPOP.

I'd prefer it if this cost model check were dropped again.

llvm-ci · 2024-10-11T01:44:35Z

LLVM Buildbot has detected a new failure on builder ppc64le-lld-multistage-test running on ppc64le-lld-multistage-test while building llvm at step 13 "test-build-stage2-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/168/builds/4308

Here is the relevant piece of the build log for the reference

Step 13 (test-build-stage2-unified-tree-check-all) failure: test (failure)
******************** TEST 'Clang :: Analysis/rdar-6442306-1.m' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 1: /home/buildbots/llvm-external-buildbots/workers/ppc64le-lld-multistage-test/ppc64le-lld-multistage-test/build/stage2/bin/clang -cc1 -internal-isystem /home/buildbots/llvm-external-buildbots/workers/ppc64le-lld-multistage-test/ppc64le-lld-multistage-test/build/stage2/lib/clang/20/include -nostdsysteminc -analyze -analyzer-constraints=range -setup-static-analyzer -analyzer-checker=core,alpha.core -analyzer-disable-checker=alpha.core.PointerArithm /home/buildbots/llvm-external-buildbots/workers/ppc64le-lld-multistage-test/ppc64le-lld-multistage-test/llvm-project/clang/test/Analysis/rdar-6442306-1.m -verify
+ /home/buildbots/llvm-external-buildbots/workers/ppc64le-lld-multistage-test/ppc64le-lld-multistage-test/build/stage2/bin/clang -cc1 -internal-isystem /home/buildbots/llvm-external-buildbots/workers/ppc64le-lld-multistage-test/ppc64le-lld-multistage-test/build/stage2/lib/clang/20/include -nostdsysteminc -analyze -analyzer-constraints=range -setup-static-analyzer -analyzer-checker=core,alpha.core -analyzer-disable-checker=alpha.core.PointerArithm /home/buildbots/llvm-external-buildbots/workers/ppc64le-lld-multistage-test/ppc64le-lld-multistage-test/llvm-project/clang/test/Analysis/rdar-6442306-1.m -verify
error: 'expected-error' diagnostics seen but not expected: 
  File /home/buildbots/llvm-external-buildbots/workers/ppc64le-lld-multistage-test/ppc64le-lld-multistage-test/llvm-project/clang/test/Analysis/rdar-6442306-1.m Line 13: array is too large (0 elements)
1 error generated.

--

********************

…/u> 2/1`" (#111932) Reverts #111284 to fix clang stage2 builds. Investigating... Failed buildbots: https://lab.llvm.org/buildbot/#/builders/76/builds/3576 https://lab.llvm.org/buildbot/#/builders/168/builds/4308 https://lab.llvm.org/buildbot/#/builders/127/builds/1087

…llvm#111284) Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u 2/1` in CGP to fix llvm#95255.

…/u> 2/1` (#111284)` (#111998) Relands #111284. Test failure with stage2 build has been fixed by #111946. Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After #100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u 2/1` in CGP to fix #95255.

…llvm#111284) Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u 2/1` in CGP to fix llvm#95255.

…/u> 2/1`" (llvm#111932) Reverts llvm#111284 to fix clang stage2 builds. Investigating... Failed buildbots: https://lab.llvm.org/buildbot/#/builders/76/builds/3576 https://lab.llvm.org/buildbot/#/builders/168/builds/4308 https://lab.llvm.org/buildbot/#/builders/127/builds/1087

…/u> 2/1` (llvm#111284)` (llvm#111998) Relands llvm#111284. Test failure with stage2 build has been fixed by llvm#111946. Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After llvm#100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) ==/!= 1` into `ctpop(X) u 2/1` in CGP to fix llvm#95255.

dtcxzyw requested review from arsenm, nikic, RKSimon, topperc and goldsteinn October 6, 2024 08:55

llvmbot added backend:AArch64 backend:X86 labels Oct 6, 2024

dtcxzyw requested a review from efriedma-quic October 6, 2024 08:57

goldsteinn reviewed Oct 6, 2024

View reviewed changes

llvm/lib/CodeGen/CodeGenPrepare.cpp Outdated Show resolved Hide resolved

goldsteinn approved these changes Oct 6, 2024

View reviewed changes

arsenm reviewed Oct 7, 2024

View reviewed changes

arsenm approved these changes Oct 10, 2024

View reviewed changes

dtcxzyw added 5 commits October 10, 2024 22:44

[CodeGenPrepare] Add pre-commit tests. NFC.

9449548

[CodeGenPrepare] Convert ctpop(X) ==/!= 1 -> ctpop(X) u 2/1

bf11b6d

[CodeGenPrepare] Use isKnownNonZero

2e810b5

[CodeGenPrepare] Check if it is profitable for the target

7357e3a

[CodeGenPrepare] Remove target cost checks

fb33033

dtcxzyw force-pushed the perf/is-power2-nonzero branch from 1533329 to fb33033 Compare October 10, 2024 14:58

dtcxzyw merged commit e3894f5 into llvm:main Oct 11, 2024
8 checks passed

dtcxzyw deleted the perf/is-power2-nonzero branch October 11, 2024 01:08

dtcxzyw mentioned this pull request Oct 11, 2024

Revert "[CodeGenPrepare] Convert ctpop(X) ==/!= 1 into ctpop(X) u 2/1" #111932

Merged

dtcxzyw mentioned this pull request Oct 11, 2024

[InstCombine] poison-generating attributes are not dropped when folding logical and/or of icmps #111934

Closed

dtcxzyw mentioned this pull request Oct 11, 2024

Reland [CodeGenPrepare] Convert ctpop(X) ==/!= 1 into ctpop(X) u 2/1 (#111284) #111998

Merged

dtcxzyw mentioned this pull request Nov 13, 2024

[InstCombine] Drop poison-generating/UB-implying param attrs after changing operands #115988

Open

s-barannikov mentioned this pull request Apr 21, 2025

[CodeGenPrepare] Unfold slow ctpop when used in power-of-two test #102731

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[CodeGenPrepare] Convert `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` #111284

[CodeGenPrepare] Convert `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` #111284

dtcxzyw commented Oct 6, 2024

llvmbot commented Oct 6, 2024

llvmbot commented Oct 6, 2024

goldsteinn left a comment

arsenm Oct 7, 2024

nikic Oct 7, 2024

arsenm Oct 7, 2024

nikic Oct 7, 2024

arsenm Oct 7, 2024

dtcxzyw Oct 7, 2024

nikic Oct 10, 2024

llvm-ci commented Oct 11, 2024

[CodeGenPrepare] Convert ctpop(X) ==/!= 1 into ctpop(X) u</u> 2/1 #111284

[CodeGenPrepare] Convert ctpop(X) ==/!= 1 into ctpop(X) u</u> 2/1 #111284

Conversation

dtcxzyw commented Oct 6, 2024

llvmbot commented Oct 6, 2024

llvmbot commented Oct 6, 2024

goldsteinn left a comment

Choose a reason for hiding this comment

arsenm Oct 7, 2024

Choose a reason for hiding this comment

nikic Oct 7, 2024

Choose a reason for hiding this comment

arsenm Oct 7, 2024

Choose a reason for hiding this comment

nikic Oct 7, 2024

Choose a reason for hiding this comment

arsenm Oct 7, 2024

Choose a reason for hiding this comment

dtcxzyw Oct 7, 2024

Choose a reason for hiding this comment

nikic Oct 10, 2024

Choose a reason for hiding this comment

llvm-ci commented Oct 11, 2024

[CodeGenPrepare] Convert `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` #111284

[CodeGenPrepare] Convert `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` #111284