Skip to content

Commit 47fd32f

Browse files
authored
[DAGCombine] Fix type mismatch in (shl X, cttz(Y)) -> (mul (Y & -Y), X) (#94008)
Proof: https://alive2.llvm.org/ce/z/J7GBMU

Same as #92753: the types of the LHS and RHS in shift nodes may differ.
- When VT is smaller than ShiftVT, it is safe to use trunc.
- When VT is larger than ShiftVT, it is safe to use zext iff `is_zero_poison` is true (i.e., `opcode == ISD::CTTZ_ZERO_UNDEF`). See also the counterexample `src_shl_cttz2 -> tgt_shl_cttz2` in the alive2 proofs.

Fixes issue #85066 (comment).
1 parent cb7a03b commit 47fd32f

File tree

2 files changed

+52
-4
lines changed

2 files changed

+52
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10120,13 +10120,16 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
1012010120

1012110121
// fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
1012210122
// target.
10123-
if ((N1.getOpcode() == ISD::CTTZ || N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10124-
N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, VT) &&
10123+
if (((N1.getOpcode() == ISD::CTTZ &&
10124+
VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10125+
N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10126+
N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
1012510127
TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
1012610128
SDValue Y = N1.getOperand(0);
1012710129
SDLoc DL(N);
10128-
SDValue NegY = DAG.getNegative(Y, DL, VT);
10129-
SDValue And = DAG.getNode(ISD::AND, DL, VT, Y, NegY);
10130+
SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10131+
SDValue And =
10132+
DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
1013010133
return DAG.getNode(ISD::MUL, DL, VT, And, N0);
1013110134
}
1013210135

llvm/test/CodeGen/PowerPC/pr85066.ll

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s
3+
4+
; Tests from pr85066
5+
define i64 @test_shl_zext_cttz(i16 %x) {
6+
; CHECK-LABEL: test_shl_zext_cttz:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: oris 3, 3, 1
9+
; CHECK-NEXT: neg 4, 3
10+
; CHECK-NEXT: and 3, 3, 4
11+
; CHECK-NEXT: clrldi 3, 3, 32
12+
; CHECK-NEXT: blr
13+
entry:
14+
%cttz = tail call i16 @llvm.cttz.i16(i16 %x, i1 false)
15+
%zext = zext i16 %cttz to i64
16+
%res = shl i64 1, %zext
17+
ret i64 %res
18+
}
19+
20+
define i64 @test_shl_zext_cttz_zero_is_poison(i16 %x) {
21+
; CHECK-LABEL: test_shl_zext_cttz_zero_is_poison:
22+
; CHECK: # %bb.0: # %entry
23+
; CHECK-NEXT: neg 4, 3
24+
; CHECK-NEXT: and 3, 3, 4
25+
; CHECK-NEXT: clrldi 3, 3, 32
26+
; CHECK-NEXT: blr
27+
entry:
28+
%cttz = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
29+
%zext = zext i16 %cttz to i64
30+
%res = shl i64 1, %zext
31+
ret i64 %res
32+
}
33+
34+
define i16 @test_shl_trunc_cttz(i32 %x) {
35+
; CHECK-LABEL: test_shl_trunc_cttz:
36+
; CHECK: # %bb.0: # %entry
37+
; CHECK-NEXT: neg 4, 3
38+
; CHECK-NEXT: and 3, 3, 4
39+
; CHECK-NEXT: blr
40+
entry:
41+
%cttz = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
42+
%trunc = trunc i32 %cttz to i16
43+
%res = shl i16 1, %trunc
44+
ret i16 %res
45+
}

0 commit comments

Comments
 (0)