-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[DAGCombiner] Inverse transform (select c, (and X, 1), 0)
-> (and (zext c), X)
#66793
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-aarch64 Changes
Patch is 22.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66793.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index db1ebe0e26b9a29..5dfbf29fa666b85 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6294,6 +6294,37 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
return SDValue();
}
+// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
+// We canonicalize to the `select` form in the middle end, but the `and` form
+// gets better codegen on all tested targets (arm, x86, riscv).
+static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
+ const SDLoc &DL, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto *FC = dyn_cast<ConstantSDNode>(F);
+ if (FC == nullptr || !FC->isZero())
+ return SDValue();
+
+ EVT CondVT = Cond.getValueType();
+ ISD::NodeType BoolExtOpc =
+ TLI.getExtendForContent(TLI.getBooleanContents(CondVT));
+ if (BoolExtOpc != ISD::ZERO_EXTEND)
+ return SDValue();
+
+ if (T.getOpcode() != ISD::AND)
+ return SDValue();
+
+ auto *TC1 = dyn_cast<ConstantSDNode>(T.getOperand(1));
+ if (TC1 == nullptr || !TC1->isOne())
+ return SDValue();
+
+ EVT OpVT = T.getValueType();
+
+ SDValue CondMask =
+ OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
+ return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
+}
+
+
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
@@ -11604,6 +11635,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SDValue BinOp = foldSelectOfBinops(N))
return BinOp;
+ if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
+ return R;
+
return SDValue();
}
@@ -21861,7 +21895,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (DAG.isKnownNeverZero(Index))
return DAG.getUNDEF(ScalarVT);
- // Check if the result type doesn't match the inserted element type.
+ // Check if the result type doesn't match the inserted element type.
// The inserted element and extracted element may have mismatched bitwidth.
// As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted vector.
SDValue InOp = VecOp.getOperand(0);
diff --git a/llvm/test/CodeGen/AArch64/select-to-and-zext.ll b/llvm/test/CodeGen/AArch64/select-to-and-zext.ll
new file mode 100644
index 000000000000000..42a7c2114ed27ee
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/select-to-and-zext.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s
+
+define i32 @from_cmpeq(i32 %xx, i32 %y) {
+; CHECK-LABEL: from_cmpeq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w1, #0x1
+; CHECK-NEXT: cmp w0, #9
+; CHECK-NEXT: csel w0, w8, wzr, eq
+; CHECK-NEXT: ret
+ %x = icmp eq i32 %xx, 9
+ %masked = and i32 %y, 1
+
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_cmpeq_fail_bad_andmask(i32 %xx, i32 %y) {
+; CHECK-LABEL: from_cmpeq_fail_bad_andmask:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w1, #0x3
+; CHECK-NEXT: cmp w0, #9
+; CHECK-NEXT: csel w0, w8, wzr, eq
+; CHECK-NEXT: ret
+ %x = icmp eq i32 %xx, 9
+ %masked = and i32 %y, 3
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_i1(i1 %x, i32 %y) {
+; CHECK-LABEL: from_i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, w1
+; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: ret
+ %masked = and i32 %y, 1
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_trunc_i8(i8 %xx, i32 %y) {
+; CHECK-LABEL: from_trunc_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, w1
+; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: ret
+ %masked = and i32 %y, 1
+ %x = trunc i8 %xx to i1
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_trunc_i64(i64 %xx, i32 %y) {
+; CHECK-LABEL: from_trunc_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, w1
+; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: ret
+ %masked = and i32 %y, 1
+ %x = trunc i64 %xx to i1
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_i1_fail_bad_select0(i1 %x, i32 %y) {
+; CHECK-LABEL: from_i1_fail_bad_select0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w1, #0x1
+; CHECK-NEXT: tst w0, #0x1
+; CHECK-NEXT: csinc w0, w8, wzr, ne
+; CHECK-NEXT: ret
+ %masked = and i32 %y, 1
+ %r = select i1 %x, i32 %masked, i32 1
+ ret i32 %r
+}
+
+define i32 @from_i1_fail_bad_select1(i1 %x, i32 %y) {
+; CHECK-LABEL: from_i1_fail_bad_select1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w1, #0x1
+; CHECK-NEXT: tst w0, #0x1
+; CHECK-NEXT: csel w0, wzr, w8, ne
+; CHECK-NEXT: ret
+ %masked = and i32 %y, 1
+ %r = select i1 %x, i32 0, i32 %masked
+ ret i32 %r
+}
diff --git a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
new file mode 100644
index 000000000000000..eacc26c18415da6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+
+
+define i32 @from_cmpeq(i32 %xx, i32 %y) {
+; RV32I-LABEL: from_cmpeq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a0, a0, -9
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: from_cmpeq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: addi a0, a0, -9
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: ret
+ %x = icmp eq i32 %xx, 9
+ %masked = and i32 %y, 1
+
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_cmpeq_fail_bad_andmask(i32 %xx, i32 %y) {
+; RV32I-LABEL: from_cmpeq_fail_bad_andmask:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a0, a0, -9
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: andi a0, a0, 3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: from_cmpeq_fail_bad_andmask:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: addi a0, a0, -9
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: andi a0, a0, 3
+; RV64I-NEXT: ret
+ %x = icmp eq i32 %xx, 9
+ %masked = and i32 %y, 3
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_i1(i1 %x, i32 %y) {
+; RV32I-LABEL: from_i1:
+; RV32I: # %bb.0:
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: from_i1:
+; RV64I: # %bb.0:
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ret
+ %masked = and i32 %y, 1
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_trunc_i8(i8 %xx, i32 %y) {
+; RV32I-LABEL: from_trunc_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: from_trunc_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ret
+ %masked = and i32 %y, 1
+ %x = trunc i8 %xx to i1
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_trunc_i64(i64 %xx, i32 %y) {
+; RV32I-LABEL: from_trunc_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: from_trunc_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ret
+ %masked = and i32 %y, 1
+ %x = trunc i64 %xx to i1
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_i1_fail_bad_select0(i1 %x, i32 %y) {
+; RV32I-LABEL: from_i1_fail_bad_select0:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: bnez a0, .LBB5_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB5_2:
+; RV32I-NEXT: andi a0, a1, 1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: from_i1_fail_bad_select0:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: bnez a0, .LBB5_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: li a0, 1
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB5_2:
+; RV64I-NEXT: andi a0, a1, 1
+; RV64I-NEXT: ret
+ %masked = and i32 %y, 1
+ %r = select i1 %x, i32 %masked, i32 1
+ ret i32 %r
+}
+
+define i32 @from_i1_fail_bad_select1(i1 %x, i32 %y) {
+; RV32I-LABEL: from_i1_fail_bad_select1:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: from_i1_fail_bad_select1:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ret
+ %masked = and i32 %y, 1
+ %r = select i1 %x, i32 0, i32 %masked
+ ret i32 %r
+}
diff --git a/llvm/test/CodeGen/SystemZ/pr60413.ll b/llvm/test/CodeGen/SystemZ/pr60413.ll
index 532dbde89c826c6..5a629567d070694 100644
--- a/llvm/test/CodeGen/SystemZ/pr60413.ll
+++ b/llvm/test/CodeGen/SystemZ/pr60413.ll
@@ -13,21 +13,29 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #0
define dso_local void @m() local_unnamed_addr #1 {
; CHECK-LABEL: m:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
+; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
; CHECK-NEXT: aghi %r15, -168
-; CHECK-NEXT: llhrl %r1, f+4
-; CHECK-NEXT: sll %r1, 8
-; CHECK-NEXT: larl %r2, f
-; CHECK-NEXT: ic %r1, 6(%r2)
-; CHECK-NEXT: lr %r0, %r1
-; CHECK-NEXT: nilh %r0, 255
-; CHECK-NEXT: vlvgp %v1, %r1, %r0
-; CHECK-NEXT: vlvgf %v1, %r1, 0
-; CHECK-NEXT: vlvgf %v1, %r1, 2
-; CHECK-NEXT: vlvgp %v0, %r0, %r1
+; CHECK-NEXT: llhrl %r2, f+4
+; CHECK-NEXT: sll %r2, 8
+; CHECK-NEXT: larl %r1, f
+; CHECK-NEXT: ic %r2, 6(%r1)
+; CHECK-NEXT: larl %r1, e
+; CHECK-NEXT: lb %r0, 3(%r1)
+; CHECK-NEXT: clfi %r2, 128
+; CHECK-NEXT: ipm %r1
+; CHECK-NEXT: risbg %r1, %r1, 63, 191, 36
+; CHECK-NEXT: vlvgp %v1, %r2, %r0
+; CHECK-NEXT: vlvgf %v1, %r2, 0
+; CHECK-NEXT: vlvgf %v1, %r2, 2
+; CHECK-NEXT: vlvgp %v0, %r0, %r2
+; CHECK-NEXT: vlvgp %v2, %r2, %r2
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: nilh %r2, 255
+; CHECK-NEXT: chi %r2, 128
+; CHECK-NEXT: ipm %r2
+; CHECK-NEXT: risbg %r2, %r2, 63, 191, 36
; CHECK-NEXT: vlvgf %v0, %r0, 0
; CHECK-NEXT: vlvgf %v0, %r0, 2
-; CHECK-NEXT: vlvgp %v2, %r1, %r1
; CHECK-NEXT: vrepf %v2, %v2, 1
; CHECK-NEXT: vgbm %v3, 30583
; CHECK-NEXT: vn %v0, %v0, %v3
@@ -35,96 +43,84 @@ define dso_local void @m() local_unnamed_addr #1 {
; CHECK-NEXT: vn %v2, %v2, %v3
; CHECK-NEXT: vrepif %v3, 127
; CHECK-NEXT: vchlf %v1, %v1, %v3
-; CHECK-NEXT: vlgvf %r13, %v1, 0
+; CHECK-NEXT: vlgvf %r12, %v1, 0
; CHECK-NEXT: vchlf %v2, %v2, %v3
-; CHECK-NEXT: vlgvf %r3, %v2, 1
-; CHECK-NEXT: nilf %r3, 1
-; CHECK-NEXT: vlgvf %r4, %v2, 0
-; CHECK-NEXT: risbg %r2, %r4, 48, 176, 15
-; CHECK-NEXT: rosbg %r2, %r3, 32, 49, 14
-; CHECK-NEXT: vlgvf %r5, %v2, 2
-; CHECK-NEXT: nilf %r5, 1
-; CHECK-NEXT: rosbg %r2, %r5, 32, 50, 13
-; CHECK-NEXT: vlgvf %r14, %v2, 3
+; CHECK-NEXT: vlgvf %r4, %v2, 1
+; CHECK-NEXT: nilf %r4, 1
+; CHECK-NEXT: vlgvf %r5, %v2, 0
+; CHECK-NEXT: risbg %r3, %r5, 48, 176, 15
+; CHECK-NEXT: rosbg %r3, %r4, 32, 49, 14
+; CHECK-NEXT: vlgvf %r14, %v2, 2
; CHECK-NEXT: nilf %r14, 1
-; CHECK-NEXT: rosbg %r2, %r14, 32, 51, 12
-; CHECK-NEXT: rosbg %r2, %r13, 52, 52, 11
-; CHECK-NEXT: vlgvf %r13, %v1, 1
-; CHECK-NEXT: rosbg %r2, %r13, 53, 53, 10
-; CHECK-NEXT: vlgvf %r13, %v1, 2
-; CHECK-NEXT: rosbg %r2, %r13, 54, 54, 9
-; CHECK-NEXT: vlgvf %r13, %v1, 3
-; CHECK-NEXT: rosbg %r2, %r13, 55, 55, 8
+; CHECK-NEXT: rosbg %r3, %r14, 32, 50, 13
+; CHECK-NEXT: vlgvf %r13, %v2, 3
+; CHECK-NEXT: nilf %r13, 1
+; CHECK-NEXT: rosbg %r3, %r13, 32, 51, 12
+; CHECK-NEXT: rosbg %r3, %r12, 52, 52, 11
+; CHECK-NEXT: vlgvf %r12, %v1, 1
+; CHECK-NEXT: rosbg %r3, %r12, 53, 53, 10
+; CHECK-NEXT: vlgvf %r12, %v1, 2
+; CHECK-NEXT: rosbg %r3, %r12, 54, 54, 9
+; CHECK-NEXT: vlgvf %r12, %v1, 3
+; CHECK-NEXT: rosbg %r3, %r12, 55, 55, 8
; CHECK-NEXT: vchlf %v0, %v0, %v3
-; CHECK-NEXT: vlgvf %r13, %v0, 0
-; CHECK-NEXT: rosbg %r2, %r13, 56, 56, 7
-; CHECK-NEXT: vlgvf %r13, %v0, 1
-; CHECK-NEXT: rosbg %r2, %r13, 57, 57, 6
-; CHECK-NEXT: vlgvf %r13, %v0, 2
-; CHECK-NEXT: rosbg %r2, %r13, 58, 58, 5
-; CHECK-NEXT: vlgvf %r13, %v0, 3
-; CHECK-NEXT: rosbg %r2, %r13, 59, 59, 4
-; CHECK-NEXT: nilf %r4, 1
-; CHECK-NEXT: rosbg %r2, %r4, 32, 60, 3
-; CHECK-NEXT: rosbg %r2, %r3, 32, 61, 2
-; CHECK-NEXT: rosbg %r2, %r5, 32, 62, 1
-; CHECK-NEXT: or %r2, %r14
-; CHECK-NEXT: vlgvb %r4, %v0, 1
-; CHECK-NEXT: vlgvb %r3, %v0, 0
-; CHECK-NEXT: risbg %r3, %r3, 48, 176, 15
-; CHECK-NEXT: rosbg %r3, %r4, 49, 49, 14
-; CHECK-NEXT: vlgvb %r4, %v0, 2
-; CHECK-NEXT: rosbg %r3, %r4, 50, 50, 13
-; CHECK-NEXT: vlgvb %r4, %v0, 3
-; CHECK-NEXT: rosbg %r3, %r4, 51, 51, 12
-; CHECK-NEXT: vlgvb %r4, %v0, 4
-; CHECK-NEXT: rosbg %r3, %r4, 52, 52, 11
-; CHECK-NEXT: vlgvb %r4, %v0, 5
-; CHECK-NEXT: rosbg %r3, %r4, 53, 53, 10
-; CHECK-NEXT: vlgvb %r4, %v0, 6
-; CHECK-NEXT: rosbg %r3, %r4, 54, 54, 9
-; CHECK-NEXT: vlgvb %r4, %v0, 7
-; CHECK-NEXT: rosbg %r3, %r4, 55, 55, 8
-; CHECK-NEXT: vlgvb %r4, %v0, 8
-; CHECK-NEXT: rosbg %r3, %r4, 56, 56, 7
-; CHECK-NEXT: vlgvb %r4, %v0, 9
-; CHECK-NEXT: rosbg %r3, %r4, 57, 57, 6
-; CHECK-NEXT: vlgvb %r4, %v0, 10
-; CHECK-NEXT: rosbg %r3, %r4, 58, 58, 5
-; CHECK-NEXT: vlgvb %r4, %v0, 11
-; CHECK-NEXT: rosbg %r3, %r4, 59, 59, 4
-; CHECK-NEXT: vlgvb %r4, %v0, 12
-; CHECK-NEXT: rosbg %r3, %r4, 60, 60, 3
-; CHECK-NEXT: vlgvb %r4, %v0, 13
-; CHECK-NEXT: rosbg %r3, %r4, 61, 61, 2
-; CHECK-NEXT: vlgvb %r4, %v0, 14
-; CHECK-NEXT: rosbg %r3, %r4, 62, 62, 1
-; CHECK-NEXT: vlgvb %r4, %v0, 15
-; CHECK-NEXT: rosbg %r3, %r4, 63, 63, 0
-; CHECK-NEXT: xilf %r3, 4294967295
-; CHECK-NEXT: clijhe %r1, 128, .LBB0_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: larl %r1, e
-; CHECK-NEXT: llc %r1, 3(%r1)
-; CHECK-NEXT: nilf %r1, 1
-; CHECK-NEXT: j .LBB0_3
-; CHECK-NEXT: .LBB0_2: # %entry
-; CHECK-NEXT: lhi %r1, 0
-; CHECK-NEXT: .LBB0_3: # %entry
-; CHECK-NEXT: or %r2, %r3
-; CHECK-NEXT: chi %r0, 128
-; CHECK-NEXT: jl .LBB0_5
-; CHECK-NEXT: # %bb.4: # %entry
-; CHECK-NEXT: lhi %r1, 0
-; CHECK-NEXT: .LBB0_5: # %entry
-; CHECK-NEXT: tmll %r2, 65535
-; CHECK-NEXT: je .LBB0_7
-; CHECK-NEXT: # %bb.6: # %entry
-; CHECK-NEXT: lhi %r1, 0
-; CHECK-NEXT: .LBB0_7: # %entry
-; CHECK-NEXT: larl %r2, g
-; CHECK-NEXT: stc %r1, 0(%r2)
-; CHECK-NEXT: lmg %r13, %r15, 272(%r15)
+; CHECK-NEXT: vlgvf %r12, %v0, 0
+; CHECK-NEXT: rosbg %r3, %r12, 56, 56, 7
+; CHECK-NEXT: vlgvf %r12, %v0, 1
+; CHECK-NEXT: rosbg %r3, %r12, 57, 57, 6
+; CHECK-NEXT: vlgvf %r12, %v0, 2
+; CHECK-NEXT: rosbg %r3, %r12, 58, 58, 5
+; CHECK-NEXT: vlgvf %r12, %v0, 3
+; CHECK-NEXT: rosbg %r3, %r12, 59, 59, 4
+; CHECK-NEXT: nilf %r5, 1
+; CHECK-NEXT: rosbg %r3, %r5, 32, 60, 3
+; CHECK-NEXT: rosbg %r3, %r4, 32, 61, 2
+; CHECK-NEXT: rosbg %r3, %r14, 32, 62, 1
+; CHECK-NEXT: or %r3, %r13
+; CHECK-NEXT: vlgvb %r5, %v0, 1
+; CHECK-NEXT: vlgvb %r4, %v0, 0
+; CHECK-NEXT: risbg %r4, %r4, 48, 176, 15
+; CHECK-NEXT: rosbg %r4, %r5, 49, 49, 14
+; CHECK-NEXT: vlgvb %r5, %v0, 2
+; CHECK-NEXT: rosbg %r4, %r5, 50, 50, 13
+; CHECK-NEXT: vlgvb %r5, %v0, 3
+; CHECK-NEXT: rosbg %r4, %r5, 51, 51, 12
+; CHECK-NEXT: vlgvb %r5, %v0, 4
+; CHECK-NEXT: rosbg %r4, %r5, 52, 52, 11
+; CHECK-NEXT: vlgvb %r5, %v0, 5
+; CHECK-NEXT: rosbg %r4, %r5, 53, 53, 10
+; CHECK-NEXT: vlgvb %r5, %v0, 6
+; CHECK-NEXT: rosbg %r4, %r5, 54, 54, 9
+; CHECK-NEXT: vlgvb %r5, %v0, 7
+; CHECK-NEXT: rosbg %r4, %r5, 55, 55, 8
+; CHECK-NEXT: vlgvb %r5, %v0, 8
+; CHECK-NEXT: rosbg %r4, %r5, 56, 56, 7
+; CHECK-NEXT: vlgvb %r5, %v0, 9
+; CHECK-NEXT: rosbg %r4, %r5, 57, 57, 6
+; CHECK-NEXT: vlgvb %r5, %v0, 10
+; CHECK-NEXT: rosbg %r4, %r5, 58, 58, 5
+; CHECK-NEXT: vlgvb %r5, %v0, 11
+; CHECK-NEXT: rosbg %r4, %r5, 59, 59, 4
+; CHECK-NEXT: vlgvb %r5, %v0, 12
+; CHECK-NEXT: rosbg %r4, %r5, 60, 60, 3
+; CHECK-NEXT: vlgvb %r5, %v0, 13
+; CHECK-NEXT: rosbg %r4, %r5, 61, 61, 2
+; CHECK-NEXT: vlgvb %r5, %v0, 14
+; CHECK-NEXT: rosbg %r4, %r5, 62, 62, 1
+; CHECK-NEXT: vlgvb %r5, %v0, 15
+; CHECK-NEXT: rosbg %r4, %r5, 63, 63, 0
+; CHECK-NEXT: xilf %r4, 4294967295
+; CHECK-NEXT: or %r4, %r3
+; CHECK-NEXT: tmll %r4, 65535
+; CHECK-NEXT: ipm %r3
+; CHECK-NEXT: afi %r3, -268435456
+; CHECK-NEXT: srl %r3, 31
+; CHECK-NEXT: nr %r2, %r1
+; CHECK-NEXT: nr %r2, %r3
+; CHECK-NEXT: nr %r2, %r0
+; CHECK-NEXT: larl %r1, g
+; CHECK-NEXT: stc %r2, 0(%r1)
+; CHECK-NEXT: lmg %r12, %r15, 264(%r15)
; CHECK-NEXT: br %r14
entry:
%n = alloca i32, align 4
diff --git a/llvm/test/CodeGen/X86/select-to-and-zext.ll b/llvm/test/CodeGen/X86/select-to-and-zext.ll
new file mode 100644
index 000000000000000..7dadbd76a5bb17c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/select-to-and-zext.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s --check-prefix=X64
+
+define i32 @from_cmpeq(i32 %xx, i32 %y) {
+; X86-LABEL: from_cmpeq:
+; X86: # %bb.0:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpl $9, {{[0-9]+}}(%esp)
+; X86-NEXT: sete %al
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: from_cmpeq:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl $9, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %x = icmp eq i32 %xx, 9
+ %masked = and i32 %y, 1
+
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_cmpeq_fail_bad_andmask(i32 %xx, i32 %y) {
+; X86-LABEL: from_cmpeq_fail_bad_andmask:
+; X86: # %bb.0:
+; X86-NEXT: cmpl $9, {{[0-9]+}}(%esp)
+; X86-NEXT: je .LBB1_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB1_1:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $3, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: from_cmpeq_fail_bad_andmask:
+; X64: # %bb.0:
+; X64-NEXT: andl $3, %esi
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl $9, %edi
+; X64-NEXT: cmovel %esi, %eax
+; X64-NEXT: retq
+ %x = icmp eq i32 %xx, 9
+ %masked = and i32 %y, 3
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_i1(i1 %x, i32 %y) {
+; X86-LABEL: from_i1:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: from_i1:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: andl $1, %eax
+; X64-NEXT: retq
+ %masked = and i32 %y, 1
+ %r = select i1 %x, i32 %masked, i32 0
+ ret i32 %r
+}
+
+define i32 @from_trunc_i8(i8 %xx, i32 %y) {
+; X86-LABEL: from_trunc_i8:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: from_trunc_i8:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: andl $1, ...
[truncated]
|
; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s --check-prefix=X86 | ||
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s --check-prefix=X64 | ||
|
||
define i32 @from_cmpeq(i32 %xx, i32 %y) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please can you regenerate this so that it doesn't have the encoding comments
Not sure what's going on with these stacked PRs - but this needs an actual patch title (and not the branch name) |
(select c, (and X, 1), 0)
-> (and (zext c), X)
b1ee876
to
428057b
Compare
Sorry, growing pains with the GitHub workflow. |
… 0)` -> `(and (zext c), X)`; NFC
The middle end canonicalizes `(and (zext c), X)` -> `(select c, (and X, 1), 0)`, but the `and` + `zext` form gets better codegen.
428057b
to
901560f
Compare
ping. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
Pushed. |
@goldsteinn If you manually push a change, you need to do something like this
to keep the association between the PR and the commits (the salient part being the last two lines). If you don't do that, please edit the commit messages to include something like "Closes #xyz" to create a back-link to the PR. |
(select c, (and X, 1), 0)
->(and (zext c), X)
; NFC(select c, (and X, 1), 0)
->(and (zext c), X)