Skip to content

Commit 3b090ff

Browse files
committed
[InstCombine] use demanded vector elements to eliminate partially redundant instructions
In issue #60632, we have vector math ops that differ because an operand is shuffled, but the math has limited demanded elements, so it can be replaced by another instruction:
https://alive2.llvm.org/ce/z/TKqq7H

I don't think we have anything like this yet; it's like a CSE/GVN fold, but driven by demanded elements of a vector op. This is limited to splat-0 as a first step to keep it simple.

Differential Revision: https://reviews.llvm.org/D144760
1 parent 3cf8259 commit 3b090ff

File tree

2 files changed

+76
-20
lines changed

2 files changed

+76
-20
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1713,6 +1713,54 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
17131713
// UB/poison potential, but that should be refined.
17141714
BinaryOperator *BO;
17151715
if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
1716+
Value *X = BO->getOperand(0);
1717+
Value *Y = BO->getOperand(1);
1718+
1719+
// Look for an equivalent binop except that one operand has been shuffled.
1720+
// If the demand for this binop only includes elements that are the same as
1721+
// the other binop, then we may be able to replace this binop with a use of
1722+
// the earlier one.
1723+
//
1724+
// Example:
1725+
// %other_bo = bo (shuf X, {0}), Y
1726+
// %this_extracted_bo = extelt (bo X, Y), 0
1727+
// -->
1728+
// %other_bo = bo (shuf X, {0}), Y
1729+
// %this_extracted_bo = extelt %other_bo, 0
1730+
//
1731+
// TODO: Handle demand of an arbitrary single element or more than one
1732+
// element instead of just element 0.
1733+
// TODO: Unlike general demanded elements transforms, this should be safe
1734+
// for any (div/rem/shift) opcode too.
1735+
if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() &&
1736+
BO->hasOneUse() ) {
1737+
1738+
auto findShufBO = [&](bool MatchShufAsOp0) -> User * {
1739+
// Try to use shuffle-of-operand in place of an operand:
1740+
// bo X, Y --> bo (shuf X), Y
1741+
// bo X, Y --> bo X, (shuf Y)
1742+
BinaryOperator::BinaryOps Opcode = BO->getOpcode();
1743+
Value *ShufOp = MatchShufAsOp0 ? X : Y;
1744+
Value *OtherOp = MatchShufAsOp0 ? Y : X;
1745+
for (User *U : OtherOp->users()) {
1746+
auto Shuf = m_Shuffle(m_Specific(ShufOp), m_Value(), m_ZeroMask());
1747+
if (BO->isCommutative()
1748+
? match(U, m_c_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
1749+
: MatchShufAsOp0
1750+
? match(U, m_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
1751+
: match(U, m_BinOp(Opcode, m_Specific(OtherOp), Shuf)))
1752+
if (DT.dominates(U, I))
1753+
return U;
1754+
}
1755+
return nullptr;
1756+
};
1757+
1758+
if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true))
1759+
return ShufBO;
1760+
if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false))
1761+
return ShufBO;
1762+
}
1763+
17161764
simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
17171765
simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);
17181766

llvm/test/Transforms/InstCombine/vec_demanded_elts.ll

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -850,8 +850,7 @@ define void @common_binop_demand_via_splat_op0(<2 x i4> %x, <2 x i4> %y) {
850850
; CHECK-LABEL: @common_binop_demand_via_splat_op0(
851851
; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer
852852
; CHECK-NEXT: [[B_XSHUF_Y:%.*]] = mul <2 x i4> [[XSHUF]], [[Y:%.*]]
853-
; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]]
854-
; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer
853+
; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XSHUF_Y]], <2 x i4> poison, <2 x i32> zeroinitializer
855854
; CHECK-NEXT: call void @use(<2 x i4> [[B_XSHUF_Y]])
856855
; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]])
857856
; CHECK-NEXT: ret void
@@ -870,8 +869,7 @@ define void @common_binop_demand_via_splat_op1(<2 x i4> %p, <2 x i4> %y) {
870869
; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> <i4 0, i4 1>, [[P:%.*]]
871870
; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer
872871
; CHECK-NEXT: [[B_X_YSHUF:%.*]] = mul <2 x i4> [[X]], [[YSHUF]]
873-
; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]]
874-
; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer
872+
; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_X_YSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer
875873
; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]])
876874
; CHECK-NEXT: call void @use(<2 x i4> [[B_X_YSHUF]])
877875
; CHECK-NEXT: ret void
@@ -888,12 +886,11 @@ define void @common_binop_demand_via_splat_op1(<2 x i4> %p, <2 x i4> %y) {
888886

889887
define void @common_binop_demand_via_splat_op0_commute(<2 x i4> %p, <2 x i4> %q) {
890888
; CHECK-LABEL: @common_binop_demand_via_splat_op0_commute(
891-
; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> <i4 0, i4 1>, [[P:%.*]]
889+
; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> <i4 0, i4 poison>, [[P:%.*]]
892890
; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> <i4 1, i4 2>, [[Q:%.*]]
893891
; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X]], <2 x i4> poison, <2 x i32> zeroinitializer
894892
; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = mul <2 x i4> [[Y]], [[XSHUF]]
895-
; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]]
896-
; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer
893+
; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_Y_XSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer
897894
; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]])
898895
; CHECK-NEXT: call void @use(<2 x i4> [[B_Y_XSHUF]])
899896
; CHECK-NEXT: ret void
@@ -912,11 +909,10 @@ define void @common_binop_demand_via_splat_op0_commute(<2 x i4> %p, <2 x i4> %q)
912909
define void @common_binop_demand_via_splat_op1_commute(<2 x i4> %p, <2 x i4> %q) {
913910
; CHECK-LABEL: @common_binop_demand_via_splat_op1_commute(
914911
; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> <i4 0, i4 1>, [[P:%.*]]
915-
; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> <i4 2, i4 3>, [[Q:%.*]]
912+
; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> <i4 2, i4 poison>, [[Q:%.*]]
916913
; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y]], <2 x i4> poison, <2 x i32> zeroinitializer
917914
; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = mul <2 x i4> [[YSHUF]], [[X]]
918-
; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X]], [[Y]]
919-
; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_XY]], <2 x i4> poison, <2 x i32> zeroinitializer
915+
; CHECK-NEXT: [[B_XY_SPLAT:%.*]] = shufflevector <2 x i4> [[B_Y_XSHUF]], <2 x i4> poison, <2 x i32> zeroinitializer
920916
; CHECK-NEXT: call void @use(<2 x i4> [[B_XY_SPLAT]])
921917
; CHECK-NEXT: call void @use(<2 x i4> [[B_Y_XSHUF]])
922918
; CHECK-NEXT: ret void
@@ -932,6 +928,8 @@ define void @common_binop_demand_via_splat_op1_commute(<2 x i4> %p, <2 x i4> %q)
932928
ret void
933929
}
934930

931+
; negative test - wrong operands for sub
932+
935933
define void @common_binop_demand_via_splat_op0_wrong_commute(<2 x i4> %x, <2 x i4> %y) {
936934
; CHECK-LABEL: @common_binop_demand_via_splat_op0_wrong_commute(
937935
; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer
@@ -951,6 +949,8 @@ define void @common_binop_demand_via_splat_op0_wrong_commute(<2 x i4> %x, <2 x i
951949
ret void
952950
}
953951

952+
; negative test - need to reorder insts?
953+
954954
define void @common_binop_demand_via_splat_op0_not_dominated1(<2 x i4> %x, <2 x i4> %y) {
955955
; CHECK-LABEL: @common_binop_demand_via_splat_op0_not_dominated1(
956956
; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]]
@@ -970,6 +970,8 @@ define void @common_binop_demand_via_splat_op0_not_dominated1(<2 x i4> %x, <2 x
970970
ret void
971971
}
972972

973+
; negative test - need to reorder insts?
974+
973975
define void @common_binop_demand_via_splat_op0_not_dominated2(<2 x i4> %x, <2 x i4> %y) {
974976
; CHECK-LABEL: @common_binop_demand_via_splat_op0_not_dominated2(
975977
; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]]
@@ -993,8 +995,7 @@ define i4 @common_binop_demand_via_extelt_op0(<2 x i4> %x, <2 x i4> %y) {
993995
; CHECK-LABEL: @common_binop_demand_via_extelt_op0(
994996
; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer
995997
; CHECK-NEXT: [[B_XSHUF_Y:%.*]] = sub <2 x i4> [[XSHUF]], [[Y:%.*]]
996-
; CHECK-NEXT: [[B_XY:%.*]] = sub nsw <2 x i4> [[X]], [[Y]]
997-
; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XY]], i64 0
998+
; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XSHUF_Y]], i64 0
998999
; CHECK-NEXT: call void @use(<2 x i4> [[B_XSHUF_Y]])
9991000
; CHECK-NEXT: ret i4 [[B_XY0]]
10001001
;
@@ -1011,8 +1012,7 @@ define float @common_binop_demand_via_extelt_op1(<2 x float> %p, <2 x float> %y)
10111012
; CHECK-NEXT: [[X:%.*]] = fsub <2 x float> <float 0.000000e+00, float 1.000000e+00>, [[P:%.*]]
10121013
; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
10131014
; CHECK-NEXT: [[B_X_YSHUF:%.*]] = fdiv <2 x float> [[X]], [[YSHUF]]
1014-
; CHECK-NEXT: [[B_XY:%.*]] = fdiv <2 x float> [[X]], [[Y]]
1015-
; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_XY]], i64 0
1015+
; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_X_YSHUF]], i64 0
10161016
; CHECK-NEXT: call void @use_fp(<2 x float> [[B_X_YSHUF]])
10171017
; CHECK-NEXT: ret float [[B_XY0]]
10181018
;
@@ -1027,12 +1027,11 @@ define float @common_binop_demand_via_extelt_op1(<2 x float> %p, <2 x float> %y)
10271027

10281028
define float @common_binop_demand_via_extelt_op0_commute(<2 x float> %p, <2 x float> %q) {
10291029
; CHECK-LABEL: @common_binop_demand_via_extelt_op0_commute(
1030-
; CHECK-NEXT: [[X:%.*]] = fsub <2 x float> <float 0.000000e+00, float 1.000000e+00>, [[P:%.*]]
1030+
; CHECK-NEXT: [[X:%.*]] = fsub <2 x float> <float 0.000000e+00, float poison>, [[P:%.*]]
10311031
; CHECK-NEXT: [[Y:%.*]] = fsub <2 x float> <float 3.000000e+00, float 2.000000e+00>, [[Q:%.*]]
10321032
; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <2 x i32> zeroinitializer
10331033
; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = fmul nnan <2 x float> [[Y]], [[XSHUF]]
1034-
; CHECK-NEXT: [[B_XY:%.*]] = fmul ninf <2 x float> [[X]], [[Y]]
1035-
; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_XY]], i64 0
1034+
; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x float> [[B_Y_XSHUF]], i64 0
10361035
; CHECK-NEXT: call void @use_fp(<2 x float> [[B_Y_XSHUF]])
10371036
; CHECK-NEXT: ret float [[B_XY0]]
10381037
;
@@ -1049,11 +1048,10 @@ define float @common_binop_demand_via_extelt_op0_commute(<2 x float> %p, <2 x fl
10491048
define i4 @common_binop_demand_via_extelt_op1_commute(<2 x i4> %p, <2 x i4> %q) {
10501049
; CHECK-LABEL: @common_binop_demand_via_extelt_op1_commute(
10511050
; CHECK-NEXT: [[X:%.*]] = sub <2 x i4> <i4 0, i4 1>, [[P:%.*]]
1052-
; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> <i4 2, i4 3>, [[Q:%.*]]
1051+
; CHECK-NEXT: [[Y:%.*]] = sub <2 x i4> <i4 2, i4 poison>, [[Q:%.*]]
10531052
; CHECK-NEXT: [[YSHUF:%.*]] = shufflevector <2 x i4> [[Y]], <2 x i4> poison, <2 x i32> zeroinitializer
10541053
; CHECK-NEXT: [[B_Y_XSHUF:%.*]] = or <2 x i4> [[YSHUF]], [[X]]
1055-
; CHECK-NEXT: [[B_XY:%.*]] = or <2 x i4> [[X]], [[Y]]
1056-
; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_XY]], i64 0
1054+
; CHECK-NEXT: [[B_XY0:%.*]] = extractelement <2 x i4> [[B_Y_XSHUF]], i64 0
10571055
; CHECK-NEXT: call void @use(<2 x i4> [[B_Y_XSHUF]])
10581056
; CHECK-NEXT: ret i4 [[B_XY0]]
10591057
;
@@ -1067,6 +1065,8 @@ define i4 @common_binop_demand_via_extelt_op1_commute(<2 x i4> %p, <2 x i4> %q)
10671065
ret i4 %b_xy0
10681066
}
10691067

1068+
; negative test - wrong operands for sub
1069+
10701070
define i4 @common_binop_demand_via_extelt_op0_wrong_commute(<2 x i4> %x, <2 x i4> %y) {
10711071
; CHECK-LABEL: @common_binop_demand_via_extelt_op0_wrong_commute(
10721072
; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer
@@ -1084,6 +1084,8 @@ define i4 @common_binop_demand_via_extelt_op0_wrong_commute(<2 x i4> %x, <2 x i4
10841084
ret i4 %b_xy0
10851085
}
10861086

1087+
; negative test - need to reorder insts?
1088+
10871089
define i4 @common_binop_demand_via_extelt_op0_not_dominated1(<2 x i4> %x, <2 x i4> %y) {
10881090
; CHECK-LABEL: @common_binop_demand_via_extelt_op0_not_dominated1(
10891091
; CHECK-NEXT: [[B_XY:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
@@ -1101,6 +1103,8 @@ define i4 @common_binop_demand_via_extelt_op0_not_dominated1(<2 x i4> %x, <2 x i
11011103
ret i4 %b_xy0
11021104
}
11031105

1106+
; negative test - need to reorder insts?
1107+
11041108
define i4 @common_binop_demand_via_extelt_op0_not_dominated2(<2 x i4> %x, <2 x i4> %y) {
11051109
; CHECK-LABEL: @common_binop_demand_via_extelt_op0_not_dominated2(
11061110
; CHECK-NEXT: [[B_XY:%.*]] = mul <2 x i4> [[X:%.*]], [[Y:%.*]]
@@ -1118,6 +1122,8 @@ define i4 @common_binop_demand_via_extelt_op0_not_dominated2(<2 x i4> %x, <2 x i
11181122
ret i4 %b_xy0
11191123
}
11201124

1125+
; negative test - splat doesn't match demanded element
1126+
11211127
define i4 @common_binop_demand_via_extelt_op0_mismatch_elt0(<2 x i4> %x, <2 x i4> %y) {
11221128
; CHECK-LABEL: @common_binop_demand_via_extelt_op0_mismatch_elt0(
11231129
; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> <i32 1, i32 1>
@@ -1135,6 +1141,8 @@ define i4 @common_binop_demand_via_extelt_op0_mismatch_elt0(<2 x i4> %x, <2 x i4
11351141
ret i4 %b_xy0
11361142
}
11371143

1144+
; negative test - splat doesn't match demanded element
1145+
11381146
define i4 @common_binop_demand_via_extelt_op0_mismatch_elt1(<2 x i4> %x, <2 x i4> %y) {
11391147
; CHECK-LABEL: @common_binop_demand_via_extelt_op0_mismatch_elt1(
11401148
; CHECK-NEXT: [[XSHUF:%.*]] = shufflevector <2 x i4> [[X:%.*]], <2 x i4> poison, <2 x i32> zeroinitializer

0 commit comments

Comments
 (0)