Skip to content

Commit af3ffff

Browse files
authored
[DAG] Always allow folding XOR patterns to ABS pre-legalization (#94601)
Removes residual ARM handling for vXi64 ABS nodes to prevent infinite loops.
1 parent 1a52392 commit af3ffff

17 files changed: +2864 -3050 lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4042,7 +4042,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
40424042
}
40434043

40444044
// fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4045-
if (hasOperation(ISD::ABS, VT) &&
4045+
if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
40464046
sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
40474047
sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
40484048
return DAG.getNode(ISD::ABS, DL, VT, A);
@@ -9526,7 +9526,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
95269526
}
95279527

95289528
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9529-
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
9529+
if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
95309530
SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
95319531
SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
95329532
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1623,9 +1623,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
16231623
setPrefFunctionAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
16241624

16251625
setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1626-
1627-
if (Subtarget->isThumb() || Subtarget->isThumb2())
1628-
setTargetDAGCombine(ISD::ABS);
16291626
}
16301627

16311628
bool ARMTargetLowering::useSoftFloat() const {
@@ -13504,18 +13501,6 @@ static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
1350413501
DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
1350513502
}
1350613503

13507-
static SDValue PerformABSCombine(SDNode *N,
13508-
TargetLowering::DAGCombinerInfo &DCI,
13509-
const ARMSubtarget *Subtarget) {
13510-
SelectionDAG &DAG = DCI.DAG;
13511-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13512-
13513-
if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
13514-
return SDValue();
13515-
13516-
return TLI.expandABS(N, DAG);
13517-
}
13518-
1351913504
/// PerformADDECombine - Target-specific dag combine transform from
1352013505
/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
1352113506
/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
@@ -18879,7 +18864,6 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
1887918864
case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
1888018865
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
1888118866
case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18882-
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
1888318867
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
1888418868
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
1888518869
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);

llvm/lib/Target/ARM/ARMInstrNEON.td

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -5670,22 +5670,6 @@ def : Pat<(v2i64 (zext (abdu (v2i32 DPR:$opA), (v2i32 DPR:$opB)))),
56705670
(VABDLuv2i64 DPR:$opA, DPR:$opB)>;
56715671
}
56725672

5673-
// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
5674-
// shift/xor pattern for ABS.
5675-
// TODO: Remove me.
5676-
def abd_shr :
5677-
PatFrag<(ops node:$in1, node:$in2, node:$shift),
5678-
(ARMvshrsImm (sub (zext node:$in1),
5679-
(zext node:$in2)), (i32 $shift))>;
5680-
5681-
let Predicates = [HasNEON] in {
5682-
def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)),
5683-
(v2i64 (add (sub (zext (v2i32 DPR:$opA)),
5684-
(zext (v2i32 DPR:$opB))),
5685-
(abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
5686-
(VABDLuv2i64 DPR:$opA, DPR:$opB)>;
5687-
}
5688-
56895673
// VABA : Vector Absolute Difference and Accumulate
56905674
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
56915675
"vaba", "s", abds, add>;

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -135,31 +135,31 @@ define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) {
135135
; GCN-NEXT: s_waitcnt lgkmcnt(0)
136136
; GCN-NEXT: v_mov_b32_e32 v1, s4
137137
; GCN-NEXT: v_cndmask_b32_e32 v0, 5, v1, vcc
138-
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
139-
; GCN-NEXT: v_add_u32_e32 v0, vcc, v0, v1
140-
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
141-
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v0
142-
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0, v0
138+
; GCN-NEXT: v_sub_u32_e32 v1, vcc, 0, v0
139+
; GCN-NEXT: v_max_i32_e32 v1, v0, v1
140+
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v1
141+
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0, v1
143142
; GCN-NEXT: s_mov_b32 s4, 0xf4240
144143
; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v2
144+
; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0
145145
; GCN-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
146146
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
147147
; GCN-NEXT: v_mul_lo_u32 v3, v3, v2
148148
; GCN-NEXT: v_mul_hi_u32 v3, v2, v3
149149
; GCN-NEXT: v_add_u32_e32 v2, vcc, v2, v3
150150
; GCN-NEXT: v_mul_hi_u32 v2, v2, s4
151-
; GCN-NEXT: v_mul_lo_u32 v3, v2, v0
151+
; GCN-NEXT: v_mul_lo_u32 v3, v2, v1
152152
; GCN-NEXT: v_add_u32_e32 v4, vcc, 1, v2
153153
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0xf4240, v3
154-
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v0
154+
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
155155
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
156-
; GCN-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v0
156+
; GCN-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v1
157157
; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
158158
; GCN-NEXT: v_add_u32_e32 v4, vcc, 1, v2
159-
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v0
160-
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
161-
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
162-
; GCN-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
159+
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
160+
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
161+
; GCN-NEXT: v_xor_b32_e32 v1, v1, v0
162+
; GCN-NEXT: v_sub_u32_e32 v0, vcc, v1, v0
163163
; GCN-NEXT: s_setpc_b64 s[30:31]
164164
%select = select i1 %cond, i32 ptrtoint (ptr addrspace(1) @gv to i32), i32 5
165165
%op = sdiv i32 1000000, %select
@@ -217,31 +217,31 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
217217
; GCN-NEXT: s_waitcnt lgkmcnt(0)
218218
; GCN-NEXT: v_mov_b32_e32 v1, s4
219219
; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 5, vcc
220-
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
221-
; GCN-NEXT: v_add_u32_e32 v0, vcc, v0, v1
222-
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
223-
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v0
224-
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0, v0
220+
; GCN-NEXT: v_sub_u32_e32 v1, vcc, 0, v0
221+
; GCN-NEXT: v_max_i32_e32 v1, v0, v1
222+
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v1
223+
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0, v1
225224
; GCN-NEXT: s_mov_b32 s4, 0xf4240
226225
; GCN-NEXT: v_rcp_iflag_f32_e32 v2, v2
226+
; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0
227227
; GCN-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
228228
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
229229
; GCN-NEXT: v_mul_lo_u32 v3, v3, v2
230230
; GCN-NEXT: v_mul_hi_u32 v3, v2, v3
231231
; GCN-NEXT: v_add_u32_e32 v2, vcc, v2, v3
232232
; GCN-NEXT: v_mul_hi_u32 v2, v2, s4
233-
; GCN-NEXT: v_mul_lo_u32 v3, v2, v0
233+
; GCN-NEXT: v_mul_lo_u32 v3, v2, v1
234234
; GCN-NEXT: v_add_u32_e32 v4, vcc, 1, v2
235235
; GCN-NEXT: v_sub_u32_e32 v3, vcc, 0xf4240, v3
236-
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v0
236+
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
237237
; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
238-
; GCN-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v0
238+
; GCN-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v1
239239
; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
240240
; GCN-NEXT: v_add_u32_e32 v4, vcc, 1, v2
241-
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v0
242-
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc
243-
; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
244-
; GCN-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
241+
; GCN-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
242+
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
243+
; GCN-NEXT: v_xor_b32_e32 v1, v1, v0
244+
; GCN-NEXT: v_sub_u32_e32 v0, vcc, v1, v0
245245
; GCN-NEXT: s_setpc_b64 s[30:31]
246246
%select = select i1 %cond, i32 5, i32 ptrtoint (ptr addrspace(1) @gv to i32)
247247
%op = sdiv i32 1000000, %select

0 commit comments

Comments
 (0)