Skip to content

Commit 2f9a635

Browse files
committed
[ARM] Add NEON support for ISD::ABDS/ABDU nodes.
As noted on #94466, NEON has ABDS/ABDU instructions but only handles them via intrinsics, plus some VABDL custom patterns. Fixes #94466
1 parent a53ed21 commit 2f9a635

File tree

3 files changed

+27
-43
lines changed

3 files changed

+27
-43
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

+12-17
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,9 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
205205
setOperationAction(ISD::SDIVREM, VT, Expand);
206206
setOperationAction(ISD::UDIVREM, VT, Expand);
207207

208-
if (!VT.isFloatingPoint() &&
209-
VT != MVT::v2i64 && VT != MVT::v1i64)
210-
for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
208+
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
209+
for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
210+
ISD::UMIN, ISD::UMAX})
211211
setOperationAction(Opcode, VT, Legal);
212212
if (!VT.isFloatingPoint())
213213
for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
@@ -4174,7 +4174,15 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
41744174
}
41754175
case Intrinsic::arm_neon_vabs:
41764176
return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4177-
Op.getOperand(1));
4177+
Op.getOperand(1));
4178+
case Intrinsic::arm_neon_vabds:
4179+
if (Op.getValueType().isInteger())
4180+
return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
4181+
Op.getOperand(1), Op.getOperand(2));
4182+
return SDValue();
4183+
case Intrinsic::arm_neon_vabdu:
4184+
return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
4185+
Op.getOperand(1), Op.getOperand(2));
41784186
case Intrinsic::arm_neon_vmulls:
41794187
case Intrinsic::arm_neon_vmullu: {
41804188
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
@@ -13496,18 +13504,6 @@ static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
1349613504
DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
1349713505
}
1349813506

13499-
static SDValue PerformABSCombine(SDNode *N,
13500-
TargetLowering::DAGCombinerInfo &DCI,
13501-
const ARMSubtarget *Subtarget) {
13502-
SelectionDAG &DAG = DCI.DAG;
13503-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13504-
13505-
if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
13506-
return SDValue();
13507-
13508-
return TLI.expandABS(N, DAG);
13509-
}
13510-
1351113507
/// PerformADDECombine - Target-specific dag combine transform from
1351213508
/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
1351313509
/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
@@ -18871,7 +18867,6 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
1887118867
case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
1887218868
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
1887318869
case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18874-
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
1887518870
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
1887618871
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
1887718872
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);

llvm/lib/Target/ARM/ARMInstrNEON.td

+11-25
Original file line numberDiff line numberDiff line change
@@ -5640,10 +5640,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
56405640
// VABD : Vector Absolute Difference
56415641
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
56425642
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5643-
"vabd", "s", int_arm_neon_vabds, 1>;
5643+
"vabd", "s", abds, 1>;
56445644
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
56455645
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5646-
"vabd", "u", int_arm_neon_vabdu, 1>;
5646+
"vabd", "u", abdu, 1>;
56475647
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
56485648
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
56495649
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
@@ -5657,44 +5657,30 @@ def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
56575657

56585658
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
56595659
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
5660-
"vabdl", "s", int_arm_neon_vabds, zext, 1>;
5660+
"vabdl", "s", abds, zext, 1>;
56615661
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
5662-
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
5662+
"vabdl", "u", abdu, zext, 1>;
56635663

56645664
let Predicates = [HasNEON] in {
5665-
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
5665+
def : Pat<(v8i16 (zext (abdu (v8i8 DPR:$opA), (v8i8 DPR:$opB)))),
56665666
(VABDLuv8i16 DPR:$opA, DPR:$opB)>;
5667-
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
5667+
def : Pat<(v4i32 (zext (abdu (v4i16 DPR:$opA), (v4i16 DPR:$opB)))),
56685668
(VABDLuv4i32 DPR:$opA, DPR:$opB)>;
5669-
}
5670-
5671-
// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
5672-
// shift/xor pattern for ABS.
5673-
5674-
def abd_shr :
5675-
PatFrag<(ops node:$in1, node:$in2, node:$shift),
5676-
(ARMvshrsImm (sub (zext node:$in1),
5677-
(zext node:$in2)), (i32 $shift))>;
5678-
5679-
let Predicates = [HasNEON] in {
5680-
def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)),
5681-
(v2i64 (add (sub (zext (v2i32 DPR:$opA)),
5682-
(zext (v2i32 DPR:$opB))),
5683-
(abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
5669+
def : Pat<(v2i64 (zext (abdu (v2i32 DPR:$opA), (v2i32 DPR:$opB)))),
56845670
(VABDLuv2i64 DPR:$opA, DPR:$opB)>;
56855671
}
56865672

56875673
// VABA : Vector Absolute Difference and Accumulate
56885674
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5689-
"vaba", "s", int_arm_neon_vabds, add>;
5675+
"vaba", "s", abds, add>;
56905676
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5691-
"vaba", "u", int_arm_neon_vabdu, add>;
5677+
"vaba", "u", abdu, add>;
56925678

56935679
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
56945680
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
5695-
"vabal", "s", int_arm_neon_vabds, zext, add>;
5681+
"vabal", "s", abds, zext, add>;
56965682
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
5697-
"vabal", "u", int_arm_neon_vabdu, zext, add>;
5683+
"vabal", "u", abdu, zext, add>;
56985684

56995685
// Vector Maximum and Minimum.
57005686

llvm/test/CodeGen/ARM/neon_vabs.ll

+4-1
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,10 @@ define <2 x i64> @test13(<2 x i32> %a, <2 x i32> %b) nounwind {
184184
; CHECK: @ %bb.0:
185185
; CHECK-NEXT: vmov d16, r2, r3
186186
; CHECK-NEXT: vmov d17, r0, r1
187-
; CHECK-NEXT: vabdl.u32 q8, d17, d16
187+
; CHECK-NEXT: vsubl.u32 q8, d17, d16
188+
; CHECK-NEXT: vshr.s64 q9, q8, #63
189+
; CHECK-NEXT: vsra.s64 q8, q8, #63
190+
; CHECK-NEXT: veor q8, q9, q8
188191
; CHECK-NEXT: vmov r0, r1, d16
189192
; CHECK-NEXT: vmov r2, r3, d17
190193
; CHECK-NEXT: mov pc, lr

0 commit comments

Comments
 (0)