Skip to content

Commit ffbf52a

Browse files
committed
[ARM] Add NEON support for ISD::ABDS/ABDU nodes.
As noted on #94466, NEON has ABDS/ABDU instructions, but the ARM backend only handles them via intrinsics, plus some VABDL custom patterns. I'm not clear how ARM handles intrinsic -> ISD mapping, so I've left all the intrinsics handling in place for the moment. Ideally all the VABD/VABA/VABDL/VABAL handling should be moved to using the abds/abdu nodes - but am I on the right track? Fixes #94466
1 parent 53061ee commit ffbf52a

File tree

2 files changed

+33
-5
lines changed

2 files changed

+33
-5
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,9 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
205205
setOperationAction(ISD::SDIVREM, VT, Expand);
206206
setOperationAction(ISD::UDIVREM, VT, Expand);
207207

208-
if (!VT.isFloatingPoint() &&
209-
VT != MVT::v2i64 && VT != MVT::v1i64)
210-
for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
208+
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
209+
for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
210+
ISD::UMIN, ISD::UMAX})
211211
setOperationAction(Opcode, VT, Legal);
212212
if (!VT.isFloatingPoint())
213213
for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})

llvm/lib/Target/ARM/ARMInstrNEON.td

+30-2
Original file line numberDiff line numberDiff line change
@@ -5655,16 +5655,44 @@ def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
56555655
"vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
56565656
Requires<[HasNEON, HasFullFP16]>;
56575657

5658+
let Predicates = [HasNEON] in {
5659+
def : Pat<(v2i32 (abds (v2i32 DPR:$opA), (v2i32 DPR:$opB))),
5660+
(VABDsv2i32 DPR:$opA, DPR:$opB)>;
5661+
def : Pat<(v4i16 (abds (v4i16 DPR:$opA), (v4i16 DPR:$opB))),
5662+
(VABDsv4i16 DPR:$opA, DPR:$opB)>;
5663+
def : Pat<(v8i8 (abds (v8i8 DPR:$opA), (v8i8 DPR:$opB))),
5664+
(VABDsv8i8 DPR:$opA, DPR:$opB)>;
5665+
def : Pat<(v4i32 (abds (v4i32 QPR:$opA), (v4i32 QPR:$opB))),
5666+
(VABDsv4i32 QPR:$opA, QPR:$opB)>;
5667+
def : Pat<(v8i16 (abds (v8i16 QPR:$opA), (v8i16 QPR:$opB))),
5668+
(VABDsv8i16 QPR:$opA, QPR:$opB)>;
5669+
def : Pat<(v16i8 (abds (v16i8 QPR:$opA), (v16i8 QPR:$opB))),
5670+
(VABDsv16i8 QPR:$opA, QPR:$opB)>;
5671+
5672+
def : Pat<(v2i32 (abdu (v2i32 DPR:$opA), (v2i32 DPR:$opB))),
5673+
(VABDuv2i32 DPR:$opA, DPR:$opB)>;
5674+
def : Pat<(v4i16 (abdu (v4i16 DPR:$opA), (v4i16 DPR:$opB))),
5675+
(VABDuv4i16 DPR:$opA, DPR:$opB)>;
5676+
def : Pat<(v8i8 (abdu (v8i8 DPR:$opA), (v8i8 DPR:$opB))),
5677+
(VABDuv8i8 DPR:$opA, DPR:$opB)>;
5678+
def : Pat<(v4i32 (abdu (v4i32 QPR:$opA), (v4i32 QPR:$opB))),
5679+
(VABDuv4i32 QPR:$opA, QPR:$opB)>;
5680+
def : Pat<(v8i16 (abdu (v8i16 QPR:$opA), (v8i16 QPR:$opB))),
5681+
(VABDuv8i16 QPR:$opA, QPR:$opB)>;
5682+
def : Pat<(v16i8 (abdu (v16i8 QPR:$opA), (v16i8 QPR:$opB))),
5683+
(VABDuv16i8 QPR:$opA, QPR:$opB)>;
5684+
}
5685+
56585686
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
56595687
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
56605688
"vabdl", "s", int_arm_neon_vabds, zext, 1>;
56615689
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
56625690
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
56635691

56645692
let Predicates = [HasNEON] in {
5665-
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
5693+
def : Pat<(v8i16 (zext (abdu (v8i8 DPR:$opA), (v8i8 DPR:$opB)))),
56665694
(VABDLuv8i16 DPR:$opA, DPR:$opB)>;
5667-
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
5695+
def : Pat<(v4i32 (zext (abdu (v4i16 DPR:$opA), (v4i16 DPR:$opB)))),
56685696
(VABDLuv4i32 DPR:$opA, DPR:$opB)>;
56695697
}
56705698

0 commit comments

Comments
 (0)