Skip to content

Commit 9632e15

Browse files
authored
Match fixed width ISD::AVGFLOORS + ISD::AVGCEILS patterns (#86222)
1 parent b3fe27f commit 9632e15

File tree

2 files changed

+51
-10
lines changed

2 files changed

+51
-10
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2529,20 +2529,28 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
25292529
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
25302530
}
25312531

2532-
// Attempt to form avgceilu(A, B) from (A | B) - ((A ^ B) >> 1)
2533-
static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
2532+
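// Attempt to form avgceilu(A, B) / avgceils(A, B) from (A | B) - ((A ^ B) >> 1), where >> is a logical shift (srl) for the unsigned node and an arithmetic shift (sra) for the signed node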
2533+
static SDValue combineFixedwidthToAVGCEIL(SDNode *N, SelectionDAG &DAG) {
25342534
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25352535
SDValue N0 = N->getOperand(0);
25362536
EVT VT = N0.getValueType();
25372537
SDLoc DL(N);
2538+
SDValue A, B;
2539+
25382540
if (TLI.isOperationLegal(ISD::AVGCEILU, VT)) {
2539-
SDValue A, B;
25402541
if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
25412542
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
25422543
m_SpecificInt(1))))) {
25432544
return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
25442545
}
25452546
}
2547+
if (TLI.isOperationLegal(ISD::AVGCEILS, VT)) {
2548+
if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2549+
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2550+
m_SpecificInt(1))))) {
2551+
return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2552+
}
2553+
}
25462554
return SDValue();
25472555
}
25482556

@@ -2837,20 +2845,29 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
28372845
return SDValue();
28382846
}
28392847

2840-
// Attempt to form avgflooru(A, B) from (A & B) + ((A ^ B) >> 1)
2841-
static SDValue combineFixedwidthToAVGFLOORU(SDNode *N, SelectionDAG &DAG) {
2848+
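// Attempt to form avgflooru(A, B) / avgfloors(A, B) from (A & B) + ((A ^ B) >> 1), where >> is a logical shift (srl) for the unsigned node and an arithmetic shift (sra) for the signed node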
2849+
static SDValue combineFixedwidthToAVGFLOOR(SDNode *N, SelectionDAG &DAG) {
28422850
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28432851
SDValue N0 = N->getOperand(0);
28442852
EVT VT = N0.getValueType();
28452853
SDLoc DL(N);
2854+
SDValue A, B;
2855+
28462856
if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) {
2847-
SDValue A, B;
28482857
if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
28492858
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
28502859
m_SpecificInt(1))))) {
28512860
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
28522861
}
28532862
}
2863+
if (TLI.isOperationLegal(ISD::AVGFLOORS, VT)) {
2864+
if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2865+
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2866+
m_SpecificInt(1))))) {
2867+
return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2868+
}
2869+
}
2870+
28542871
return SDValue();
28552872
}
28562873

@@ -2869,8 +2886,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
28692886
if (SDValue V = foldAddSubOfSignBit(N, DAG))
28702887
return V;
28712888

2872-
// Try to match AVGFLOORU fixedwidth pattern
2873-
if (SDValue V = combineFixedwidthToAVGFLOORU(N, DAG))
2889+
// Try to match AVGFLOOR fixedwidth pattern
2890+
if (SDValue V = combineFixedwidthToAVGFLOOR(N, DAG))
28742891
return V;
28752892

28762893
// fold (a+b) -> (a|b) iff a and b share no bits.
@@ -3868,8 +3885,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
38683885
if (SDValue V = foldAddSubOfSignBit(N, DAG))
38693886
return V;
38703887

3871-
// Try to match AVGCEILU fixedwidth pattern
3872-
if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
3888+
// Try to match AVGCEIL fixedwidth pattern
3889+
if (SDValue V = combineFixedwidthToAVGCEIL(N, DAG))
38733890
return V;
38743891

38753892
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))

llvm/test/CodeGen/AArch64/hadd-combine.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,18 @@ define <8 x i16> @sub_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
341341
ret <8 x i16> %res
342342
}
343343

344+
define <8 x i16> @srhadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
345+
; CHECK-LABEL: srhadd_fixedwidth_v8i16:
346+
; CHECK: // %bb.0:
347+
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
348+
; CHECK-NEXT: ret
349+
%or = or <8 x i16> %a0, %a1
350+
%xor = xor <8 x i16> %a0, %a1
351+
%srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
352+
%res = sub <8 x i16> %or, %srl
353+
ret <8 x i16> %res
354+
}
355+
344356
define <8 x i16> @rhaddu_base(<8 x i16> %src1, <8 x i16> %src2) {
345357
; CHECK-LABEL: rhaddu_base:
346358
; CHECK: // %bb.0:
@@ -879,6 +891,18 @@ define <8 x i16> @uhadd_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
879891
ret <8 x i16> %res
880892
}
881893

894+
define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
895+
; CHECK-LABEL: shadd_fixedwidth_v8i16:
896+
; CHECK: // %bb.0:
897+
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
898+
; CHECK-NEXT: ret
899+
%and = and <8 x i16> %a0, %a1
900+
%xor = xor <8 x i16> %a0, %a1
901+
%srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
902+
%res = add <8 x i16> %and, %srl
903+
ret <8 x i16> %res
904+
}
905+
882906
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
883907
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
884908
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)

0 commit comments

Comments
 (0)