Skip to content

Commit 5fc619b

Browse files
committed
[DAG] Update ISD::AVG folds to use hasOperation to allow Custom matching prior to legalization
Fixes an issue where AVX1 targets weren't matching 256-bit AVGCEILU cases.
1 parent 5d7e7ab commit 5fc619b

File tree

2 files changed

+57
-94
lines changed

2 files changed

+57
-94
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 27 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,9 @@ namespace {
609609
SDValue &CC, bool MatchStrict = false) const;
610610
bool isOneUseSetCC(SDValue N) const;
611611

612+
SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
613+
SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
614+
612615
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
613616
unsigned HiOp);
614617
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
@@ -2530,26 +2533,22 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
25302533
}
25312534

25322535
// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2533-
static SDValue combineFixedwidthToAVGCEIL(SDNode *N, SelectionDAG &DAG) {
2534-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2536+
SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
25352537
SDValue N0 = N->getOperand(0);
25362538
EVT VT = N0.getValueType();
2537-
SDLoc DL(N);
25382539
SDValue A, B;
25392540

2540-
if (TLI.isOperationLegal(ISD::AVGCEILU, VT)) {
2541-
if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2542-
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2543-
m_SpecificInt(1))))) {
2544-
return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2545-
}
2541+
if (hasOperation(ISD::AVGCEILU, VT) &&
2542+
sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2543+
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2544+
m_SpecificInt(1))))) {
2545+
return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
25462546
}
2547-
if (TLI.isOperationLegal(ISD::AVGCEILS, VT)) {
2548-
if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2549-
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2550-
m_SpecificInt(1))))) {
2551-
return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2552-
}
2547+
if (hasOperation(ISD::AVGCEILS, VT) &&
2548+
sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2549+
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2550+
m_SpecificInt(1))))) {
2551+
return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
25532552
}
25542553
return SDValue();
25552554
}
@@ -2846,26 +2845,22 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
28462845
}
28472846

28482847
// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2849-
static SDValue combineFixedwidthToAVGFLOOR(SDNode *N, SelectionDAG &DAG) {
2850-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2848+
SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
28512849
SDValue N0 = N->getOperand(0);
28522850
EVT VT = N0.getValueType();
2853-
SDLoc DL(N);
28542851
SDValue A, B;
28552852

2856-
if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) {
2857-
if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2858-
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2859-
m_SpecificInt(1))))) {
2860-
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2861-
}
2853+
if (hasOperation(ISD::AVGFLOORU, VT) &&
2854+
sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2855+
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2856+
m_SpecificInt(1))))) {
2857+
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
28622858
}
2863-
if (TLI.isOperationLegal(ISD::AVGFLOORS, VT)) {
2864-
if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2865-
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2866-
m_SpecificInt(1))))) {
2867-
return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2868-
}
2859+
if (hasOperation(ISD::AVGFLOORS, VT) &&
2860+
sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2861+
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2862+
m_SpecificInt(1))))) {
2863+
return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
28692864
}
28702865

28712866
return SDValue();
@@ -2887,7 +2882,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
28872882
return V;
28882883

28892884
// Try to match AVGFLOOR fixedwidth pattern
2890-
if (SDValue V = combineFixedwidthToAVGFLOOR(N, DAG))
2885+
if (SDValue V = foldAddToAvg(N, DL))
28912886
return V;
28922887

28932888
// fold (a+b) -> (a|b) iff a and b share no bits.
@@ -3886,7 +3881,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
38863881
return V;
38873882

38883883
// Try to match AVGCEIL fixedwidth pattern
3889-
if (SDValue V = combineFixedwidthToAVGCEIL(N, DAG))
3884+
if (SDValue V = foldSubToAvg(N, DL))
38903885
return V;
38913886

38923887
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))

llvm/test/CodeGen/X86/avgceilu.ll

Lines changed: 30 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -319,18 +319,11 @@ define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
319319
;
320320
; AVX1-LABEL: test_fixed_v32i8:
321321
; AVX1: # %bb.0:
322-
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm2
323-
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
324-
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
325-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
326-
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
327-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
328-
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
329-
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
330-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
331-
; AVX1-NEXT: vpsubb %xmm0, %xmm3, %xmm0
332-
; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
333-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
322+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
323+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
324+
; AVX1-NEXT: vpavgb %xmm2, %xmm3, %xmm2
325+
; AVX1-NEXT: vpavgb %xmm1, %xmm0, %xmm0
326+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
334327
; AVX1-NEXT: retq
335328
;
336329
; AVX2-LABEL: test_fixed_v32i8:
@@ -392,15 +385,11 @@ define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
392385
;
393386
; AVX1-LABEL: test_fixed_v16i16:
394387
; AVX1: # %bb.0:
395-
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm2
396-
; AVX1-NEXT: vxorps %ymm0, %ymm1, %ymm0
397-
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
398-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
399-
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
400-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
401-
; AVX1-NEXT: vpsubw %xmm0, %xmm3, %xmm0
402-
; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
403-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
388+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
389+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
390+
; AVX1-NEXT: vpavgw %xmm2, %xmm3, %xmm2
391+
; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0
392+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
404393
; AVX1-NEXT: retq
405394
;
406395
; AVX2-LABEL: test_fixed_v16i16:
@@ -959,29 +948,16 @@ define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) {
959948
;
960949
; AVX1-LABEL: test_fixed_v64i8:
961950
; AVX1: # %bb.0:
962-
; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm4
963-
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm5
964-
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
965-
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
966-
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
967-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
968-
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
969-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
970-
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
971-
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
972-
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm6
973-
; AVX1-NEXT: vpand %xmm3, %xmm6, %xmm6
974-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
975-
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
976-
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
977-
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm3
978-
; AVX1-NEXT: vpsubb %xmm0, %xmm3, %xmm0
979-
; AVX1-NEXT: vpsubb %xmm6, %xmm5, %xmm3
980-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
981-
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
982-
; AVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1
983-
; AVX1-NEXT: vpsubb %xmm2, %xmm4, %xmm2
984-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
951+
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
952+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
953+
; AVX1-NEXT: vpavgb %xmm4, %xmm5, %xmm4
954+
; AVX1-NEXT: vpavgb %xmm2, %xmm0, %xmm0
955+
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
956+
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
957+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
958+
; AVX1-NEXT: vpavgb %xmm2, %xmm4, %xmm2
959+
; AVX1-NEXT: vpavgb %xmm3, %xmm1, %xmm1
960+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
985961
; AVX1-NEXT: retq
986962
;
987963
; AVX2-LABEL: test_fixed_v64i8:
@@ -1054,24 +1030,16 @@ define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) {
10541030
;
10551031
; AVX1-LABEL: test_fixed_v32i16:
10561032
; AVX1: # %bb.0:
1057-
; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm4
1058-
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm5
1059-
; AVX1-NEXT: vxorps %ymm0, %ymm2, %ymm0
1060-
; AVX1-NEXT: vxorps %ymm1, %ymm3, %ymm1
1061-
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm2
1062-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1063-
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
1064-
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
1065-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1066-
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
1067-
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
1068-
; AVX1-NEXT: vpsubw %xmm0, %xmm6, %xmm0
1069-
; AVX1-NEXT: vpsubw %xmm3, %xmm5, %xmm3
1070-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
1071-
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
1072-
; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
1073-
; AVX1-NEXT: vpsubw %xmm2, %xmm4, %xmm2
1074-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
1033+
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
1034+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
1035+
; AVX1-NEXT: vpavgw %xmm4, %xmm5, %xmm4
1036+
; AVX1-NEXT: vpavgw %xmm2, %xmm0, %xmm0
1037+
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
1038+
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
1039+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1040+
; AVX1-NEXT: vpavgw %xmm2, %xmm4, %xmm2
1041+
; AVX1-NEXT: vpavgw %xmm3, %xmm1, %xmm1
1042+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
10751043
; AVX1-NEXT: retq
10761044
;
10771045
; AVX2-LABEL: test_fixed_v32i16:

0 commit comments

Comments (0)