llvm · RKSimon · Mar 24, 2024 · Mar 21, 2024 · Mar 22, 2024 · Mar 22, 2024
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2529,20 +2529,28 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
 }
 
-// Attempt to form avgceilu(A, B) from (A | B) - ((A ^ B) >> 1)
-static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
+// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
+static SDValue combineFixedwidthToAVGCEIL(SDNode *N, SelectionDAG &DAG) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue N0 = N->getOperand(0);
   EVT VT = N0.getValueType();
   SDLoc DL(N);
+  SDValue A, B;
+
   if (TLI.isOperationLegal(ISD::AVGCEILU, VT)) {
-    SDValue A, B;
     if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
                           m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
                                 m_SpecificInt(1))))) {
       return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
     }
   }
+  if (TLI.isOperationLegal(ISD::AVGCEILS, VT)) {
+    if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
+                          m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
+                                m_SpecificInt(1))))) {
+      return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
+    }
+  }
   return SDValue();
 }
 
@@ -2837,20 +2845,29 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
   return SDValue();
 }
 
-// Attempt to form avgflooru(A, B) from (A & B) + ((A ^ B) >> 1)
-static SDValue combineFixedwidthToAVGFLOORU(SDNode *N, SelectionDAG &DAG) {
+// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
+static SDValue combineFixedwidthToAVGFLOOR(SDNode *N, SelectionDAG &DAG) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue N0 = N->getOperand(0);
   EVT VT = N0.getValueType();
   SDLoc DL(N);
+  SDValue A, B;
+
   if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) {
-    SDValue A, B;
     if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
                           m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
                                 m_SpecificInt(1))))) {
       return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
     }
   }
+  if (TLI.isOperationLegal(ISD::AVGFLOORS, VT)) {
+    if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
+                          m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
+                                m_SpecificInt(1))))) {
+      return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
+    }
+  }
+
   return SDValue();
 }
 
@@ -2869,8 +2886,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
   if (SDValue V = foldAddSubOfSignBit(N, DAG))
     return V;
 
-  // Try to match AVGFLOORU fixedwidth pattern
-  if (SDValue V = combineFixedwidthToAVGFLOORU(N, DAG))
+  // Try to match AVGFLOOR fixedwidth pattern
+  if (SDValue V = combineFixedwidthToAVGFLOOR(N, DAG))
     return V;
 
   // fold (a+b) -> (a|b) iff a and b share no bits.
@@ -3863,8 +3880,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubOfSignBit(N, DAG))
     return V;
 
-  // Try to match AVGCEILU fixedwidth pattern
-  if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
+  // Try to match AVGCEIL fixedwidth pattern
+  if (SDValue V = combineFixedwidthToAVGCEIL(N, DAG))
     return V;
 
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))

diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -341,6 +341,18 @@ define <8 x i16> @sub_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1)  {
   ret <8 x i16> %res
 }
 
+define <8 x i16> @srhadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1)  {
+; CHECK-LABEL: srhadd_fixedwidth_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %or = or <8 x i16> %a0, %a1
+  %xor = xor <8 x i16> %a0, %a1
+  %srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %res = sub <8 x i16> %or, %srl
+  ret <8 x i16> %res
+}
+
 define <8 x i16> @rhaddu_base(<8 x i16> %src1, <8 x i16> %src2) {
 ; CHECK-LABEL: rhaddu_base:
 ; CHECK:       // %bb.0:
@@ -879,6 +891,18 @@ define <8 x i16> @uhadd_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1)  {
   ret <8 x i16> %res
 }
 
+define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1)  {
+; CHECK-LABEL: shadd_fixedwidth_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %and = and <8 x i16> %a0, %a1
+  %xor = xor <8 x i16> %a0, %a1
+  %srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %res = add <8 x i16> %and, %srl
+  ret <8 x i16> %res
+}
+
 declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
 declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
 declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)