Skip to content

Commit aaca8e2

Browse files
committed
[AArch64] Don't recreate nodes in tryCombineLongOpWithDup
If isEssentiallyExtractHighSubvector does not match either operand, there is little point in recreating the node with the same operands. Returning an empty SDValue() better communicates that no changes were made. This fixes llvm#63491 by no longer recreating uabd nodes with swapped operands. As noted in the ticket, there are other fixes that might also be useful to make, but this should prevent the infinite combine.
1 parent 939c035 commit aaca8e2

File tree

2 files changed

+30
-3
lines changed

2 files changed

+30
-3
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18586,7 +18586,8 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
1858618586
LHS = tryExtendDUPToExtractHigh(LHS, DAG);
1858718587
if (!LHS.getNode())
1858818588
return SDValue();
18589-
}
18589+
} else
18590+
return SDValue();
1859018591

1859118592
if (IID == Intrinsic::not_intrinsic)
1859218593
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);

llvm/test/CodeGen/AArch64/abd-combine.ll

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -260,7 +260,7 @@ define <8 x i16> @abdu_i_const_bothhigh() {
260260
define <8 x i16> @abdu_i_const_onehigh() {
261261
; CHECK-LABEL: abdu_i_const_onehigh:
262262
; CHECK: // %bb.0:
263-
; CHECK-NEXT: mov w8, #32765
263+
; CHECK-NEXT: mov w8, #32765 // =0x7ffd
264264
; CHECK-NEXT: dup v0.8h, w8
265265
; CHECK-NEXT: ret
266266
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -480,7 +480,7 @@ define <8 x i16> @abds_i_const_bothhigh() {
480480
define <8 x i16> @abds_i_const_onehigh() {
481481
; CHECK-LABEL: abds_i_const_onehigh:
482482
; CHECK: // %bb.0:
483-
; CHECK-NEXT: mov w8, #32765
483+
; CHECK-NEXT: mov w8, #32765 // =0x7ffd
484484
; CHECK-NEXT: dup v0.8h, w8
485485
; CHECK-NEXT: ret
486486
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -527,7 +527,33 @@ define <8 x i16> @abds_i_reassoc(<8 x i16> %src1) {
527527
ret <8 x i16> %result
528528
}
529529

530+
define <1 x i64> @recursive() {
531+
; CHECK-LABEL: recursive:
532+
; CHECK: // %bb.0:
533+
; CHECK-NEXT: movi v0.8b, #1
534+
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
535+
; CHECK-NEXT: uabd v2.8b, v0.8b, v1.8b
536+
; CHECK-NEXT: uabdl v0.8h, v0.8b, v1.8b
537+
; CHECK-NEXT: dup v1.8b, v2.b[0]
538+
; CHECK-NEXT: saddlp v0.1d, v0.2s
539+
; CHECK-NEXT: orr v0.8b, v1.8b, v0.8b
540+
; CHECK-NEXT: ret
541+
%1 = tail call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> zeroinitializer, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
542+
%2 = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
543+
%3 = zext <8 x i8> %2 to <8 x i16>
544+
%4 = bitcast <8 x i16> %3 to <4 x i32>
545+
%5 = shufflevector <4 x i32> %4, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
546+
%6 = shufflevector <8 x i8> %2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
547+
%7 = bitcast <16 x i8> %6 to <2 x i64>
548+
%8 = shufflevector <2 x i64> %7, <2 x i64> zeroinitializer, <1 x i32> zeroinitializer
549+
%9 = tail call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %5)
550+
%10 = or <1 x i64> %8, %9
551+
ret <1 x i64> %10
552+
}
530553

554+
declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>)
555+
declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>)
556+
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
531557
declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
532558
declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
533559
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)

0 commit comments

Comments
 (0)