Skip to content

Commit 8b41551

Browse files
committed
[AArch64] Add a slp vectorization test for extract and shuffle costs. NFC
1 parent 0e3829e commit 8b41551

File tree

1 file changed

+50
-0
lines changed

1 file changed

+50
-0
lines changed

llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,56 @@ while.end71: ; preds = %while.body38, %whil
635635
ret void
636636
}
637637

638+
; FIXME: This should not be vectorized (further), as doing so requires expensive shuffles.
639+
; The old cost of the or+extract should be 2*1 (or) + 4*2 (extract). The new
640+
; cost should be 1*1 (or) + 2*2 (extract) + at least 4 (shuffles).
641+
; Input pattern: two <2 x i64> compare masks are bitcast to <16 x i8>, bytes 0
; and 8 of each are extracted, OR-reduced as scalars and compared against zero.
; The check lines below record the current output, where the first two scalar
; ORs become a single <2 x i8> or fed by two shufflevectors.
define i1 @tryMapToRange(ptr %values, ptr %result, <2 x i64> %hi, <2 x i64> %lo) {
642+
; CHECK-LABEL: @tryMapToRange(
643+
; CHECK-NEXT: [[L:%.*]] = load <2 x i64>, ptr [[VALUES:%.*]], align 8
644+
; CHECK-NEXT: [[C1:%.*]] = icmp sgt <2 x i64> [[L]], [[HI:%.*]]
645+
; CHECK-NEXT: [[S1:%.*]] = sext <2 x i1> [[C1]] to <2 x i64>
646+
; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x i64> [[S1]] to <16 x i8>
647+
; CHECK-NEXT: [[A1:%.*]] = and <16 x i8> [[BC1]], <i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
648+
; CHECK-NEXT: [[C2:%.*]] = icmp slt <2 x i64> [[L]], [[LO:%.*]]
649+
; CHECK-NEXT: [[S2:%.*]] = sext <2 x i1> [[C2]] to <2 x i64>
650+
; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x i64> [[S2]] to <16 x i8>
651+
; CHECK-NEXT: [[A2:%.*]] = and <16 x i8> [[BC2]], <i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
652+
; CHECK-NEXT: [[REASS_SUB:%.*]] = sub <2 x i64> [[L]], [[LO]]
653+
; CHECK-NEXT: [[ADD_I_I_I_I_I_I:%.*]] = add <2 x i64> [[REASS_SUB]], splat (i64 1)
654+
; CHECK-NEXT: store <2 x i64> [[ADD_I_I_I_I_I_I]], ptr [[RESULT:%.*]], align 8
655+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A1]], <16 x i8> [[A2]], <2 x i32> <i32 8, i32 24>
656+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[A1]], <16 x i8> [[A2]], <2 x i32> <i32 0, i32 16>
657+
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i8> [[TMP1]], [[TMP2]]
658+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP3]], i32 0
659+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1
660+
; CHECK-NEXT: [[O3:%.*]] = or i8 [[TMP4]], [[TMP5]]
661+
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[O3]], 0
662+
; CHECK-NEXT: ret i1 [[C]]
663+
;
664+
; Load the two i64 values that are compared against %hi and %lo.
%l = load <2 x i64>, ptr %values, align 8
665+
; Lanes where %l > %hi (signed), sign-extended to all-ones/zero i64 masks and
; then reinterpreted as 16 bytes.
%c1 = icmp sgt <2 x i64> %l, %hi
666+
%s1 = sext <2 x i1> %c1 to <2 x i64>
667+
%bc1 = bitcast <2 x i64> %s1 to <16 x i8>
668+
; Keep bit 0 of bytes 0 and 8; all other mask lanes are poison, so only those
; two result bytes are meaningful.
%a1 = and <16 x i8> %bc1, <i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
669+
; Scalar extracts of bytes 0 and 8 (the first byte of each i64 lane).
%e1 = extractelement <16 x i8> %a1, i64 0
670+
%e2 = extractelement <16 x i8> %a1, i64 8
671+
; Same sequence for lanes where %l < %lo (signed).
%c2 = icmp slt <2 x i64> %l, %lo
672+
%s2 = sext <2 x i1> %c2 to <2 x i64>
673+
%bc2 = bitcast <2 x i64> %s2 to <16 x i8>
674+
%a2 = and <16 x i8> %bc2, <i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
675+
%e3 = extractelement <16 x i8> %a2, i64 0
676+
%e4 = extractelement <16 x i8> %a2, i64 8
677+
; Separate vector computation that does not feed the returned value:
; stores (%l - %lo) + 1 to %result.
%reass.sub = sub <2 x i64> %l, %lo
678+
%add.i.i.i.i.i.i = add <2 x i64> %reass.sub, splat (i64 1)
679+
store <2 x i64> %add.i.i.i.i.i.i, ptr %result, align 8
680+
; OR the four extracted bytes together; the function returns true only when
; none of them is set.
%o1 = or i8 %e2, %e1
681+
%o2 = or i8 %e4, %e3
682+
%o3 = or i8 %o1, %o2
683+
%c = icmp eq i8 %o3, 0
684+
ret i1 %c
685+
}
686+
687+
638688
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #1
639689
declare <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8>) #2
640690
declare <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16>) #2

0 commit comments

Comments
 (0)