Skip to content

Commit 3fe8fd7

Browse files
committed
[AArch64] Fix st2 check for nearby store with debug info.
The scan for a nearby paired store needs to skip over debug instructions, without counting them towards MaxLookupDist.
1 parent 52315f9 commit 3fe8fd7

File tree

2 files changed

+103
-17
lines changed

2 files changed

+103
-17
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15295,7 +15295,11 @@ bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL) {
1529515295
const Value *PtrA1 =
1529615296
Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1529715297

15298-
while (++It != End && !It->isDebugOrPseudoInst() && MaxLookupDist-- > 0) {
15298+
while (++It != End) {
15299+
if (It->isDebugOrPseudoInst())
15300+
continue;
15301+
if (MaxLookupDist-- == 0)
15302+
break;
1529915303
if (const auto *SI = dyn_cast<StoreInst>(&*It)) {
1530015304
const Value *PtrB1 =
1530115305
SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(

llvm/test/CodeGen/AArch64/vldn_shuffle.ll

Lines changed: 98 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
define void @vld2(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 %numSamples) {
55
; CHECK-LABEL: vld2:
6-
; CHECK: // %bb.0: // %entry
6+
; CHECK: .Lfunc_begin0:
7+
; CHECK-NEXT: .cfi_startproc
8+
; CHECK-NEXT: // %bb.0: // %entry
79
; CHECK-NEXT: mov x8, xzr
810
; CHECK-NEXT: .LBB0_1: // %vector.body
911
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -41,7 +43,9 @@ while.end: ; preds = %vector.body
4143

4244
define void @vld3(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 %numSamples) {
4345
; CHECK-LABEL: vld3:
44-
; CHECK: // %bb.0: // %entry
46+
; CHECK: .Lfunc_begin1:
47+
; CHECK-NEXT: .cfi_startproc
48+
; CHECK-NEXT: // %bb.0: // %entry
4549
; CHECK-NEXT: mov x8, xzr
4650
; CHECK-NEXT: .LBB1_1: // %vector.body
4751
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -83,7 +87,9 @@ while.end: ; preds = %vector.body
8387

8488
define void @vld4(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 %numSamples) {
8589
; CHECK-LABEL: vld4:
86-
; CHECK: // %bb.0: // %entry
90+
; CHECK: .Lfunc_begin2:
91+
; CHECK-NEXT: .cfi_startproc
92+
; CHECK-NEXT: // %bb.0: // %entry
8793
; CHECK-NEXT: mov x8, xzr
8894
; CHECK-NEXT: .LBB2_1: // %vector.body
8995
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -131,7 +137,9 @@ while.end: ; preds = %vector.body
131137

132138
define void @twosrc(ptr nocapture readonly %pSrc, ptr nocapture readonly %pSrc2, ptr noalias nocapture %pDst, i32 %numSamples) {
133139
; CHECK-LABEL: twosrc:
134-
; CHECK: // %bb.0: // %entry
140+
; CHECK: .Lfunc_begin3:
141+
; CHECK-NEXT: .cfi_startproc
142+
; CHECK-NEXT: // %bb.0: // %entry
135143
; CHECK-NEXT: mov x8, xzr
136144
; CHECK-NEXT: .LBB3_1: // %vector.body
137145
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -175,7 +183,9 @@ while.end: ; preds = %vector.body
175183

176184
define void @vld2_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 %numSamples) {
177185
; CHECK-LABEL: vld2_multiuse:
178-
; CHECK: // %bb.0: // %entry
186+
; CHECK: .Lfunc_begin4:
187+
; CHECK-NEXT: .cfi_startproc
188+
; CHECK-NEXT: // %bb.0: // %entry
179189
; CHECK-NEXT: mov x8, xzr
180190
; CHECK-NEXT: .LBB4_1: // %vector.body
181191
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -212,7 +222,9 @@ while.end: ; preds = %vector.body
212222

213223
define void @vld3_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 %numSamples) {
214224
; CHECK-LABEL: vld3_multiuse:
215-
; CHECK: // %bb.0: // %entry
225+
; CHECK: .Lfunc_begin5:
226+
; CHECK-NEXT: .cfi_startproc
227+
; CHECK-NEXT: // %bb.0: // %entry
216228
; CHECK-NEXT: mov x8, xzr
217229
; CHECK-NEXT: .LBB5_1: // %vector.body
218230
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -252,7 +264,9 @@ while.end: ; preds = %vector.body
252264

253265
define void @vld4_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 %numSamples) {
254266
; CHECK-LABEL: vld4_multiuse:
255-
; CHECK: // %bb.0: // %entry
267+
; CHECK: .Lfunc_begin6:
268+
; CHECK-NEXT: .cfi_startproc
269+
; CHECK-NEXT: // %bb.0: // %entry
256270
; CHECK-NEXT: mov x8, xzr
257271
; CHECK-NEXT: .LBB6_1: // %vector.body
258272
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -299,7 +313,9 @@ while.end: ; preds = %vector.body
299313
; as a single store. This avoids the vld2 for data that is already shuffled.
300314
define void @transpose_s16_8x8_simpler(ptr nocapture noundef %a) {
301315
; CHECK-LABEL: transpose_s16_8x8_simpler:
302-
; CHECK: // %bb.0: // %entry
316+
; CHECK: .Lfunc_begin7:
317+
; CHECK-NEXT: .cfi_startproc
318+
; CHECK-NEXT: // %bb.0: // %entry
303319
; CHECK-NEXT: ldp q0, q1, [x0]
304320
; CHECK-NEXT: ldp q2, q3, [x0, #64]
305321
; CHECK-NEXT: ldp q4, q5, [x0, #32]
@@ -350,7 +366,9 @@ entry:
350366
; Same as above with some different shuffles
351367
define void @transpose_s16_8x8_simpler2(ptr nocapture noundef %a) {
352368
; CHECK-LABEL: transpose_s16_8x8_simpler2:
353-
; CHECK: // %bb.0: // %entry
369+
; CHECK: .Lfunc_begin8:
370+
; CHECK-NEXT: .cfi_startproc
371+
; CHECK-NEXT: // %bb.0: // %entry
354372
; CHECK-NEXT: ldp q0, q2, [x0]
355373
; CHECK-NEXT: ldp q3, q4, [x0, #64]
356374
; CHECK-NEXT: ldp q5, q6, [x0, #32]
@@ -401,7 +419,9 @@ entry:
401419

402420
define void @transpose_s16_8x8(ptr nocapture noundef %0, ptr nocapture noundef %1, ptr nocapture noundef %2, ptr nocapture noundef %3, ptr nocapture noundef %4, ptr nocapture noundef %5, ptr nocapture noundef %6, ptr nocapture noundef %7) {
403421
; CHECK-LABEL: transpose_s16_8x8:
404-
; CHECK: // %bb.0:
422+
; CHECK: .Lfunc_begin9:
423+
; CHECK-NEXT: .cfi_startproc
424+
; CHECK-NEXT: // %bb.0:
405425
; CHECK-NEXT: ldr q0, [x0]
406426
; CHECK-NEXT: ldr q1, [x1]
407427
; CHECK-NEXT: ldr q3, [x4]
@@ -492,7 +512,9 @@ define void @transpose_s16_8x8(ptr nocapture noundef %0, ptr nocapture noundef %
492512

493513
define void @transpose_s16_8x8_(ptr nocapture noundef %0) {
494514
; CHECK-LABEL: transpose_s16_8x8_:
495-
; CHECK: // %bb.0:
515+
; CHECK: .Lfunc_begin10:
516+
; CHECK-NEXT: .cfi_startproc
517+
; CHECK-NEXT: // %bb.0:
496518
; CHECK-NEXT: ldp q0, q1, [x0]
497519
; CHECK-NEXT: ldp q2, q3, [x0, #32]
498520
; CHECK-NEXT: ldp q4, q5, [x0, #64]
@@ -586,7 +608,9 @@ define void @transpose_s16_8x8_(ptr nocapture noundef %0) {
586608

587609
define void @store_factor2(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1) {
588610
; CHECK-LABEL: store_factor2:
589-
; CHECK: // %bb.0:
611+
; CHECK: .Lfunc_begin11:
612+
; CHECK-NEXT: .cfi_startproc
613+
; CHECK-NEXT: // %bb.0:
590614
; CHECK-NEXT: trn1 v2.4s, v0.4s, v1.4s
591615
; CHECK-NEXT: trn1 v3.4s, v1.4s, v0.4s
592616
; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0]
@@ -600,7 +624,9 @@ define void @store_factor2(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1) {
600624

601625
define void @store_factor2_high(ptr %ptr, ptr %ptr2, <4 x i32> %a0, <4 x i32> %a1) {
602626
; CHECK-LABEL: store_factor2_high:
603-
; CHECK: // %bb.0:
627+
; CHECK: .Lfunc_begin12:
628+
; CHECK-NEXT: .cfi_startproc
629+
; CHECK-NEXT: // %bb.0:
604630
; CHECK-NEXT: trn1 v2.4s, v0.4s, v1.4s
605631
; CHECK-NEXT: trn1 v0.4s, v1.4s, v0.4s
606632
; CHECK-NEXT: zip1 v1.4s, v2.4s, v0.4s
@@ -620,7 +646,9 @@ define void @store_factor2_high(ptr %ptr, ptr %ptr2, <4 x i32> %a0, <4 x i32> %a
620646

621647
define void @store_factor2_high2(ptr %ptr, ptr %ptr2, <4 x i32> %a0, <4 x i32> %a1) {
622648
; CHECK-LABEL: store_factor2_high2:
623-
; CHECK: // %bb.0:
649+
; CHECK: .Lfunc_begin13:
650+
; CHECK-NEXT: .cfi_startproc
651+
; CHECK-NEXT: // %bb.0:
624652
; CHECK-NEXT: zip1 v2.4s, v0.4s, v1.4s
625653
; CHECK-NEXT: zip2 v0.4s, v0.4s, v1.4s
626654
; CHECK-NEXT: trn1 v2.4s, v2.4s, v1.4s
@@ -636,7 +664,9 @@ define void @store_factor2_high2(ptr %ptr, ptr %ptr2, <4 x i32> %a0, <4 x i32> %
636664

637665
define void @store_factor3(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
638666
; CHECK-LABEL: store_factor3:
639-
; CHECK: // %bb.0:
667+
; CHECK: .Lfunc_begin14:
668+
; CHECK-NEXT: .cfi_startproc
669+
; CHECK-NEXT: // %bb.0:
640670
; CHECK-NEXT: ext v3.16b, v0.16b, v1.16b, #12
641671
; CHECK-NEXT: ext v6.16b, v1.16b, v2.16b, #12
642672
; CHECK-NEXT: zip2 v3.4s, v0.4s, v3.4s
@@ -660,7 +690,9 @@ define void @store_factor3(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2
660690

661691
define void @store_factor4(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
662692
; CHECK-LABEL: store_factor4:
663-
; CHECK: // %bb.0:
693+
; CHECK: .Lfunc_begin15:
694+
; CHECK-NEXT: .cfi_startproc
695+
; CHECK-NEXT: // %bb.0:
664696
; CHECK-NEXT: trn1 v4.4s, v0.4s, v1.4s
665697
; CHECK-NEXT: trn1 v5.4s, v1.4s, v2.4s
666698
; CHECK-NEXT: trn1 v6.4s, v2.4s, v3.4s
@@ -677,3 +709,53 @@ define void @store_factor4(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2
677709
store <16 x i32> %interleaved.vec, ptr %ptr, align 4
678710
ret void
679711
}
712+
713+
define void @debuginfo(ptr nocapture noundef writeonly %buf, <8 x i16> noundef %a) {
714+
; CHECK-LABEL: debuginfo:
715+
; CHECK: .Lfunc_begin16:
716+
; CHECK-NEXT: .cfi_startproc
717+
; CHECK-NEXT: // %bb.0: // %entry
718+
; CHECK-NEXT: movi v1.2d, #0000000000000000
719+
; CHECK-NEXT: zip1 v2.8h, v0.8h, v1.8h
720+
; CHECK-NEXT: zip2 v0.8h, v0.8h, v1.8h
721+
; CHECK-NEXT: stp q2, q0, [x0]
722+
; CHECK-NEXT: ret
723+
entry:
724+
%vzip.i = shufflevector <8 x i16> %a, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 poison, i16 poison, i16 poison, i16 poison>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
725+
%vzip1.i = shufflevector <8 x i16> %a, <8 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0>, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
726+
store <8 x i16> %vzip.i, ptr %buf, align 4
727+
call void @llvm.dbg.value(metadata <8 x i16> %vzip1.i, metadata !21, metadata !DIExpression()), !dbg !23
728+
%add.ptr = getelementptr inbounds i32, ptr %buf, i64 4
729+
store <8 x i16> %vzip1.i, ptr %add.ptr, align 4
730+
ret void
731+
}
732+
733+
declare void @llvm.dbg.value(metadata, metadata, metadata)
734+
735+
!llvm.dbg.cu = !{!0}
736+
!llvm.module.flags = !{!6, !7, !8, !9, !10, !11}
737+
738+
!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: None)
739+
!1 = !DIFile(filename: "a64.c", directory: "", checksumkind: CSK_MD5, checksum: "a1a236fb20d703d1ea5963e75545b91a")
740+
!2 = !{!15}
741+
!3 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
742+
!4 = !{!5}
743+
!5 = !DISubrange(count: 8)
744+
!6 = !{i32 7, !"Dwarf Version", i32 5}
745+
!7 = !{i32 2, !"Debug Info Version", i32 3}
746+
!8 = !{i32 1, !"wchar_size", i32 4}
747+
!9 = !{i32 7, !"uwtable", i32 2}
748+
!10 = !{i32 7, !"frame-pointer", i32 1}
749+
!11 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
750+
!12 = !DISubroutineType(types: !13)
751+
!13 = !{null, !14, !15}
752+
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !3, size: 64)
753+
!15 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !16)
754+
!16 = !DIDerivedType(tag: DW_TAG_typedef, name: "int16x8_t", file: !1, line: 57, baseType: !17)
755+
!17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, size: 128, flags: DIFlagVector, elements: !4)
756+
!18 = !DIBasicType(name: "short", size: 16, encoding: DW_ATE_signed)
757+
!19 = distinct !DISubprogram(name: "store_s16q_to_tran_low_", scope: !1, file: !1, line: 13, type: !12, scopeLine: 13, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20)
758+
!20 = !{!21}
759+
!21 = !DILocalVariable(name: "__s1", scope: !22, file: !1, line: 16, type: !16)
760+
!22 = distinct !DILexicalBlock(scope: !19, file: !1, line: 16, column: 3)
761+
!23 = !DILocation(line: 0, scope: !22)

0 commit comments

Comments
 (0)