Skip to content

Commit 5282202

Browse files
committed
[LSR] Add a test case mentioned in review
As mentioned in #74747, this case triggers a particularly high-cost trip count expansion.
1 parent c003d85 commit 5282202

File tree

1 file changed

+46
-0
lines changed

1 file changed

+46
-0
lines changed

llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,3 +593,49 @@ for.body: ; preds = %for.body, %entry
593593
for.end: ; preds = %for.body
594594
ret void
595595
}
596+
597+
; Regression test for LSR terminating-condition folding when the trip count is
; not a compile-time constant: the integer counter starts at 0, advances by the
; argument %step, and the loop exits once the counter is signed >= %n.
; The FileCheck assertions below pin the expected output: the integer counter
; is gone, the exit test is a pointer equality against a precomputed end
; pointer, and building that end pointer in the entry block requires an
; smax / umin / umax / udiv / zext / sext / mul chain.
define void @expensive_expand_unknown_tc2(ptr %a, i32 %offset, i32 %n, i32 %step) mustprogress {
598+
; CHECK-LABEL: @expensive_expand_unknown_tc2(
599+
; CHECK-NEXT: entry:
600+
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
601+
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
602+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[STEP:%.*]], i32 [[N:%.*]])
603+
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[SMAX]], [[STEP]]
604+
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 1)
605+
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
606+
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[STEP]], i32 1)
607+
; CHECK-NEXT: [[TMP2:%.*]] = udiv i32 [[TMP1]], [[UMAX]]
608+
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[UMIN]], [[TMP2]]
609+
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
610+
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
611+
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
612+
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP5]], [[TMP6]]
613+
; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP7]], 84
614+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
615+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
616+
; CHECK: for.body:
617+
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
618+
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
619+
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
620+
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
621+
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
622+
; CHECK: for.end:
623+
; CHECK-NEXT: ret void
624+
;
625+
entry:
626+
%offset.nonzero = or i32 %offset, 1 ; setting bit 0 guarantees the byte stride is never zero
627+
%uglygep = getelementptr i8, ptr %a, i64 84 ; store pointer starts 84 bytes past %a
628+
br label %for.body
629+
630+
for.body: ; preds = %for.body, %entry
631+
%lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
632+
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 0, %entry ] ; integer counter; feeds only the exit test
633+
store i32 1, ptr %lsr.iv1, align 4
634+
%lsr.iv.next = add nsw i32 %lsr.iv, %step ; step is a function argument, so the trip count is not a constant
635+
%uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 %offset.nonzero ; advance the store pointer by the nonzero stride
636+
%exitcond.not = icmp sge i32 %lsr.iv.next, %n ; signed compare: leave once the counter reaches %n
637+
br i1 %exitcond.not, label %for.end, label %for.body
638+
639+
for.end: ; preds = %for.body
640+
ret void
641+
}

0 commit comments

Comments
 (0)