Skip to content

Commit b511537

Browse files
committed
[LSR] Convert some tests to opaque pointers (NFC)
1 parent 2c9aba9 commit b511537

File tree

8 files changed

+117
-131
lines changed

8 files changed

+117
-131
lines changed

llvm/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2-
; RUN: opt -opaque-pointers=0 -loop-reduce -S < %s | FileCheck %s
2+
; RUN: opt -loop-reduce -S < %s | FileCheck %s
33
;
44
; Test LSR's intelligence regarding phi reuse.
55
; Verify that scaled GEPs are not reused. rdar://5064068
@@ -10,34 +10,28 @@ target triple = "x86_64-apple-darwin"
1010
target datalayout = "n8:16:32:64"
1111

1212

13-
define float @test(float* nocapture %A, float* nocapture %B, i32 %N, i32 %IA, i32 %IB) nounwind uwtable readonly ssp {
13+
define float @test(ptr nocapture %A, ptr nocapture %B, i32 %N, i32 %IA, i32 %IB) nounwind uwtable readonly ssp {
1414
; CHECK-LABEL: define float @test
15-
; CHECK-SAME: (float* nocapture [[A:%.*]], float* nocapture [[B:%.*]], i32 [[N:%.*]], i32 [[IA:%.*]], i32 [[IB:%.*]]) #[[ATTR0:[0-9]+]] {
15+
; CHECK-SAME: (ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]], i32 [[N:%.*]], i32 [[IA:%.*]], i32 [[IB:%.*]]) #[[ATTR0:[0-9]+]] {
1616
; CHECK-NEXT: entry:
1717
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N]], 0
1818
; CHECK-NEXT: br i1 [[CMP1]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]]
1919
; CHECK: while.body.lr.ph:
2020
; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[IA]] to i64
2121
; CHECK-NEXT: [[IDX_EXT2:%.*]] = sext i32 [[IB]] to i64
22-
; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[IDX_EXT]], 2
23-
; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[IDX_EXT2]], 2
2422
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
2523
; CHECK: while.body:
26-
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi float* [ [[TMP5:%.*]], [[WHILE_BODY]] ], [ [[B]], [[WHILE_BODY_LR_PH]] ]
27-
; CHECK-NEXT: [[LSR_IV:%.*]] = phi float* [ [[TMP4:%.*]], [[WHILE_BODY]] ], [ [[A]], [[WHILE_BODY_LR_PH]] ]
24+
; CHECK-NEXT: [[A_ADDR_05:%.*]] = phi ptr [ [[A]], [[WHILE_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[WHILE_BODY]] ]
25+
; CHECK-NEXT: [[B_ADDR_04:%.*]] = phi ptr [ [[B]], [[WHILE_BODY_LR_PH]] ], [ [[ADD_PTR3:%.*]], [[WHILE_BODY]] ]
2826
; CHECK-NEXT: [[N_ADDR_03:%.*]] = phi i32 [ [[N]], [[WHILE_BODY_LR_PH]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ]
2927
; CHECK-NEXT: [[SUM0_02:%.*]] = phi float [ 0.000000e+00, [[WHILE_BODY_LR_PH]] ], [ [[ADD:%.*]], [[WHILE_BODY]] ]
30-
; CHECK-NEXT: [[LSR_IV1:%.*]] = bitcast float* [[LSR_IV]] to i1*
31-
; CHECK-NEXT: [[LSR_IV23:%.*]] = bitcast float* [[LSR_IV2]] to i1*
32-
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[LSR_IV]], align 4
33-
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[LSR_IV2]], align 4
34-
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP2]], [[TMP3]]
28+
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_05]], align 4
29+
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR_04]], align 4
30+
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP0]], [[TMP1]]
3531
; CHECK-NEXT: [[ADD]] = fadd float [[SUM0_02]], [[MUL]]
32+
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, ptr [[A_ADDR_05]], i64 [[IDX_EXT]]
33+
; CHECK-NEXT: [[ADD_PTR3]] = getelementptr inbounds float, ptr [[B_ADDR_04]], i64 [[IDX_EXT2]]
3634
; CHECK-NEXT: [[SUB]] = add nsw i32 [[N_ADDR_03]], -1
37-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i1, i1* [[LSR_IV1]], i64 [[TMP0]]
38-
; CHECK-NEXT: [[TMP4]] = bitcast i1* [[SCEVGEP]] to float*
39-
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i1, i1* [[LSR_IV23]], i64 [[TMP1]]
40-
; CHECK-NEXT: [[TMP5]] = bitcast i1* [[SCEVGEP4]] to float*
4135
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[SUB]], 0
4236
; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT:%.*]]
4337
; CHECK: while.end.loopexit:
@@ -56,16 +50,16 @@ while.body.lr.ph: ; preds = %entry
5650
br label %while.body
5751

5852
while.body: ; preds = %while.body.lr.ph, %while.body
59-
%A.addr.05 = phi float* [ %A, %while.body.lr.ph ], [ %add.ptr, %while.body ]
60-
%B.addr.04 = phi float* [ %B, %while.body.lr.ph ], [ %add.ptr3, %while.body ]
53+
%A.addr.05 = phi ptr [ %A, %while.body.lr.ph ], [ %add.ptr, %while.body ]
54+
%B.addr.04 = phi ptr [ %B, %while.body.lr.ph ], [ %add.ptr3, %while.body ]
6155
%N.addr.03 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
6256
%Sum0.02 = phi float [ 0.000000e+00, %while.body.lr.ph ], [ %add, %while.body ]
63-
%0 = load float, float* %A.addr.05, align 4
64-
%1 = load float, float* %B.addr.04, align 4
57+
%0 = load float, ptr %A.addr.05, align 4
58+
%1 = load float, ptr %B.addr.04, align 4
6559
%mul = fmul float %0, %1
6660
%add = fadd float %Sum0.02, %mul
67-
%add.ptr = getelementptr inbounds float, float* %A.addr.05, i64 %idx.ext
68-
%add.ptr3 = getelementptr inbounds float, float* %B.addr.04, i64 %idx.ext2
61+
%add.ptr = getelementptr inbounds float, ptr %A.addr.05, i64 %idx.ext
62+
%add.ptr3 = getelementptr inbounds float, ptr %B.addr.04, i64 %idx.ext2
6963
%sub = add nsw i32 %N.addr.03, -1
7064
%cmp = icmp sgt i32 %sub, 0
7165
br i1 %cmp, label %while.body, label %while.end

llvm/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2-
; RUN: opt -opaque-pointers=0 -loop-reduce -S < %s | FileCheck %s
2+
; RUN: opt -loop-reduce -S < %s | FileCheck %s
33
;
44
; PR11571: handle a postinc user outside of for.body7 that requires
55
; recursive expansion of a quadratic recurrence within for.body7. LSR
@@ -16,23 +16,19 @@ define void @vb() nounwind {
1616
; CHECK-NEXT: for.cond.preheader:
1717
; CHECK-NEXT: br label [[FOR_BODY7:%.*]]
1818
; CHECK: for.body7:
19-
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi [121 x i32]* [ [[TMP0:%.*]], [[FOR_BODY7]] ], [ bitcast (i32* getelementptr inbounds ([121 x i32], [121 x i32]* @b, i32 0, i32 1) to [121 x i32]*), [[FOR_COND_PREHEADER:%.*]] ]
19+
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[FOR_BODY7]] ], [ getelementptr inbounds ([121 x i32], ptr @b, i32 0, i32 1), [[FOR_COND_PREHEADER:%.*]] ]
2020
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY7]] ], [ 8, [[FOR_COND_PREHEADER]] ]
2121
; CHECK-NEXT: [[INDVARS_IV77:%.*]] = phi i32 [ [[INDVARS_IV_NEXT78:%.*]], [[FOR_BODY7]] ], [ 1, [[FOR_COND_PREHEADER]] ]
22-
; CHECK-NEXT: [[LSR_IV12:%.*]] = bitcast [121 x i32]* [[LSR_IV1]] to i1*
2322
; CHECK-NEXT: [[INDVARS_IV_NEXT78]] = add i32 [[INDVARS_IV77]], 1
2423
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i32 [[LSR_IV]], 4
25-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i1, i1* [[LSR_IV12]], i32 [[LSR_IV]]
26-
; CHECK-NEXT: [[TMP0]] = bitcast i1* [[SCEVGEP]] to [121 x i32]*
24+
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[LSR_IV]]
2725
; CHECK-NEXT: br i1 true, label [[FOR_BODY43_PREHEADER:%.*]], label [[FOR_BODY7]]
2826
; CHECK: for.body43.preheader:
2927
; CHECK-NEXT: br label [[FOR_BODY43:%.*]]
3028
; CHECK: for.body43:
31-
; CHECK-NEXT: [[LSR_IV3:%.*]] = phi [121 x i32]* [ [[LSR_IV1]], [[FOR_BODY43_PREHEADER]] ], [ [[TMP1:%.*]], [[FOR_BODY43]] ]
32-
; CHECK-NEXT: [[LSR_IV35:%.*]] = bitcast [121 x i32]* [[LSR_IV3]] to i32*
33-
; CHECK-NEXT: [[T2:%.*]] = load i32, i32* [[LSR_IV35]], align 4
34-
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr [121 x i32], [121 x i32]* [[LSR_IV3]], i32 0, i32 1
35-
; CHECK-NEXT: [[TMP1]] = bitcast i32* [[SCEVGEP4]] to [121 x i32]*
29+
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[LSR_IV1]], [[FOR_BODY43_PREHEADER]] ], [ [[SCEVGEP3:%.*]], [[FOR_BODY43]] ]
30+
; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[LSR_IV2]], align 4
31+
; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i32 4
3632
; CHECK-NEXT: br label [[FOR_BODY43]]
3733
;
3834
for.cond.preheader:
@@ -48,8 +44,8 @@ for.body7:
4844
for.body43:
4945
%bf.459 = phi i32 [ %inc44, %for.body43 ], [ %t1, %for.body7 ]
5046
%inc44 = add nsw i32 %bf.459, 1
51-
%arrayidx45 = getelementptr inbounds [121 x i32], [121 x i32]* @b, i32 0, i32 %bf.459
52-
%t2 = load i32, i32* %arrayidx45, align 4
47+
%arrayidx45 = getelementptr inbounds [121 x i32], ptr @b, i32 0, i32 %bf.459
48+
%t2 = load i32, ptr %arrayidx45, align 4
5349
br label %for.body43
5450
}
5551

llvm/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll

Lines changed: 28 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: opt -opaque-pointers=0 < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=INSN
3-
; RUN: opt -opaque-pointers=0 < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=REGS
4-
; RUN: llc -opaque-pointers=0 < %s -O2 -mtriple=x86_64-unknown-unknown -lsr-insns-cost | FileCheck %s
2+
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=INSN
3+
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=REGS
4+
; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -lsr-insns-cost | FileCheck %s
55

66
; OPT test checks that LSR optimizes the compare for a static counter into a compare with 0.
77

@@ -19,30 +19,24 @@
1919

2020
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2121

22-
define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q) {
22+
define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture %q) {
2323
; INSN-LABEL: @foo(
2424
; INSN-NEXT: entry:
25-
; INSN-NEXT: [[Q1:%.*]] = bitcast i32* [[Q:%.*]] to i8*
26-
; INSN-NEXT: [[Y3:%.*]] = bitcast i32* [[Y:%.*]] to i8*
27-
; INSN-NEXT: [[X7:%.*]] = bitcast i32* [[X:%.*]] to i8*
2825
; INSN-NEXT: br label [[FOR_BODY:%.*]]
2926
; INSN: for.cond.cleanup:
3027
; INSN-NEXT: ret void
3128
; INSN: for.body:
3229
; INSN-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ -4096, [[ENTRY:%.*]] ]
33-
; INSN-NEXT: [[UGLYGEP8:%.*]] = getelementptr i8, i8* [[X7]], i64 [[LSR_IV]]
34-
; INSN-NEXT: [[UGLYGEP89:%.*]] = bitcast i8* [[UGLYGEP8]] to i32*
35-
; INSN-NEXT: [[SCEVGEP10:%.*]] = getelementptr i32, i32* [[UGLYGEP89]], i64 1024
36-
; INSN-NEXT: [[TMP:%.*]] = load i32, i32* [[SCEVGEP10]], align 4
37-
; INSN-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[Y3]], i64 [[LSR_IV]]
38-
; INSN-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to i32*
39-
; INSN-NEXT: [[SCEVGEP6:%.*]] = getelementptr i32, i32* [[UGLYGEP45]], i64 1024
40-
; INSN-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP6]], align 4
30+
; INSN-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[LSR_IV]]
31+
; INSN-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 4096
32+
; INSN-NEXT: [[TMP:%.*]] = load i32, ptr [[SCEVGEP5]], align 4
33+
; INSN-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[LSR_IV]]
34+
; INSN-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4096
35+
; INSN-NEXT: [[TMP1:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
4136
; INSN-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP]]
42-
; INSN-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Q1]], i64 [[LSR_IV]]
43-
; INSN-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i32*
44-
; INSN-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[UGLYGEP2]], i64 1024
45-
; INSN-NEXT: store i32 [[ADD]], i32* [[SCEVGEP]], align 4
37+
; INSN-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[LSR_IV]]
38+
; INSN-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 4096
39+
; INSN-NEXT: store i32 [[ADD]], ptr [[SCEVGEP1]], align 4
4640
; INSN-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4
4741
; INSN-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
4842
; INSN-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -54,13 +48,16 @@ define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* no
5448
; REGS-NEXT: ret void
5549
; REGS: for.body:
5650
; REGS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
57-
; REGS-NEXT: [[SCEVGEP2:%.*]] = getelementptr i32, i32* [[X:%.*]], i64 [[INDVARS_IV]]
58-
; REGS-NEXT: [[TMP:%.*]] = load i32, i32* [[SCEVGEP2]], align 4
59-
; REGS-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[Y:%.*]], i64 [[INDVARS_IV]]
60-
; REGS-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
51+
; REGS-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 2
52+
; REGS-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[TMP0]]
53+
; REGS-NEXT: [[TMP:%.*]] = load i32, ptr [[SCEVGEP2]], align 4
54+
; REGS-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 2
55+
; REGS-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[Y:%.*]], i64 [[TMP1]]
56+
; REGS-NEXT: [[TMP1:%.*]] = load i32, ptr [[SCEVGEP1]], align 4
6157
; REGS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP]]
62-
; REGS-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDVARS_IV]]
63-
; REGS-NEXT: store i32 [[ADD]], i32* [[SCEVGEP]], align 4
58+
; REGS-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 2
59+
; REGS-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[TMP2]]
60+
; REGS-NEXT: store i32 [[ADD]], ptr [[SCEVGEP]], align 4
6461
; REGS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
6562
; REGS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
6663
; REGS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -86,13 +83,13 @@ for.cond.cleanup: ; preds = %for.body
8683

8784
for.body: ; preds = %for.body, %entry
8885
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
89-
%arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
90-
%tmp = load i32, i32* %arrayidx, align 4
91-
%arrayidx2 = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
92-
%tmp1 = load i32, i32* %arrayidx2, align 4
86+
%arrayidx = getelementptr inbounds i32, ptr %x, i64 %indvars.iv
87+
%tmp = load i32, ptr %arrayidx, align 4
88+
%arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv
89+
%tmp1 = load i32, ptr %arrayidx2, align 4
9390
%add = add nsw i32 %tmp1, %tmp
94-
%arrayidx4 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
95-
store i32 %add, i32* %arrayidx4, align 4
91+
%arrayidx4 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv
92+
store i32 %add, ptr %arrayidx4, align 4
9693
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
9794
%exitcond = icmp eq i64 %indvars.iv.next, 1024
9895
br i1 %exitcond, label %for.cond.cleanup, label %for.body

llvm/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,24 @@
1-
; RUN: opt -opaque-pointers=0 < %s -loop-reduce -mtriple=x86_64-- -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
2-
; RUN: opt -opaque-pointers=0 < %s -loop-reduce -mtriple=x86_64-- -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
3-
; RUN: llc -opaque-pointers=0 < %s -O2 -mtriple=x86_64-- -lsr-insns-cost -asm-verbose=0 | FileCheck %s
1+
; RUN: opt < %s -loop-reduce -mtriple=x86_64-- -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
2+
; RUN: opt < %s -loop-reduce -mtriple=x86_64-- -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
3+
; RUN: llc < %s -O2 -mtriple=x86_64-- -lsr-insns-cost -asm-verbose=0 | FileCheck %s
44

55
; OPT checks that LSR prefers fewer instructions to fewer registers.
66
; For x86 LSR should prefer complicated address to new lsr induction
77
; variables.
88

99
; BOTH: for.body:
10-
; INSN: getelementptr i32, i32* %x, i64 %indvars.iv
11-
; INSN: getelementptr i32, i32* %y, i64 %indvars.iv
12-
; INSN: getelementptr i32, i32* %q, i64 %indvars.iv
10+
; INSN: [[OFFSET1:%.+]] = shl nuw nsw i64 %indvars.iv, 2
11+
; INSN: getelementptr i8, ptr %x, i64 [[OFFSET1]]
12+
; INSN: [[OFFSET2:%.+]] = shl nuw nsw i64 %indvars.iv, 2
13+
; INSN: getelementptr i8, ptr %y, i64 [[OFFSET2]]
14+
; INSN: [[OFFSET3:%.+]] = shl nuw nsw i64 %indvars.iv, 2
15+
; INSN: getelementptr i8, ptr %q, i64 [[OFFSET3]]
1316
; REGS: %lsr.iv4 = phi
1417
; REGS: %lsr.iv2 = phi
1518
; REGS: %lsr.iv1 = phi
16-
; REGS: getelementptr i32, i32* %lsr.iv1, i64 1
17-
; REGS: getelementptr i32, i32* %lsr.iv2, i64 1
18-
; REGS: getelementptr i32, i32* %lsr.iv4, i64 1
19+
; REGS: getelementptr i8, ptr %lsr.iv1, i64 4
20+
; REGS: getelementptr i8, ptr %lsr.iv2, i64 4
21+
; REGS: getelementptr i8, ptr %lsr.iv4, i64 4
1922

2023
; LLC checks that LSR prefers fewer instructions to fewer registers.
2124
; LSR should prefer complicated address to additional add instructions.
@@ -28,7 +31,7 @@
2831
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2932

3033
; Function Attrs: norecurse nounwind uwtable
31-
define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q, i32 %n) {
34+
define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture %q, i32 %n) {
3235
entry:
3336
%cmp10 = icmp sgt i32 %n, 0
3437
br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
@@ -45,13 +48,13 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo
4548

4649
for.body: ; preds = %for.body, %for.body.preheader
4750
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
48-
%arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
49-
%tmp = load i32, i32* %arrayidx, align 4
50-
%arrayidx2 = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
51-
%tmp1 = load i32, i32* %arrayidx2, align 4
51+
%arrayidx = getelementptr inbounds i32, ptr %x, i64 %indvars.iv
52+
%tmp = load i32, ptr %arrayidx, align 4
53+
%arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv
54+
%tmp1 = load i32, ptr %arrayidx2, align 4
5255
%add = add nsw i32 %tmp1, %tmp
53-
%arrayidx4 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
54-
store i32 %add, i32* %arrayidx4, align 4
56+
%arrayidx4 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv
57+
store i32 %add, ptr %arrayidx4, align 4
5558
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
5659
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
5760
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body

0 commit comments

Comments
 (0)