Skip to content

Commit aa158bf

Browse files
committed
[LV] Update tests to replace some code with loop-varying instructions.
Update some tests that contain loop-invariant instructions, where hoisting those instructions out of the loop would change the vectorization decision, so that they use loop-varying instructions instead. This should preserve the tests' original spirit when further improvements are made.
1 parent e25eb14 commit aa158bf

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -126,17 +126,17 @@ exit:
126126
ret void
127127
}
128128

129-
define void @call_scalarized(ptr noalias %src, ptr noalias %dst, double %0) {
129+
define void @call_scalarized(ptr noalias %src, ptr noalias %dst) {
130130
; CHECK-LABEL: define void @call_scalarized(
131-
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], double [[TMP0:%.*]]) {
131+
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
132132
; CHECK-NEXT: [[ENTRY:.*]]:
133133
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
134134
; CHECK: [[LOOP_HEADER]]:
135135
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
136136
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
137137
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV_NEXT]]
138138
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP_SRC]], align 8
139-
; CHECK-NEXT: [[CMP295:%.*]] = fcmp ugt double [[TMP0]], 0.000000e+00
139+
; CHECK-NEXT: [[CMP295:%.*]] = fcmp une double [[L]], 4.000000e+00
140140
; CHECK-NEXT: [[CMP299:%.*]] = fcmp ugt double [[L]], 0.000000e+00
141141
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP295]], [[CMP299]]
142142
; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP_LATCH]], label %[[THEN:.*]]
@@ -159,7 +159,7 @@ loop.header:
159159
%iv.next = add i64 %iv, -1
160160
%gep.src = getelementptr double, ptr %src, i64 %iv.next
161161
%l = load double, ptr %gep.src, align 8
162-
%cmp295 = fcmp ugt double %0, 0.000000e+00
162+
%cmp295 = fcmp une double %l, 4.000000e+00
163163
%cmp299 = fcmp ugt double %l, 0.000000e+00
164164
%or.cond = or i1 %cmp295, %cmp299
165165
br i1 %or.cond, label %loop.latch, label %then

llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,15 +4,18 @@
44
; CHECK-LABEL: vector.body:
55
; CHECK-NEXT: [[IDX:%.+]] = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
66
; CHECK-NEXT: [[IDX0:%.+]] = add i32 %index, 0
7-
; CHECK-NEXT: [[FSHL:%.+]] = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> <i16 15, i16 15, i16 15, i16 15>)
7+
; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i16, ptr %src, i32 [[IDX0]]
8+
; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i16, ptr [[GEP]], i32 0
9+
; CHECK-NEXT: [[WIDE_LOAD:%.+]] = load <4 x i16>, ptr [[GEP0]], align 2
10+
; CHECK-NEXT: [[FSHL:%.+]] = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD]], <4 x i16> <i16 15, i16 15, i16 15, i16 15>)
811
; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i16, ptr %dst, i32 [[IDX0]]
912
; CHECK-NEXT: [[GEP1:%.+]] = getelementptr inbounds i16, ptr [[GEP0]], i32 0
1013
; CHECK-NEXT: store <4 x i16> [[FSHL]], ptr [[GEP1]], align 2
1114
; CHECK-NEXT: [[IDX_NEXT:%.+]] = add nuw i32 [[IDX]], 4
1215
; CHECK-NEXT: [[EC:%.+]] = icmp eq i32 [[IDX_NEXT]], %n.vec
1316
; CHECK-NEXT: br i1 [[EC]], label %middle.block, label %vector.body
1417
;
15-
define void @test_fshl(i32 %width, ptr %dst) {
18+
define void @test_fshl(i32 %width, ptr %dst, ptr %src) {
1619
entry:
1720
br label %for.body9.us.us
1821

@@ -21,7 +24,9 @@ for.cond6.for.cond.cleanup8_crit_edge.us.us: ; preds = %for.body9.us.us
2124

2225
for.body9.us.us: ; preds = %for.body9.us.us, %entry
2326
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body9.us.us ]
24-
%conv4.i.us.us = tail call i16 @llvm.fshl.i16(i16 undef, i16 undef, i16 15)
27+
%gep = getelementptr inbounds i16, ptr %src, i32 %iv
28+
%l = load i16, ptr %gep
29+
%conv4.i.us.us = tail call i16 @llvm.fshl.i16(i16 %l, i16 %l, i16 15)
2530
%dst.gep = getelementptr inbounds i16, ptr %dst, i32 %iv
2631
store i16 %conv4.i.us.us, ptr %dst.gep
2732
%iv.next = add nuw i32 %iv, 1

0 commit comments

Comments
 (0)