- ; REQUIRES: asserts
- ; RUN: not --crash opt -mtriple=aarch64 -passes=load-store-vectorizer \
- ; RUN: -disable-output %s 2>&1 | FileCheck %s
+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+ ; RUN: opt -mtriple=aarch64 -passes=load-store-vectorizer -S %s | FileCheck %s
+
+ ; LSV was attempting to vectorize this earlier, but crashed while re-ordering
+ ; instructions due to the load-load cycle. Now, the candidate loads are no
+ ; longer considered safe for reordering.
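+ ; In @load_cycle below, the address of the second load (%load.x.2) is computed
+ ; from the value of the first load (%load.x.1), so the two loads cannot be
+ ; re-ordered next to each other for vectorization.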

define i32 @load_cycle(ptr %x) {
- ; CHECK: Unexpected cycle while re-ordering instructions
+ ; CHECK-LABEL: define i32 @load_cycle(
+ ; CHECK-SAME: ptr [[X:%.*]]) {
+ ; CHECK-NEXT: [[ENTRY:.*:]]
+ ; CHECK-NEXT: [[GEP_X_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 0, i32 1
+ ; CHECK-NEXT: [[LOAD_X_1:%.*]] = load i32, ptr [[GEP_X_1]], align 4
+ ; CHECK-NEXT: [[REM:%.*]] = urem i32 [[LOAD_X_1]], 1
+ ; CHECK-NEXT: [[GEP_X_2:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[REM]], i32 0
+ ; CHECK-NEXT: [[LOAD_X_2:%.*]] = load i32, ptr [[GEP_X_2]], align 4
+ ; CHECK-NEXT: [[RET:%.*]] = add i32 [[LOAD_X_2]], [[LOAD_X_1]]
+ ; CHECK-NEXT: ret i32 [[RET]]
+ ;
entry:
%gep.x.1 = getelementptr inbounds [2 x i32], ptr %x, i32 0, i32 1
%load.x.1 = load i32, ptr %gep.x.1
@@ -13,3 +26,61 @@ entry:
%ret = add i32 %load.x.2, %load.x.1
ret i32 %ret
}
+
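+ ; Same load-load cycle, but the index of the second GEP is derived from the
+ ; first loaded value through a chain of mul/add/sub instructions.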
+ define i32 @load_cycle2(ptr %x, i32 %y) {
+ ; CHECK-LABEL: define i32 @load_cycle2(
+ ; CHECK-SAME: ptr [[X:%.*]], i32 [[Y:%.*]]) {
+ ; CHECK-NEXT: [[ENTRY:.*:]]
+ ; CHECK-NEXT: [[GEP_X_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[Y]], i32 1
+ ; CHECK-NEXT: [[LOAD_X_1:%.*]] = load i32, ptr [[GEP_X_1]], align 4
+ ; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[LOAD_X_1]], 2
+ ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[Y]], [[MUL]]
+ ; CHECK-NEXT: [[SUB_1:%.*]] = sub i32 [[ADD]], [[LOAD_X_1]]
+ ; CHECK-NEXT: [[SUB_2:%.*]] = sub i32 [[SUB_1]], [[LOAD_X_1]]
+ ; CHECK-NEXT: [[GEP_X_2:%.*]] = getelementptr inbounds [2 x i32], ptr [[X]], i32 [[SUB_2]], i32 0
+ ; CHECK-NEXT: [[LOAD_X_2:%.*]] = load i32, ptr [[GEP_X_2]], align 4
+ ; CHECK-NEXT: [[RET:%.*]] = add i32 [[LOAD_X_2]], [[LOAD_X_1]]
+ ; CHECK-NEXT: ret i32 [[RET]]
+ ;
+ entry:
+ %gep.x.1 = getelementptr inbounds [2 x i32], ptr %x, i32 %y, i32 1
+ %load.x.1 = load i32, ptr %gep.x.1
+ %mul = mul i32 %load.x.1, 2
+ %add = add i32 %y, %mul
+ %sub.1 = sub i32 %add, %load.x.1
+ %sub.2 = sub i32 %sub.1, %load.x.1
+ %gep.x.2 = getelementptr inbounds [2 x i32], ptr %x, i32 %sub.2, i32 0
+ %load.x.2 = load i32, ptr %gep.x.2
+ %ret = add i32 %load.x.2, %load.x.1
+ ret i32 %ret
+ }
+
+ @global.1 = global i32 0
+ @global.2 = global [1 x [3 x i32]] zeroinitializer
+
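+ ; The cycle can also go through globals and chained GEPs: each loaded value
+ ; feeds the index of the GEP chain that produces the next load's address.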
+ define i16 @load_cycle3() {
+ ; CHECK-LABEL: define i16 @load_cycle3() {
+ ; CHECK-NEXT: [[ENTRY:.*:]]
+ ; CHECK-NEXT: [[LOAD_1:%.*]] = load i32, ptr @global.1, align 4
+ ; CHECK-NEXT: [[UREM_1:%.*]] = urem i32 [[LOAD_1]], 1
+ ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 [[UREM_1]]
+ ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds [3 x i32], ptr [[GEP_1]], i32 0, i32 2
+ ; CHECK-NEXT: [[LOAD_2:%.*]] = load i32, ptr [[GEP_2]], align 4
+ ; CHECK-NEXT: [[UREM_2:%.*]] = urem i32 [[LOAD_2]], 1
+ ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 [[UREM_2]]
+ ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds [3 x i32], ptr [[GEP_3]], i32 0, i32 1
+ ; CHECK-NEXT: [[LOAD_3:%.*]] = load i32, ptr [[GEP_4]], align 4
+ ; CHECK-NEXT: ret i16 0
+ ;
+ entry:
+ %load.1 = load i32, ptr @global.1
+ %urem.1 = urem i32 %load.1, 1
+ %gep.1 = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 %urem.1
+ %gep.2 = getelementptr inbounds [3 x i32], ptr %gep.1, i32 0, i32 2
+ %load.2 = load i32, ptr %gep.2
+ %urem.2 = urem i32 %load.2, 1
+ %gep.3 = getelementptr inbounds [1 x [3 x i32]], ptr @global.2, i32 0, i32 %urem.2
+ %gep.4 = getelementptr inbounds [3 x i32], ptr %gep.3, i32 0, i32 1
+ %load.3 = load i32, ptr %gep.4
+ ret i16 0
+ }