1
+ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
1
2
// RUN: %clang_cc1 -fenable-matrix %s -emit-llvm -triple x86_64-unknown-linux -disable-llvm-passes -o - -std=c++11 | FileCheck %s
2
3
3
4
// 3-element extended vector whose element type is an 8-bit _BitInt.
using i8x3 = _BitInt(8 ) __attribute__((ext_vector_type(3 )));
@@ -7,92 +8,104 @@ using i32x3x3 = _BitInt(32) __attribute__((matrix_type(3, 3)));
7
8
// 3-element extended vector whose element type is a 512-bit _BitInt.
using i512x3 = _BitInt(512 ) __attribute__((ext_vector_type(3 )));
8
9
// 3x3 matrix whose element type is a 512-bit _BitInt (the RUN line passes
// -fenable-matrix for the matrix_type attribute).
using i512x3x3 = _BitInt(512 ) __attribute__((matrix_type(3 , 3 )));
9
10
10
- // CHECK-LABEL: define dso_local i32 @_Z2v1Dv3_DB8_(i32 %a.coerce)
11
+ // CHECK-LABEL: define dso_local i32 @_Z2v1Dv3_DB8_(
12
+ // CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
13
+ // CHECK-NEXT: [[ENTRY:.*:]]
14
+ // CHECK-NEXT: [[RETVAL:%.*]] = alloca <3 x i8>, align 4
15
+ // CHECK-NEXT: [[A:%.*]] = alloca <3 x i8>, align 4
16
+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i8>, align 4
17
+ // CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
18
+ // CHECK-NEXT: [[LOADVEC4:%.*]] = load <4 x i8>, ptr [[A]], align 4
19
+ // CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i8> [[LOADVEC4]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
20
+ // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
21
+ // CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
22
+ // CHECK-NEXT: [[LOADVEC42:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
23
+ // CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i8> [[LOADVEC42]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
24
+ // CHECK-NEXT: [[LOADVEC44:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
25
+ // CHECK-NEXT: [[EXTRACTVEC5:%.*]] = shufflevector <4 x i8> [[LOADVEC44]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
26
+ // CHECK-NEXT: [[ADD:%.*]] = add <3 x i8> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
27
+ // CHECK-NEXT: store <3 x i8> [[ADD]], ptr [[RETVAL]], align 4
28
+ // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4
29
+ // CHECK-NEXT: ret i32 [[TMP0]]
30
+ //
11
31
// v1: element-wise sum of a 3 x _BitInt(8) extended vector with itself.
// The FileCheck assertions for this function pin the x86_64 lowering: the
// argument and the return value are coerced through i32, and the <3 x i8>
// value is loaded/stored in memory as <4 x i8>.
// Keep the body as-is: any change to the expression changes the emitted IR
// that the assertions match.
i8x3 v1 (i8x3 a) {
12
- // CHECK-NEXT: entry:
13
- // CHECK-NEXT: %retval = alloca <3 x i8>, align 4
14
- // CHECK-NEXT: %a = alloca <3 x i8>, align 4
15
- // CHECK-NEXT: %a.addr = alloca <3 x i8>, align 4
16
- // CHECK-NEXT: store i32 %a.coerce, ptr %a, align 4
17
- // CHECK-NEXT: %loadVec4 = load <4 x i8>, ptr %a, align 4
18
- // CHECK-NEXT: %a1 = shufflevector <4 x i8> %loadVec4, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
19
- // CHECK-NEXT: %extractVec = shufflevector <3 x i8> %a1, <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
20
- // CHECK-NEXT: store <4 x i8> %extractVec, ptr %a.addr, align 4
21
- // CHECK-NEXT: %loadVec42 = load <4 x i8>, ptr %a.addr, align 4
22
- // CHECK-NEXT: %extractVec3 = shufflevector <4 x i8> %loadVec42, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
23
- // CHECK-NEXT: %loadVec44 = load <4 x i8>, ptr %a.addr, align 4
24
- // CHECK-NEXT: %extractVec5 = shufflevector <4 x i8> %loadVec44, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
25
- // CHECK-NEXT: %add = add <3 x i8> %extractVec3, %extractVec5
26
- // CHECK-NEXT: store <3 x i8> %add, ptr %retval, align 4
27
- // CHECK-NEXT: %0 = load i32, ptr %retval, align 4
28
- // CHECK-NEXT: ret i32 %0
29
32
return a + a;
30
33
}
31
34
32
- // CHECK-LABEL: define dso_local noundef <3 x i32> @_Z2v2Dv3_DB32_(<3 x i32> noundef %a)
35
+ // CHECK-LABEL: define dso_local noundef <3 x i32> @_Z2v2Dv3_DB32_(
36
+ // CHECK-SAME: <3 x i32> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
37
+ // CHECK-NEXT: [[ENTRY:.*:]]
38
+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i32>, align 16
39
+ // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
40
+ // CHECK-NEXT: store <4 x i32> [[EXTRACTVEC]], ptr [[A_ADDR]], align 16
41
+ // CHECK-NEXT: [[LOADVEC4:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
42
+ // CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x i32> [[LOADVEC4]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
43
+ // CHECK-NEXT: [[LOADVEC42:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
44
+ // CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i32> [[LOADVEC42]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
45
+ // CHECK-NEXT: [[ADD:%.*]] = add <3 x i32> [[EXTRACTVEC1]], [[EXTRACTVEC3]]
46
+ // CHECK-NEXT: ret <3 x i32> [[ADD]]
47
+ //
33
48
// v2: element-wise sum of an i32x3 vector with itself.
// NOTE(review): the i32x3 alias is declared outside this view; the
// assertions show it lowering to <3 x i32>, presumably a 3-element
// _BitInt(32) extended vector.
// Per the assertions, the value is passed and returned directly as
// <3 x i32> and spilled to the stack slot as <4 x i32> with 16-byte
// alignment.
i32x3 v2 (i32x3 a) {
34
- // CHECK-NEXT: entry:
35
- // CHECK-NEXT: %a.addr = alloca <3 x i32>, align 16
36
- // CHECK-NEXT: %extractVec = shufflevector <3 x i32> %a, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
37
- // CHECK-NEXT: store <4 x i32> %extractVec, ptr %a.addr, align 16
38
- // CHECK-NEXT: %loadVec4 = load <4 x i32>, ptr %a.addr, align 16
39
- // CHECK-NEXT: %extractVec1 = shufflevector <4 x i32> %loadVec4, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
40
- // CHECK-NEXT: %loadVec42 = load <4 x i32>, ptr %a.addr, align 16
41
- // CHECK-NEXT: %extractVec3 = shufflevector <4 x i32> %loadVec42, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
42
- // CHECK-NEXT: %add = add <3 x i32> %extractVec1, %extractVec3
43
- // CHECK-NEXT: ret <3 x i32> %add
44
49
return a + a;
45
50
}
46
51
47
- // CHECK-LABEL: define dso_local noundef <3 x i512> @_Z2v3Dv3_DB512_(ptr noundef byval(<3 x i512>) align 256 %0)
52
+ // CHECK-LABEL: define dso_local noundef <3 x i512> @_Z2v3Dv3_DB512_(
53
+ // CHECK-SAME: ptr noundef byval(<3 x i512>) align 256 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
54
+ // CHECK-NEXT: [[ENTRY:.*:]]
55
+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i512>, align 256
56
+ // CHECK-NEXT: [[LOADVEC4:%.*]] = load <4 x i512>, ptr [[TMP0]], align 256
57
+ // CHECK-NEXT: [[A:%.*]] = shufflevector <4 x i512> [[LOADVEC4]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
58
+ // CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
59
+ // CHECK-NEXT: store <4 x i512> [[EXTRACTVEC]], ptr [[A_ADDR]], align 256
60
+ // CHECK-NEXT: [[LOADVEC41:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
61
+ // CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x i512> [[LOADVEC41]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
62
+ // CHECK-NEXT: [[LOADVEC43:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
63
+ // CHECK-NEXT: [[EXTRACTVEC4:%.*]] = shufflevector <4 x i512> [[LOADVEC43]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
64
+ // CHECK-NEXT: [[ADD:%.*]] = add <3 x i512> [[EXTRACTVEC2]], [[EXTRACTVEC4]]
65
+ // CHECK-NEXT: ret <3 x i512> [[ADD]]
66
+ //
48
67
// v3: element-wise sum of a 3 x _BitInt(512) extended vector with itself.
// Per the assertions, the argument arrives indirectly (byval pointer,
// align 256), is loaded as <4 x i512> and narrowed to <3 x i512>, and the
// result is returned as <3 x i512>.
i512x3 v3 (i512x3 a) {
49
- // CHECK-NEXT: entry:
50
- // CHECK-NEXT: %a.addr = alloca <3 x i512>, align 256
51
- // CHECK-NEXT: %loadVec4 = load <4 x i512>, ptr %0, align 256
52
- // CHECK-NEXT: %a = shufflevector <4 x i512> %loadVec4, <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
53
- // CHECK-NEXT: %extractVec = shufflevector <3 x i512> %a, <3 x i512> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
54
- // CHECK-NEXT: store <4 x i512> %extractVec, ptr %a.addr, align 256
55
- // CHECK-NEXT: %loadVec41 = load <4 x i512>, ptr %a.addr, align 256
56
- // CHECK-NEXT: %extractVec2 = shufflevector <4 x i512> %loadVec41, <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
57
- // CHECK-NEXT: %loadVec43 = load <4 x i512>, ptr %a.addr, align 256
58
- // CHECK-NEXT: %extractVec4 = shufflevector <4 x i512> %loadVec43, <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
59
- // CHECK-NEXT: %add = add <3 x i512> %extractVec2, %extractVec4
60
- // CHECK-NEXT: ret <3 x i512> %add
61
68
return a + a;
62
69
}
63
70
64
- // CHECK-LABEL: define dso_local noundef <9 x i8> @_Z2m1u11matrix_typeILm3ELm3EDB8_E(<9 x i8> noundef %a)
71
+ // CHECK-LABEL: define dso_local noundef <9 x i8> @_Z2m1u11matrix_typeILm3ELm3EDB8_E(
72
+ // CHECK-SAME: <9 x i8> noundef [[A:%.*]]) #[[ATTR3:[0-9]+]] {
73
+ // CHECK-NEXT: [[ENTRY:.*:]]
74
+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i8], align 1
75
+ // CHECK-NEXT: store <9 x i8> [[A]], ptr [[A_ADDR]], align 1
76
+ // CHECK-NEXT: [[TMP0:%.*]] = load <9 x i8>, ptr [[A_ADDR]], align 1
77
+ // CHECK-NEXT: [[TMP1:%.*]] = load <9 x i8>, ptr [[A_ADDR]], align 1
78
+ // CHECK-NEXT: [[TMP2:%.*]] = add <9 x i8> [[TMP0]], [[TMP1]]
79
+ // CHECK-NEXT: ret <9 x i8> [[TMP2]]
80
+ //
65
81
// m1: element-wise sum of an i8x3x3 matrix with itself.
// NOTE(review): the i8x3x3 alias is declared outside this view; the
// assertions show it lowering to <9 x i8>, presumably a 3x3 _BitInt(8)
// matrix.
// Per the assertions, the matrix is passed and returned as a flat
// <9 x i8> and its stack slot is a [9 x i8] array with alignment 1.
i8x3x3 m1 (i8x3x3 a) {
66
- // CHECK-NEXT: entry:
67
- // CHECK-NEXT: %a.addr = alloca [9 x i8], align 1
68
- // CHECK-NEXT: store <9 x i8> %a, ptr %a.addr, align 1
69
- // CHECK-NEXT: %0 = load <9 x i8>, ptr %a.addr, align 1
70
- // CHECK-NEXT: %1 = load <9 x i8>, ptr %a.addr, align 1
71
- // CHECK-NEXT: %2 = add <9 x i8> %0, %1
72
- // CHECK-NEXT: ret <9 x i8> %2
73
82
return a + a;
74
83
}
75
84
76
- // CHECK-LABEL: define dso_local noundef <9 x i32> @_Z2m2u11matrix_typeILm3ELm3EDB32_E(<9 x i32> noundef %a)
85
+ // CHECK-LABEL: define dso_local noundef <9 x i32> @_Z2m2u11matrix_typeILm3ELm3EDB32_E(
86
+ // CHECK-SAME: <9 x i32> noundef [[A:%.*]]) #[[ATTR4:[0-9]+]] {
87
+ // CHECK-NEXT: [[ENTRY:.*:]]
88
+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i32], align 4
89
+ // CHECK-NEXT: store <9 x i32> [[A]], ptr [[A_ADDR]], align 4
90
+ // CHECK-NEXT: [[TMP0:%.*]] = load <9 x i32>, ptr [[A_ADDR]], align 4
91
+ // CHECK-NEXT: [[TMP1:%.*]] = load <9 x i32>, ptr [[A_ADDR]], align 4
92
+ // CHECK-NEXT: [[TMP2:%.*]] = add <9 x i32> [[TMP0]], [[TMP1]]
93
+ // CHECK-NEXT: ret <9 x i32> [[TMP2]]
94
+ //
77
95
// m2: element-wise sum of a 3x3 _BitInt(32) matrix with itself.
// Per the assertions, the matrix is passed and returned as a flat
// <9 x i32> and its stack slot is a [9 x i32] array with alignment 4.
i32x3x3 m2 (i32x3x3 a) {
78
- // CHECK-NEXT: entry:
79
- // CHECK-NEXT: %a.addr = alloca [9 x i32], align 4
80
- // CHECK-NEXT: store <9 x i32> %a, ptr %a.addr, align 4
81
- // CHECK-NEXT: %0 = load <9 x i32>, ptr %a.addr, align 4
82
- // CHECK-NEXT: %1 = load <9 x i32>, ptr %a.addr, align 4
83
- // CHECK-NEXT: %2 = add <9 x i32> %0, %1
84
- // CHECK-NEXT: ret <9 x i32> %2
85
96
return a + a;
86
97
}
87
98
88
- // CHECK-LABEL: define dso_local noundef <9 x i512> @_Z2m3u11matrix_typeILm3ELm3EDB512_E(<9 x i512> noundef %a)
99
+ // CHECK-LABEL: define dso_local noundef <9 x i512> @_Z2m3u11matrix_typeILm3ELm3EDB512_E(
100
+ // CHECK-SAME: <9 x i512> noundef [[A:%.*]]) #[[ATTR5:[0-9]+]] {
101
+ // CHECK-NEXT: [[ENTRY:.*:]]
102
+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i512], align 8
103
+ // CHECK-NEXT: store <9 x i512> [[A]], ptr [[A_ADDR]], align 8
104
+ // CHECK-NEXT: [[TMP0:%.*]] = load <9 x i512>, ptr [[A_ADDR]], align 8
105
+ // CHECK-NEXT: [[TMP1:%.*]] = load <9 x i512>, ptr [[A_ADDR]], align 8
106
+ // CHECK-NEXT: [[TMP2:%.*]] = add <9 x i512> [[TMP0]], [[TMP1]]
107
+ // CHECK-NEXT: ret <9 x i512> [[TMP2]]
108
+ //
89
109
// m3: element-wise sum of a 3x3 _BitInt(512) matrix with itself.
// Per the assertions, the matrix is passed and returned as a flat
// <9 x i512> and its stack slot is a [9 x i512] array with alignment 8.
i512x3x3 m3 (i512x3x3 a) {
90
- // CHECK-NEXT: entry:
91
- // CHECK-NEXT: %a.addr = alloca [9 x i512], align 8
92
- // CHECK-NEXT: store <9 x i512> %a, ptr %a.addr, align 8
93
- // CHECK-NEXT: %0 = load <9 x i512>, ptr %a.addr, align 8
94
- // CHECK-NEXT: %1 = load <9 x i512>, ptr %a.addr, align 8
95
- // CHECK-NEXT: %2 = add <9 x i512> %0, %1
96
- // CHECK-NEXT: ret <9 x i512> %2
97
110
return a + a;
98
111
}
0 commit comments