@@ -18,13 +18,15 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
18
18
%V128 = shufflevector <2 x double > %src128 , <2 x double > undef , <2 x i32 > <i32 1 , i32 0 >
19
19
20
20
; SSE: cost of 2 {{.*}} %V256 = shufflevector
21
- ; AVX: cost of 3 {{.*}} %V256 = shufflevector
22
- ; AVX512: cost of 3 {{.*}} %V256 = shufflevector
21
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
22
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
23
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
23
24
%V256 = shufflevector <4 x double > %src256 , <4 x double > undef , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 >
24
25
25
26
; SSE: cost of 4 {{.*}} %V512 = shufflevector
26
- ; AVX: cost of 6 {{.*}} %V512 = shufflevector
27
- ; AVX512: cost of 3 {{.*}} %V512 = shufflevector
27
+ ; AVX1: cost of 4 {{.*}} %V512 = shufflevector
28
+ ; AVX2: cost of 2 {{.*}} %V512 = shufflevector
29
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
28
30
%V512 = shufflevector <8 x double > %src512 , <8 x double > undef , <8 x i32 > <i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
29
31
30
32
ret void
@@ -38,13 +40,15 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
38
40
%V128 = shufflevector <2 x i64 > %src128 , <2 x i64 > undef , <2 x i32 > <i32 1 , i32 0 >
39
41
40
42
; SSE: cost of 2 {{.*}} %V256 = shufflevector
41
- ; AVX: cost of 3 {{.*}} %V256 = shufflevector
42
- ; AVX512: cost of 3 {{.*}} %V256 = shufflevector
43
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
44
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
45
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
43
46
%V256 = shufflevector <4 x i64 > %src256 , <4 x i64 > undef , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 >
44
47
45
48
; SSE: cost of 4 {{.*}} %V512 = shufflevector
46
- ; AVX: cost of 6 {{.*}} %V512 = shufflevector
47
- ; AVX512: cost of 3 {{.*}} %V512 = shufflevector
49
+ ; AVX1: cost of 4 {{.*}} %V512 = shufflevector
50
+ ; AVX2: cost of 2 {{.*}} %V512 = shufflevector
51
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
48
52
%V512 = shufflevector <8 x i64 > %src512 , <8 x i64 > undef , <8 x i32 > <i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
49
53
50
54
ret void
@@ -63,13 +67,15 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr
63
67
%V128 = shufflevector <4 x float > %src128 , <4 x float > undef , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 >
64
68
65
69
; SSE: cost of 2 {{.*}} %V256 = shufflevector
66
- ; AVX: cost of 3 {{.*}} %V256 = shufflevector
67
- ; AVX512: cost of 3 {{.*}} %V256 = shufflevector
70
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
71
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
72
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
68
73
%V256 = shufflevector <8 x float > %src256 , <8 x float > undef , <8 x i32 > <i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
69
74
70
75
; SSE: cost of 4 {{.*}} %V512 = shufflevector
71
- ; AVX: cost of 6 {{.*}} %V512 = shufflevector
72
- ; AVX512: cost of 3 {{.*}} %V512 = shufflevector
76
+ ; AVX1: cost of 4 {{.*}} %V512 = shufflevector
77
+ ; AVX2: cost of 2 {{.*}} %V512 = shufflevector
78
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
73
79
%V512 = shufflevector <16 x float > %src512 , <16 x float > undef , <16 x i32 > <i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
74
80
75
81
ret void
@@ -88,55 +94,73 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256,
88
94
%V128 = shufflevector <4 x i32 > %src128 , <4 x i32 > undef , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 >
89
95
90
96
; SSE: cost of 2 {{.*}} %V256 = shufflevector
91
- ; AVX: cost of 3 {{.*}} %V256 = shufflevector
92
- ; AVX512: cost of 3 {{.*}} %V256 = shufflevector
97
+ ; AVX1: cost of 2 {{.*}} %V256 = shufflevector
98
+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
99
+ ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
93
100
%V256 = shufflevector <8 x i32 > %src256 , <8 x i32 > undef , <8 x i32 > <i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
94
101
95
102
; SSE: cost of 4 {{.*}} %V512 = shufflevector
96
- ; AVX: cost of 6 {{.*}} %V512 = shufflevector
97
- ; AVX512: cost of 3 {{.*}} %V512 = shufflevector
103
+ ; AVX1: cost of 4 {{.*}} %V512 = shufflevector
104
+ ; AVX2: cost of 2 {{.*}} %V512 = shufflevector
105
+ ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
98
106
%V512 = shufflevector <16 x i32 > %src512 , <16 x i32 > undef , <16 x i32 > <i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
99
107
100
108
ret void
101
109
}
102
110
103
111
; CHECK-LABEL: 'test_vXi16'
104
112
define void @test_vXi16 (<8 x i16 > %src128 , <16 x i16 > %src256 , <32 x i16 > %src512 ) {
105
- ; SSE: cost of 1 {{.*}} %V128 = shufflevector
113
+ ; SSE2: cost of 3 {{.*}} %V128 = shufflevector
114
+ ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
115
+ ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
106
116
; AVX: cost of 1 {{.*}} %V128 = shufflevector
107
117
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
108
118
%V128 = shufflevector <8 x i16 > %src128 , <8 x i16 > undef , <8 x i32 > <i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
109
119
110
- ; SSE: cost of 2 {{.*}} %V256 = shufflevector
111
- ; AVX: cost of 3 {{.*}} %V256 = shufflevector
112
- ; AVX512: cost of 3 {{.*}} %V256 = shufflevector
120
+ ; SSE2: cost of 6 {{.*}} %V256 = shufflevector
121
+ ; SSSE3: cost of 2 {{.*}} %V256 = shufflevector
122
+ ; SSE42: cost of 2 {{.*}} %V256 = shufflevector
123
+ ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
124
+ ; AVX2: cost of 2 {{.*}} %V256 = shufflevector
125
+ ; AVX512: cost of 2 {{.*}} %V256 = shufflevector
113
126
%V256 = shufflevector <16 x i16 > %src256 , <16 x i16 > undef , <16 x i32 > <i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
114
127
115
- ; SSE: cost of 4 {{.*}} %V512 = shufflevector
116
- ; AVX: cost of 6 {{.*}} %V512 = shufflevector
117
- ; AVX512F: cost of 6 {{.*}} %V512 = shufflevector
118
- ; AVX512BW: cost of 3 {{.*}} %V512 = shufflevector
128
+ ; SSE2: cost of 12 {{.*}} %V512 = shufflevector
129
+ ; SSSE3: cost of 4 {{.*}} %V512 = shufflevector
130
+ ; SSE42: cost of 4 {{.*}} %V512 = shufflevector
131
+ ; AVX1: cost of 8 {{.*}} %V512 = shufflevector
132
+ ; AVX2: cost of 4 {{.*}} %V512 = shufflevector
133
+ ; AVX512F: cost of 4 {{.*}} %V512 = shufflevector
134
+ ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
119
135
%V512 = shufflevector <32 x i16 > %src512 , <32 x i16 > undef , <32 x i32 > <i32 31 , i32 30 , i32 29 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 22 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
120
136
121
137
ret void
122
138
}
123
139
124
140
; CHECK-LABEL: 'test_vXi8'
125
141
define void @test_vXi8 (<16 x i8 > %src128 , <32 x i8 > %src256 , <64 x i8 > %src512 ) {
126
- ; SSE: cost of 1 {{.*}} %V128 = shufflevector
142
+ ; SSE2: cost of 9 {{.*}} %V128 = shufflevector
143
+ ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
144
+ ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
127
145
; AVX: cost of 1 {{.*}} %V128 = shufflevector
128
146
; AVX512: cost of 1 {{.*}} %V128 = shufflevector
129
147
%V128 = shufflevector <16 x i8 > %src128 , <16 x i8 > undef , <16 x i32 > <i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
130
148
131
- ; SSE: cost of 2 {{.*}} %V256 = shufflevector
132
- ; AVX: cost of 3 {{.*}} %V256 = shufflevector
133
- ; AVX512: cost of 3 {{.*}} %V256 = shufflevector
149
+ ; SSE2: cost of 18 {{.*}} %V256 = shufflevector
150
+ ; SSSE3: cost of 2 {{.*}} %V256 = shufflevector
151
+ ; SSE42: cost of 2 {{.*}} %V256 = shufflevector
152
+ ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
153
+ ; AVX2: cost of 2 {{.*}} %V256 = shufflevector
154
+ ; AVX512: cost of 2 {{.*}} %V256 = shufflevector
134
155
%V256 = shufflevector <32 x i8 > %src256 , <32 x i8 > undef , <32 x i32 > <i32 31 , i32 30 , i32 29 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 22 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
135
156
136
- ; SSE: cost of 4 {{.*}} %V512 = shufflevector
137
- ; AVX: cost of 6 {{.*}} %V512 = shufflevector
138
- ; AVX512F: cost of 6 {{.*}} %V512 = shufflevector
139
- ; AVX512BW: cost of 3 {{.*}} %V512 = shufflevector
157
+ ; SSE2: cost of 36 {{.*}} %V512 = shufflevector
158
+ ; SSSE3: cost of 4 {{.*}} %V512 = shufflevector
159
+ ; SSE42: cost of 4 {{.*}} %V512 = shufflevector
160
+ ; AVX1: cost of 8 {{.*}} %V512 = shufflevector
161
+ ; AVX2: cost of 4 {{.*}} %V512 = shufflevector
162
+ ; AVX512F: cost of 4 {{.*}} %V512 = shufflevector
163
+ ; AVX512BW: cost of 6 {{.*}} %V512 = shufflevector
140
164
%V512 = shufflevector <64 x i8 > %src512 , <64 x i8 > undef , <64 x i32 > <i32 63 , i32 62 , i32 61 , i32 60 , i32 59 , i32 58 , i32 57 , i32 56 , i32 55 , i32 54 , i32 53 , i32 52 , i32 51 , i32 50 , i32 49 , i32 48 , i32 47 , i32 46 , i32 45 , i32 44 , i32 43 , i32 42 , i32 41 , i32 40 , i32 39 , i32 38 , i32 37 , i32 36 , i32 35 , i32 34 , i32 33 , i32 32 , i32 31 , i32 30 , i32 29 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 22 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
141
165
142
166
ret void
0 commit comments