@@ -25,9 +25,9 @@ define <4 x float> @shuf_fdiv_v4f32_yy(<4 x float> %x, <4 x float> %y, <4 x floa
25
25
define <4 x i32 > @shuf_add_v4i32_xx (<4 x i32 > %x , <4 x i32 > %y , <4 x i32 > %z ) {
26
26
; CHECK-LABEL: define <4 x i32> @shuf_add_v4i32_xx(
27
27
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
28
- ; CHECK-NEXT: [[TMP1 :%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 0>
29
- ; CHECK-NEXT: [[R1 :%.*]] = shufflevector <4 x i32> [[Y ]], <4 x i32> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
30
- ; CHECK-NEXT: [[R2:%.*]] = add <4 x i32> [[TMP1 ]], [[R1]]
28
+ ; CHECK-NEXT: [[B0 :%.*]] = add <4 x i32> [[X]], [[Y]]
29
+ ; CHECK-NEXT: [[B1 :%.*]] = add <4 x i32> [[X ]], [[Z]]
30
+ ; CHECK-NEXT: [[R2:%.*]] = shufflevector <4 x i32> [[B0 ]], <4 x i32> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
31
31
; CHECK-NEXT: ret <4 x i32> [[R2]]
32
32
;
33
33
%b0 = add <4 x i32 > %x , %y
@@ -36,15 +36,22 @@ define <4 x i32> @shuf_add_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
36
36
ret <4 x i32 > %r
37
37
}
38
38
39
- ; For commutative instructions, common operand may be swapped.
39
+ ; For commutative instructions, common operand may be swapped (SSE - expensive fmul vs AVX - cheap fmul)
40
40
41
41
define <4 x float > @shuf_fmul_v4f32_xx_swap (<4 x float > %x , <4 x float > %y , <4 x float > %z ) {
42
- ; CHECK-LABEL: define <4 x float> @shuf_fmul_v4f32_xx_swap(
43
- ; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
44
- ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
45
- ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3>
46
- ; CHECK-NEXT: [[R:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
47
- ; CHECK-NEXT: ret <4 x float> [[R]]
42
+ ; SSE-LABEL: define <4 x float> @shuf_fmul_v4f32_xx_swap(
43
+ ; SSE-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
44
+ ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
45
+ ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3>
46
+ ; SSE-NEXT: [[R:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
47
+ ; SSE-NEXT: ret <4 x float> [[R]]
48
+ ;
49
+ ; AVX-LABEL: define <4 x float> @shuf_fmul_v4f32_xx_swap(
50
+ ; AVX-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
51
+ ; AVX-NEXT: [[B0:%.*]] = fmul <4 x float> [[X]], [[Y]]
52
+ ; AVX-NEXT: [[B1:%.*]] = fmul <4 x float> [[Z]], [[X]]
53
+ ; AVX-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
54
+ ; AVX-NEXT: ret <4 x float> [[R]]
48
55
;
49
56
%b0 = fmul <4 x float > %x , %y
50
57
%b1 = fmul <4 x float > %z , %x
@@ -57,9 +64,9 @@ define <4 x float> @shuf_fmul_v4f32_xx_swap(<4 x float> %x, <4 x float> %y, <4 x
57
64
define <2 x i64 > @shuf_and_v2i64_yy_swap (<2 x i64 > %x , <2 x i64 > %y , <2 x i64 > %z ) {
58
65
; CHECK-LABEL: define <2 x i64> @shuf_and_v2i64_yy_swap(
59
66
; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
60
- ; CHECK-NEXT: [[TMP1 :%.*]] = shufflevector <2 x i64> [[Y ]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
61
- ; CHECK-NEXT: [[TMP2 :%.*]] = shufflevector <2 x i64> [[X ]], <2 x i64> [[Z]], <2 x i32> <i32 3, i32 0>
62
- ; CHECK-NEXT: [[R:%.*]] = and <2 x i64> [[TMP1 ]], [[TMP2]]
67
+ ; CHECK-NEXT: [[B0 :%.*]] = and <2 x i64> [[X ]], [[Y]]
68
+ ; CHECK-NEXT: [[B1 :%.*]] = and <2 x i64> [[Y ]], [[Z]]
69
+ ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[B0 ]], <2 x i64> [[B1]], <2 x i32> <i32 3, i32 0>
63
70
; CHECK-NEXT: ret <2 x i64> [[R]]
64
71
;
65
72
%b0 = and <2 x i64 > %x , %y
@@ -84,15 +91,22 @@ define <4 x i32> @shuf_shl_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
84
91
ret <4 x i32 > %r
85
92
}
86
93
87
- ; negative test - common operand, but not commutable
94
+ ; common operand, but not commutable (SSE - expensive vector shift vs AVX2 - cheap vector shift)
88
95
89
96
define <4 x i32 > @shuf_shl_v4i32_xx_swap (<4 x i32 > %x , <4 x i32 > %y , <4 x i32 > %z ) {
90
- ; CHECK-LABEL: define <4 x i32> @shuf_shl_v4i32_xx_swap(
91
- ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
92
- ; CHECK-NEXT: [[B0:%.*]] = shl <4 x i32> [[X]], [[Y]]
93
- ; CHECK-NEXT: [[B1:%.*]] = shl <4 x i32> [[Z]], [[X]]
94
- ; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
95
- ; CHECK-NEXT: ret <4 x i32> [[R1]]
97
+ ; SSE-LABEL: define <4 x i32> @shuf_shl_v4i32_xx_swap(
98
+ ; SSE-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
99
+ ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
100
+ ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
101
+ ; SSE-NEXT: [[R:%.*]] = shl <4 x i32> [[TMP1]], [[TMP2]]
102
+ ; SSE-NEXT: ret <4 x i32> [[R]]
103
+ ;
104
+ ; AVX-LABEL: define <4 x i32> @shuf_shl_v4i32_xx_swap(
105
+ ; AVX-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
106
+ ; AVX-NEXT: [[B0:%.*]] = shl <4 x i32> [[X]], [[Y]]
107
+ ; AVX-NEXT: [[B1:%.*]] = shl <4 x i32> [[Z]], [[X]]
108
+ ; AVX-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
109
+ ; AVX-NEXT: ret <4 x i32> [[R]]
96
110
;
97
111
%b0 = shl <4 x i32 > %x , %y
98
112
%b1 = shl <4 x i32 > %z , %x
@@ -116,7 +130,7 @@ define <2 x i64> @shuf_sub_add_v2i64_yy(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z
116
130
ret <2 x i64 > %r
117
131
}
118
132
119
- ; negative test - type change via shuffle
133
+ ; type change via shuffle
120
134
121
135
define <8 x float > @shuf_fmul_v4f32_xx_type (<4 x float > %x , <4 x float > %y , <4 x float > %z ) {
122
136
; CHECK-LABEL: define <8 x float> @shuf_fmul_v4f32_xx_type(
@@ -168,14 +182,14 @@ define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
168
182
ret <4 x i32 > %r
169
183
}
170
184
171
- ; negative test - must have matching operand
185
+ ; non-matching operands (not commutable)
172
186
173
187
define <4 x float > @shuf_fdiv_v4f32_no_common_op (<4 x float > %x , <4 x float > %y , <4 x float > %z , <4 x float > %w ) {
174
188
; CHECK-LABEL: define <4 x float> @shuf_fdiv_v4f32_no_common_op(
175
189
; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]], <4 x float> [[W:%.*]]) #[[ATTR0]] {
176
- ; CHECK-NEXT: [[B0 :%.*]] = fdiv <4 x float> [[X]], [[Y]]
177
- ; CHECK-NEXT: [[B1 :%.*]] = fdiv <4 x float> [[Z ]], [[W]]
178
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0 ]], <4 x float> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
190
+ ; CHECK-NEXT: [[TMP1 :%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
191
+ ; CHECK-NEXT: [[TMP2 :%.*]] = shufflevector <4 x float> [[Y ]], <4 x float> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
192
+ ; CHECK-NEXT: [[R:%.*]] = fdiv <4 x float> [[TMP1 ]], [[TMP2]]
179
193
; CHECK-NEXT: ret <4 x float> [[R]]
180
194
;
181
195
%b0 = fdiv <4 x float > %x , %y
@@ -216,6 +230,3 @@ define <4 x i32> @shuf_srem_v4i32_poison(<4 x i32> %a0, <4 x i32> %a1) {
216
230
ret <4 x i32 > %r
217
231
}
218
232
219
- ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
220
- ; AVX: {{.*}}
221
- ; SSE: {{.*}}
0 commit comments