@@ -5,6 +5,10 @@ declare half @llvm.minimum.f16(half, half)
5
5
declare half @llvm.maximum.f16 (half , half )
6
6
declare <8 x half > @llvm.minimum.v8f16 (<8 x half >, <8 x half >)
7
7
declare <8 x half > @llvm.maximum.v8f16 (<8 x half >, <8 x half >)
8
+ declare <16 x half > @llvm.minimum.v16f16 (<16 x half >, <16 x half >)
9
+ declare <16 x half > @llvm.maximum.v16f16 (<16 x half >, <16 x half >)
10
+ declare <32 x half > @llvm.minimum.v32f16 (<32 x half >, <32 x half >)
11
+ declare <32 x half > @llvm.maximum.v32f16 (<32 x half >, <32 x half >)
8
12
9
13
define half @test_fminimum (half %x , half %y ) {
10
14
; CHECK-LABEL: test_fminimum:
@@ -25,20 +29,10 @@ define half @test_fminimum(half %x, half %y) {
25
29
ret half %z
26
30
}
27
31
28
- define <8 x half > @test_fminimum_scalarize (<8 x half > %x , <8 x half > %y ) "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" {
29
- ; CHECK-LABEL: test_fminimum_scalarize :
32
+ define <8 x half > @test_fminimum_v8f16 (<8 x half > %x , <8 x half > %y ) "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" {
33
+ ; CHECK-LABEL: test_fminimum_v8f16 :
30
34
; CHECK: # %bb.0:
31
- ; CHECK-NEXT: vcmpltph %xmm1, %xmm0, %k1
32
- ; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm2 {%k1}
33
- ; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm3 = [32768,32768,32768,32768,32768,32768,32768,32768]
34
- ; CHECK-NEXT: vpcmpeqw %xmm3, %xmm0, %k1
35
- ; CHECK-NEXT: vpblendmw %xmm0, %xmm2, %xmm0 {%k1}
36
- ; CHECK-NEXT: vpcmpeqw %xmm3, %xmm1, %k1
37
- ; CHECK-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
38
- ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
39
- ; CHECK-NEXT: vcmpeqph %xmm1, %xmm2, %k1
40
- ; CHECK-NEXT: vmovdqu16 %xmm0, %xmm2 {%k1}
41
- ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
35
+ ; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0
42
36
; CHECK-NEXT: retq
43
37
%r = call <8 x half > @llvm.minimum.v8f16 (<8 x half > %x , <8 x half > %y )
44
38
ret <8 x half > %r
@@ -113,19 +107,10 @@ define half @test_fmaximum(half %x, half %y) {
113
107
ret half %r
114
108
}
115
109
116
- define <8 x half > @test_fmaximum_scalarize (<8 x half > %x , <8 x half > %y ) "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" {
117
- ; CHECK-LABEL: test_fmaximum_scalarize :
110
+ define <8 x half > @test_fmaximum_v8f16 (<8 x half > %x , <8 x half > %y ) "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" {
111
+ ; CHECK-LABEL: test_fmaximum_v8f16 :
118
112
; CHECK: # %bb.0:
119
- ; CHECK-NEXT: vcmpltph %xmm0, %xmm1, %k1
120
- ; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm2 {%k1}
121
- ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
122
- ; CHECK-NEXT: vpblendmw %xmm0, %xmm2, %xmm0 {%k1}
123
- ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
124
- ; CHECK-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
125
- ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
126
- ; CHECK-NEXT: vcmpeqph %xmm1, %xmm2, %k1
127
- ; CHECK-NEXT: vmovdqu16 %xmm0, %xmm2 {%k1}
128
- ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
113
+ ; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0
129
114
; CHECK-NEXT: retq
130
115
%r = call <8 x half > @llvm.maximum.v8f16 (<8 x half > %x , <8 x half > %y )
131
116
ret <8 x half > %r
@@ -186,3 +171,50 @@ define half @test_fmaximum_combine_cmps(half %x, half %y) {
186
171
%2 = tail call half @llvm.maximum.f16 (half %x , half %1 )
187
172
ret half %2
188
173
}
174
+
175
; 256-bit llvm.minimum.v16f16 with both "no-nans-fp-math" and
; "no-signed-zeros-fp-math" set on the function: the expected assembly is a
; single vminph on ymm registers, with no NaN or signed-zero fixup code.
+ define <16 x half > @test_fminimum_v16f16 (<16 x half > %x , <16 x half > %y ) "no-nans-fp-math" ="true" "no-signed-zeros-fp-math" ="true" {
176
+ ; CHECK-LABEL: test_fminimum_v16f16:
177
+ ; CHECK: # %bb.0:
178
+ ; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0
179
+ ; CHECK-NEXT: retq
180
+ %r = call <16 x half > @llvm.minimum.v16f16 (<16 x half > %x , <16 x half > %y )
181
+ ret <16 x half > %r
182
+ }
183
+
184
; 256-bit llvm.maximum.v16f16 with only "no-signed-zeros-fp-math" set, so NaN
; inputs are not excluded. The expected assembly follows vmaxph with a
; self-compare of %ymm0 (vcmpunordph) and a masked move of %ymm0 into the
; vmaxph result under that mask.
; NOTE(review): presumably the fixup forwards a NaN held in %ymm0, which
; vmaxph alone would not return - confirm against the ISA reference.
+ define <16 x half > @test_fmaximum_v16f16_nans (<16 x half > %x , <16 x half > %y ) "no-signed-zeros-fp-math" ="true" {
185
+ ; CHECK-LABEL: test_fmaximum_v16f16_nans:
186
+ ; CHECK: # %bb.0:
187
+ ; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm1
188
+ ; CHECK-NEXT: vcmpunordph %ymm0, %ymm0, %k1
189
+ ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
190
+ ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
191
+ ; CHECK-NEXT: retq
192
+ %r = call <16 x half > @llvm.maximum.v16f16 (<16 x half > %x , <16 x half > %y )
193
+ ret <16 x half > %r
194
+ }
195
+
196
; 512-bit llvm.minimum.v32f16 with only "no-nans-fp-math" set, so signed
; zeros are not excluded. The expected assembly derives a mask from %zmm0
; (vpmovw2m), uses it for a blend and a masked move of the two inputs, and
; then issues vminph; there is no unordered-compare fixup.
; NOTE(review): presumably the mask holds the per-lane sign bits and the
; swap steers -0.0 into the operand vminph returns for equal inputs - confirm.
+ define <32 x half > @test_fminimum_v32f16_szero (<32 x half > %x , <32 x half > %y ) "no-nans-fp-math" ="true" {
197
+ ; CHECK-LABEL: test_fminimum_v32f16_szero:
198
+ ; CHECK: # %bb.0:
199
+ ; CHECK-NEXT: vpmovw2m %zmm0, %k1
200
+ ; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm2 {%k1}
201
+ ; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
202
+ ; CHECK-NEXT: vminph %zmm2, %zmm0, %zmm0
203
+ ; CHECK-NEXT: retq
204
+ %r = call <32 x half > @llvm.minimum.v32f16 (<32 x half > %x , <32 x half > %y )
205
+ ret <32 x half > %r
206
+ }
207
+
208
; 512-bit llvm.maximum.v32f16 with no fast-math function attributes. The
; expected assembly contains both fixups: a mask derived from %zmm0
; (vpmovw2m) drives a blend and a masked move of the inputs before vmaxph,
; and a vcmpunordph self-compare of %zmm1 then drives a masked move of
; %zmm1 into the vmaxph result.
; NOTE(review): presumably the first fixup orders signed zeros and the
; second forwards NaNs - confirm against the ISA reference.
+ define <32 x half > @test_fmaximum_v32f16_nans_szero (<32 x half > %x , <32 x half > %y ) {
209
+ ; CHECK-LABEL: test_fmaximum_v32f16_nans_szero:
210
+ ; CHECK: # %bb.0:
211
+ ; CHECK-NEXT: vpmovw2m %zmm0, %k1
212
+ ; CHECK-NEXT: vpblendmw %zmm1, %zmm0, %zmm2 {%k1}
213
+ ; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
214
+ ; CHECK-NEXT: vmaxph %zmm2, %zmm1, %zmm0
215
+ ; CHECK-NEXT: vcmpunordph %zmm1, %zmm1, %k1
216
+ ; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
217
+ ; CHECK-NEXT: retq
218
+ %r = call <32 x half > @llvm.maximum.v32f16 (<32 x half > %x , <32 x half > %y )
219
+ ret <32 x half > %r
220
+ }
0 commit comments