@@ -32,6 +32,24 @@ define i8 @vreduce_add_v2i8(ptr %x) {
32
32
ret i8 %red
33
33
}
34
34
35
+ declare i8 @llvm.vector.reduce.add.v3i8 (<3 x i8 >)
36
+ ; Add-reduce a <3 x i8> loaded from %x. Expected codegen loads 3 lanes, slides a zero (the additive identity) into lane 3, then runs vredsum over 4 lanes with that zero as the start value.
37
+ define i8 @vreduce_add_v3i8 (ptr %x ) {
38
+ ; CHECK-LABEL: vreduce_add_v3i8:
39
+ ; CHECK: # %bb.0:
40
+ ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
41
+ ; CHECK-NEXT: vle8.v v8, (a0)
42
+ ; CHECK-NEXT: vmv.s.x v9, zero
43
+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
44
+ ; CHECK-NEXT: vslideup.vi v8, v9, 3
45
+ ; CHECK-NEXT: vredsum.vs v8, v8, v9
46
+ ; CHECK-NEXT: vmv.x.s a0, v8
47
+ ; CHECK-NEXT: ret
48
+ %v = load <3 x i8 >, ptr %x
49
+ %red = call i8 @llvm.vector.reduce.add.v3i8 (<3 x i8 > %v )
50
+ ret i8 %red
51
+ }
52
+
35
53
declare i8 @llvm.vector.reduce.add.v4i8 (<4 x i8 >)
36
54
37
55
define i8 @vreduce_add_v4i8 (ptr %x ) {
@@ -1743,6 +1761,25 @@ define i8 @vreduce_and_v2i8(ptr %x) {
1743
1761
ret i8 %red
1744
1762
}
1745
1763
1764
+ declare i8 @llvm.vector.reduce.and.v3i8 (<3 x i8 >)
1765
+ ; AND-reduce a <3 x i8> loaded from %x. Expected codegen loads 3 lanes, slides -1 (all ones, the AND identity) into lane 3, then runs vredand over 4 lanes.
1766
+ define i8 @vreduce_and_v3i8 (ptr %x ) {
1767
+ ; CHECK-LABEL: vreduce_and_v3i8:
1768
+ ; CHECK: # %bb.0:
1769
+ ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
1770
+ ; CHECK-NEXT: vle8.v v8, (a0)
1771
+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
1772
+ ; CHECK-NEXT: vmv.v.i v9, -1
1773
+ ; CHECK-NEXT: vslideup.vi v8, v9, 3
1774
+ ; CHECK-NEXT: vredand.vs v8, v8, v8
1775
+ ; CHECK-NEXT: vmv.x.s a0, v8
1776
+ ; CHECK-NEXT: ret
1777
+ %v = load <3 x i8 >, ptr %x
1778
+ %red = call i8 @llvm.vector.reduce.and.v3i8 (<3 x i8 > %v )
1779
+ ret i8 %red
1780
+ }
1781
+
1782
+
1746
1783
declare i8 @llvm.vector.reduce.and.v4i8 (<4 x i8 >)
1747
1784
1748
1785
define i8 @vreduce_and_v4i8 (ptr %x ) {
@@ -2328,6 +2365,24 @@ define i8 @vreduce_or_v2i8(ptr %x) {
2328
2365
ret i8 %red
2329
2366
}
2330
2367
2368
+ declare i8 @llvm.vector.reduce.or.v3i8 (<3 x i8 >)
2369
+ ; OR-reduce a <3 x i8> loaded from %x. Expected codegen loads 3 lanes, slides a zero (the OR identity) into lane 3, then runs vredor over 4 lanes.
2370
+ define i8 @vreduce_or_v3i8 (ptr %x ) {
2371
+ ; CHECK-LABEL: vreduce_or_v3i8:
2372
+ ; CHECK: # %bb.0:
2373
+ ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
2374
+ ; CHECK-NEXT: vle8.v v8, (a0)
2375
+ ; CHECK-NEXT: vmv.s.x v9, zero
2376
+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
2377
+ ; CHECK-NEXT: vslideup.vi v8, v9, 3
2378
+ ; CHECK-NEXT: vredor.vs v8, v8, v8
2379
+ ; CHECK-NEXT: vmv.x.s a0, v8
2380
+ ; CHECK-NEXT: ret
2381
+ %v = load <3 x i8 >, ptr %x
2382
+ %red = call i8 @llvm.vector.reduce.or.v3i8 (<3 x i8 > %v )
2383
+ ret i8 %red
2384
+ }
2385
+
2331
2386
declare i8 @llvm.vector.reduce.or.v4i8 (<4 x i8 >)
2332
2387
2333
2388
define i8 @vreduce_or_v4i8 (ptr %x ) {
@@ -2914,6 +2969,24 @@ define i8 @vreduce_xor_v2i8(ptr %x) {
2914
2969
ret i8 %red
2915
2970
}
2916
2971
2972
+ declare i8 @llvm.vector.reduce.xor.v3i8 (<3 x i8 >)
2973
+ ; XOR-reduce a <3 x i8> loaded from %x. Expected codegen loads 3 lanes, slides a zero (the XOR identity) into lane 3, then runs vredxor over 4 lanes with that zero as the start value.
2974
+ define i8 @vreduce_xor_v3i8 (ptr %x ) {
2975
+ ; CHECK-LABEL: vreduce_xor_v3i8:
2976
+ ; CHECK: # %bb.0:
2977
+ ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
2978
+ ; CHECK-NEXT: vle8.v v8, (a0)
2979
+ ; CHECK-NEXT: vmv.s.x v9, zero
2980
+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
2981
+ ; CHECK-NEXT: vslideup.vi v8, v9, 3
2982
+ ; CHECK-NEXT: vredxor.vs v8, v8, v9
2983
+ ; CHECK-NEXT: vmv.x.s a0, v8
2984
+ ; CHECK-NEXT: ret
2985
+ %v = load <3 x i8 >, ptr %x
2986
+ %red = call i8 @llvm.vector.reduce.xor.v3i8 (<3 x i8 > %v )
2987
+ ret i8 %red
2988
+ }
2989
+
2917
2990
declare i8 @llvm.vector.reduce.xor.v4i8 (<4 x i8 >)
2918
2991
2919
2992
define i8 @vreduce_xor_v4i8 (ptr %x ) {
@@ -3531,6 +3604,25 @@ define i8 @vreduce_smin_v2i8(ptr %x) {
3531
3604
ret i8 %red
3532
3605
}
3533
3606
3607
+ declare i8 @llvm.vector.reduce.smin.v3i8 (<3 x i8 >)
3608
+ ; Signed-min-reduce a <3 x i8> loaded from %x. Expected codegen loads 3 lanes, slides 127 (the largest signed i8, so it never wins a signed min) into lane 3, then runs vredmin over 4 lanes.
3609
+ define i8 @vreduce_smin_v3i8 (ptr %x ) {
3610
+ ; CHECK-LABEL: vreduce_smin_v3i8:
3611
+ ; CHECK: # %bb.0:
3612
+ ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
3613
+ ; CHECK-NEXT: vle8.v v8, (a0)
3614
+ ; CHECK-NEXT: li a0, 127
3615
+ ; CHECK-NEXT: vmv.s.x v9, a0
3616
+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
3617
+ ; CHECK-NEXT: vslideup.vi v8, v9, 3
3618
+ ; CHECK-NEXT: vredmin.vs v8, v8, v8
3619
+ ; CHECK-NEXT: vmv.x.s a0, v8
3620
+ ; CHECK-NEXT: ret
3621
+ %v = load <3 x i8 >, ptr %x
3622
+ %red = call i8 @llvm.vector.reduce.smin.v3i8 (<3 x i8 > %v )
3623
+ ret i8 %red
3624
+ }
3625
+
3534
3626
declare i8 @llvm.vector.reduce.smin.v4i8 (<4 x i8 >)
3535
3627
3536
3628
define i8 @vreduce_smin_v4i8 (ptr %x ) {
@@ -4116,6 +4208,25 @@ define i8 @vreduce_smax_v2i8(ptr %x) {
4116
4208
ret i8 %red
4117
4209
}
4118
4210
4211
+ declare i8 @llvm.vector.reduce.smax.v3i8 (<3 x i8 >)
4212
+ ; Signed-max-reduce a <3 x i8> loaded from %x. Expected codegen loads 3 lanes, slides -128 (the smallest signed i8, so it never wins a signed max) into lane 3, then runs vredmax over 4 lanes.
4213
+ define i8 @vreduce_smax_v3i8 (ptr %x ) {
4214
+ ; CHECK-LABEL: vreduce_smax_v3i8:
4215
+ ; CHECK: # %bb.0:
4216
+ ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
4217
+ ; CHECK-NEXT: vle8.v v8, (a0)
4218
+ ; CHECK-NEXT: li a0, -128
4219
+ ; CHECK-NEXT: vmv.s.x v9, a0
4220
+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
4221
+ ; CHECK-NEXT: vslideup.vi v8, v9, 3
4222
+ ; CHECK-NEXT: vredmax.vs v8, v8, v8
4223
+ ; CHECK-NEXT: vmv.x.s a0, v8
4224
+ ; CHECK-NEXT: ret
4225
+ %v = load <3 x i8 >, ptr %x
4226
+ %red = call i8 @llvm.vector.reduce.smax.v3i8 (<3 x i8 > %v )
4227
+ ret i8 %red
4228
+ }
4229
+
4119
4230
declare i8 @llvm.vector.reduce.smax.v4i8 (<4 x i8 >)
4120
4231
4121
4232
define i8 @vreduce_smax_v4i8 (ptr %x ) {
@@ -4701,6 +4812,24 @@ define i8 @vreduce_umin_v2i8(ptr %x) {
4701
4812
ret i8 %red
4702
4813
}
4703
4814
4815
+ declare i8 @llvm.vector.reduce.umin.v3i8 (<3 x i8 >)
4816
+ ; Unsigned-min-reduce a <3 x i8> loaded from %x. Expected codegen loads 3 lanes, slides -1 (0xFF, the largest unsigned i8, so it never wins an unsigned min) into lane 3, then runs vredminu over 4 lanes.
4817
+ define i8 @vreduce_umin_v3i8 (ptr %x ) {
4818
+ ; CHECK-LABEL: vreduce_umin_v3i8:
4819
+ ; CHECK: # %bb.0:
4820
+ ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
4821
+ ; CHECK-NEXT: vle8.v v8, (a0)
4822
+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
4823
+ ; CHECK-NEXT: vmv.v.i v9, -1
4824
+ ; CHECK-NEXT: vslideup.vi v8, v9, 3
4825
+ ; CHECK-NEXT: vredminu.vs v8, v8, v8
4826
+ ; CHECK-NEXT: vmv.x.s a0, v8
4827
+ ; CHECK-NEXT: ret
4828
+ %v = load <3 x i8 >, ptr %x
4829
+ %red = call i8 @llvm.vector.reduce.umin.v3i8 (<3 x i8 > %v )
4830
+ ret i8 %red
4831
+ }
4832
+
4704
4833
declare i8 @llvm.vector.reduce.umin.v4i8 (<4 x i8 >)
4705
4834
4706
4835
define i8 @vreduce_umin_v4i8 (ptr %x ) {
@@ -5286,6 +5415,24 @@ define i8 @vreduce_umax_v2i8(ptr %x) {
5286
5415
ret i8 %red
5287
5416
}
5288
5417
5418
+ declare i8 @llvm.vector.reduce.umax.v3i8 (<3 x i8 >)
5419
+ ; Unsigned-max-reduce a <3 x i8> loaded from %x. Expected codegen loads 3 lanes, slides a zero (the smallest unsigned i8, so it never wins an unsigned max) into lane 3, then runs vredmaxu over 4 lanes.
5420
+ define i8 @vreduce_umax_v3i8 (ptr %x ) {
5421
+ ; CHECK-LABEL: vreduce_umax_v3i8:
5422
+ ; CHECK: # %bb.0:
5423
+ ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
5424
+ ; CHECK-NEXT: vle8.v v8, (a0)
5425
+ ; CHECK-NEXT: vmv.s.x v9, zero
5426
+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
5427
+ ; CHECK-NEXT: vslideup.vi v8, v9, 3
5428
+ ; CHECK-NEXT: vredmaxu.vs v8, v8, v8
5429
+ ; CHECK-NEXT: vmv.x.s a0, v8
5430
+ ; CHECK-NEXT: ret
5431
+ %v = load <3 x i8 >, ptr %x
5432
+ %red = call i8 @llvm.vector.reduce.umax.v3i8 (<3 x i8 > %v )
5433
+ ret i8 %red
5434
+ }
5435
+
5289
5436
declare i8 @llvm.vector.reduce.umax.v4i8 (<4 x i8 >)
5290
5437
5291
5438
define i8 @vreduce_umax_v4i8 (ptr %x ) {
@@ -5872,6 +6019,30 @@ define i8 @vreduce_mul_v2i8(ptr %x) {
5872
6019
ret i8 %red
5873
6020
}
5874
6021
6022
+ declare i8 @llvm.vector.reduce.mul.v3i8 (<3 x i8 >)
6023
+ ; Multiply-reduce a <3 x i8> loaded from %x. Expected codegen loads 3 lanes, slides 1 (the multiplicative identity) into lane 3, then folds the 4 lanes with two vslidedown/vmul steps (by 2, then by 1) instead of a single reduction instruction.
6024
+ define i8 @vreduce_mul_v3i8 (ptr %x ) {
6025
+ ; CHECK-LABEL: vreduce_mul_v3i8:
6026
+ ; CHECK: # %bb.0:
6027
+ ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
6028
+ ; CHECK-NEXT: vle8.v v8, (a0)
6029
+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
6030
+ ; CHECK-NEXT: vmv.v.i v9, 1
6031
+ ; CHECK-NEXT: vslideup.vi v8, v9, 3
6032
+ ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6033
+ ; CHECK-NEXT: vslidedown.vi v9, v8, 2
6034
+ ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
6035
+ ; CHECK-NEXT: vmul.vv v8, v8, v9
6036
+ ; CHECK-NEXT: vslidedown.vi v9, v8, 1
6037
+ ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
6038
+ ; CHECK-NEXT: vmul.vv v8, v8, v9
6039
+ ; CHECK-NEXT: vmv.x.s a0, v8
6040
+ ; CHECK-NEXT: ret
6041
+ %v = load <3 x i8 >, ptr %x
6042
+ %red = call i8 @llvm.vector.reduce.mul.v3i8 (<3 x i8 > %v )
6043
+ ret i8 %red
6044
+ }
6045
+
5875
6046
declare i8 @llvm.vector.reduce.mul.v4i8 (<4 x i8 >)
5876
6047
5877
6048
define i8 @vreduce_mul_v4i8 (ptr %x ) {
0 commit comments