@@ -70,15 +70,30 @@ define i8 @v_ashr_i8_7(i8 %value) {
70
70
}
71
71
72
72
define amdgpu_ps i8 @s_ashr_i8 (i8 inreg %value , i8 inreg %amount ) {
73
- ; GCN-LABEL: s_ashr_i8:
74
- ; GCN: ; %bb.0:
75
- ; GCN-NEXT: s_sext_i32_i8 s0, s0
76
- ; GCN-NEXT: s_ashr_i32 s0, s0, s1
77
- ; GCN-NEXT: ; return to shader part epilog
73
+ ; GFX6-LABEL: s_ashr_i8:
74
+ ; GFX6: ; %bb.0:
75
+ ; GFX6-NEXT: s_sext_i32_i8 s0, s0
76
+ ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
77
+ ; GFX6-NEXT: ; return to shader part epilog
78
+ ;
79
+ ; GFX8-LABEL: s_ashr_i8:
80
+ ; GFX8: ; %bb.0:
81
+ ; GFX8-NEXT: s_sext_i32_i8 s0, s0
82
+ ; GFX8-NEXT: s_sext_i32_i8 s1, s1
83
+ ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
84
+ ; GFX8-NEXT: ; return to shader part epilog
85
+ ;
86
+ ; GFX9-LABEL: s_ashr_i8:
87
+ ; GFX9: ; %bb.0:
88
+ ; GFX9-NEXT: s_sext_i32_i8 s0, s0
89
+ ; GFX9-NEXT: s_sext_i32_i8 s1, s1
90
+ ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
91
+ ; GFX9-NEXT: ; return to shader part epilog
78
92
;
79
93
; GFX10PLUS-LABEL: s_ashr_i8:
80
94
; GFX10PLUS: ; %bb.0:
81
95
; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
96
+ ; GFX10PLUS-NEXT: s_sext_i32_i8 s1, s1
82
97
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
83
98
; GFX10PLUS-NEXT: ; return to shader part epilog
84
99
%result = ashr i8 %value , %amount
@@ -627,15 +642,30 @@ define i16 @v_ashr_i16_15(i16 %value) {
627
642
}
628
643
629
644
define amdgpu_ps i16 @s_ashr_i16 (i16 inreg %value , i16 inreg %amount ) {
630
- ; GCN-LABEL: s_ashr_i16:
631
- ; GCN: ; %bb.0:
632
- ; GCN-NEXT: s_sext_i32_i16 s0, s0
633
- ; GCN-NEXT: s_ashr_i32 s0, s0, s1
634
- ; GCN-NEXT: ; return to shader part epilog
645
+ ; GFX6-LABEL: s_ashr_i16:
646
+ ; GFX6: ; %bb.0:
647
+ ; GFX6-NEXT: s_sext_i32_i16 s0, s0
648
+ ; GFX6-NEXT: s_ashr_i32 s0, s0, s1
649
+ ; GFX6-NEXT: ; return to shader part epilog
650
+ ;
651
+ ; GFX8-LABEL: s_ashr_i16:
652
+ ; GFX8: ; %bb.0:
653
+ ; GFX8-NEXT: s_sext_i32_i16 s0, s0
654
+ ; GFX8-NEXT: s_sext_i32_i16 s1, s1
655
+ ; GFX8-NEXT: s_ashr_i32 s0, s0, s1
656
+ ; GFX8-NEXT: ; return to shader part epilog
657
+ ;
658
+ ; GFX9-LABEL: s_ashr_i16:
659
+ ; GFX9: ; %bb.0:
660
+ ; GFX9-NEXT: s_sext_i32_i16 s0, s0
661
+ ; GFX9-NEXT: s_sext_i32_i16 s1, s1
662
+ ; GFX9-NEXT: s_ashr_i32 s0, s0, s1
663
+ ; GFX9-NEXT: ; return to shader part epilog
635
664
;
636
665
; GFX10PLUS-LABEL: s_ashr_i16:
637
666
; GFX10PLUS: ; %bb.0:
638
667
; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
668
+ ; GFX10PLUS-NEXT: s_sext_i32_i16 s1, s1
639
669
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
640
670
; GFX10PLUS-NEXT: ; return to shader part epilog
641
671
%result = ashr i16 %value , %amount
@@ -796,15 +826,14 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
796
826
;
797
827
; GFX8-LABEL: s_ashr_v2i16:
798
828
; GFX8: ; %bb.0:
799
- ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
800
- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
801
- ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
829
+ ; GFX8-NEXT: s_sext_i32_i16 s2, s0
830
+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
831
+ ; GFX8-NEXT: s_sext_i32_i16 s3, s1
832
+ ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
833
+ ; GFX8-NEXT: s_ashr_i32 s2, s2, s3
802
834
; GFX8-NEXT: s_ashr_i32 s0, s0, s1
803
- ; GFX8-NEXT: s_sext_i32_i16 s1, s2
804
- ; GFX8-NEXT: s_ashr_i32 s1, s1, s3
805
- ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
806
- ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
807
- ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
835
+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
836
+ ; GFX8-NEXT: s_and_b32 s1, s2, 0xffff
808
837
; GFX8-NEXT: s_or_b32 s0, s0, s1
809
838
; GFX8-NEXT: ; return to shader part epilog
810
839
;
@@ -999,25 +1028,23 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
999
1028
;
1000
1029
; GFX8-LABEL: s_ashr_v4i16:
1001
1030
; GFX8: ; %bb.0:
1002
- ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1003
- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1004
- ; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1031
+ ; GFX8-NEXT: s_sext_i32_i16 s4, s0
1032
+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1033
+ ; GFX8-NEXT: s_sext_i32_i16 s5, s1
1034
+ ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1035
+ ; GFX8-NEXT: s_sext_i32_i16 s6, s2
1036
+ ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1037
+ ; GFX8-NEXT: s_sext_i32_i16 s7, s3
1038
+ ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1039
+ ; GFX8-NEXT: s_ashr_i32 s4, s4, s6
1005
1040
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1006
- ; GFX8-NEXT: s_sext_i32_i16 s2, s4
1007
- ; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1008
- ; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1009
- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1010
- ; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1041
+ ; GFX8-NEXT: s_ashr_i32 s2, s5, s7
1011
1042
; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1012
- ; GFX8-NEXT: s_sext_i32_i16 s3, s5
1013
- ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1014
- ; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1015
- ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1016
- ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1017
- ; GFX8-NEXT: s_or_b32 s0, s0, s2
1018
- ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1019
- ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1020
- ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1043
+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1044
+ ; GFX8-NEXT: s_and_b32 s3, s4, 0xffff
1045
+ ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1046
+ ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1047
+ ; GFX8-NEXT: s_or_b32 s0, s0, s3
1021
1048
; GFX8-NEXT: s_or_b32 s1, s1, s2
1022
1049
; GFX8-NEXT: ; return to shader part epilog
1023
1050
;
@@ -1208,45 +1235,41 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
1208
1235
;
1209
1236
; GFX8-LABEL: s_ashr_v8i16:
1210
1237
; GFX8: ; %bb.0:
1211
- ; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1212
- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
1213
- ; GFX8-NEXT: s_lshr_b32 s12, s4, 16
1238
+ ; GFX8-NEXT: s_sext_i32_i16 s8, s0
1239
+ ; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1240
+ ; GFX8-NEXT: s_sext_i32_i16 s9, s1
1241
+ ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1242
+ ; GFX8-NEXT: s_sext_i32_i16 s12, s4
1243
+ ; GFX8-NEXT: s_bfe_i32 s4, s4, 0x100010
1244
+ ; GFX8-NEXT: s_sext_i32_i16 s13, s5
1245
+ ; GFX8-NEXT: s_bfe_i32 s5, s5, 0x100010
1246
+ ; GFX8-NEXT: s_sext_i32_i16 s10, s2
1247
+ ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1248
+ ; GFX8-NEXT: s_sext_i32_i16 s14, s6
1249
+ ; GFX8-NEXT: s_bfe_i32 s6, s6, 0x100010
1214
1250
; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1215
- ; GFX8-NEXT: s_sext_i32_i16 s4, s8
1216
- ; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1217
- ; GFX8-NEXT: s_ashr_i32 s4, s4, s12
1218
- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
1219
- ; GFX8-NEXT: s_lshr_b32 s13, s5, 16
1251
+ ; GFX8-NEXT: s_ashr_i32 s4, s9, s13
1220
1252
; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1221
- ; GFX8-NEXT: s_sext_i32_i16 s5, s9
1222
- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1223
- ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1224
- ; GFX8-NEXT: s_ashr_i32 s5, s5, s13
1225
- ; GFX8-NEXT: s_sext_i32_i16 s2, s2
1226
- ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1227
- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1228
- ; GFX8-NEXT: s_lshr_b32 s14, s6, 16
1253
+ ; GFX8-NEXT: s_sext_i32_i16 s11, s3
1254
+ ; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1255
+ ; GFX8-NEXT: s_sext_i32_i16 s15, s7
1256
+ ; GFX8-NEXT: s_bfe_i32 s7, s7, 0x100010
1257
+ ; GFX8-NEXT: s_ashr_i32 s5, s10, s14
1229
1258
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1230
- ; GFX8-NEXT: s_sext_i32_i16 s6, s10
1231
- ; GFX8-NEXT: s_or_b32 s0, s0, s4
1232
- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1233
- ; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1234
- ; GFX8-NEXT: s_ashr_i32 s6, s6, s14
1235
- ; GFX8-NEXT: s_sext_i32_i16 s3, s3
1236
- ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1237
- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1238
- ; GFX8-NEXT: s_lshr_b32 s15, s7, 16
1259
+ ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1260
+ ; GFX8-NEXT: s_and_b32 s4, s4, 0xffff
1261
+ ; GFX8-NEXT: s_ashr_i32 s8, s8, s12
1262
+ ; GFX8-NEXT: s_ashr_i32 s6, s11, s15
1239
1263
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1240
- ; GFX8-NEXT: s_sext_i32_i16 s7, s11
1241
1264
; GFX8-NEXT: s_or_b32 s1, s1, s4
1242
- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1243
- ; GFX8-NEXT: s_ashr_i32 s7, s7, s15
1244
- ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1245
- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1265
+ ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1266
+ ; GFX8-NEXT: s_and_b32 s4, s5, 0xffff
1267
+ ; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1268
+ ; GFX8-NEXT: s_and_b32 s7, s8, 0xffff
1246
1269
; GFX8-NEXT: s_or_b32 s2, s2, s4
1247
- ; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1248
- ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1249
- ; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1270
+ ; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1271
+ ; GFX8-NEXT: s_and_b32 s4, s6, 0xffff
1272
+ ; GFX8-NEXT: s_or_b32 s0, s0, s7
1250
1273
; GFX8-NEXT: s_or_b32 s3, s3, s4
1251
1274
; GFX8-NEXT: ; return to shader part epilog
1252
1275
;
0 commit comments