Skip to content

Commit 2f9a3ab

Browse files
Revert "[AMDGPU] Do not promote uniform i16 operations to i32 in CGP (llvm#140208)"
This reverts commit aacebae.
1 parent 82bad53 commit 2f9a3ab

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+19032
-17565
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,10 +47,10 @@ static cl::opt<bool> WidenLoads(
4747
cl::init(false));
4848

4949
static cl::opt<bool> Widen16BitOps(
50-
"amdgpu-codegenprepare-widen-16-bit-ops",
51-
cl::desc(
52-
"Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"),
53-
cl::ReallyHidden, cl::init(false));
50+
"amdgpu-codegenprepare-widen-16-bit-ops",
51+
cl::desc("Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"),
52+
cl::ReallyHidden,
53+
cl::init(true));
5454

5555
static cl::opt<bool>
5656
BreakLargePHIs("amdgpu-codegenprepare-break-large-phis",

llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll

Lines changed: 36 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -281,12 +281,12 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
281281
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_splat:
282282
; GFX8: ; %bb.0:
283283
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
284-
; GFX8-NEXT: s_add_i32 s1, s1, 0xffc0
284+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
285285
; GFX8-NEXT: s_add_i32 s0, s0, 0xffc0
286-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
287-
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
286+
; GFX8-NEXT: s_add_i32 s1, s1, 0xffc0
288287
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
289-
; GFX8-NEXT: s_or_b32 s0, s0, s1
288+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
289+
; GFX8-NEXT: s_or_b32 s0, s1, s0
290290
; GFX8-NEXT: ; return to shader part epilog
291291
;
292292
; GFX10-LABEL: s_add_v2i16_neg_inline_imm_splat:
@@ -323,12 +323,12 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
323323
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_lo:
324324
; GFX8: ; %bb.0:
325325
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
326-
; GFX8-NEXT: s_add_i32 s1, s1, 4
326+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
327327
; GFX8-NEXT: s_add_i32 s0, s0, 0xffc0
328-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
329-
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
328+
; GFX8-NEXT: s_add_i32 s1, s1, 4
330329
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
331-
; GFX8-NEXT: s_or_b32 s0, s0, s1
330+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
331+
; GFX8-NEXT: s_or_b32 s0, s1, s0
332332
; GFX8-NEXT: ; return to shader part epilog
333333
;
334334
; GFX10-LABEL: s_add_v2i16_neg_inline_imm_lo:
@@ -365,12 +365,12 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
365365
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_hi:
366366
; GFX8: ; %bb.0:
367367
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
368-
; GFX8-NEXT: s_add_i32 s1, s1, 0xffc0
368+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
369369
; GFX8-NEXT: s_add_i32 s0, s0, 4
370-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
371-
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
370+
; GFX8-NEXT: s_add_i32 s1, s1, 0xffc0
372371
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
373-
; GFX8-NEXT: s_or_b32 s0, s0, s1
372+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
373+
; GFX8-NEXT: s_or_b32 s0, s1, s0
374374
; GFX8-NEXT: ; return to shader part epilog
375375
;
376376
; GFX10-LABEL: s_add_v2i16_neg_inline_imm_hi:
@@ -408,13 +408,14 @@ define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
408408
; GFX8-LABEL: s_add_v2i16:
409409
; GFX8: ; %bb.0:
410410
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
411+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
411412
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
412-
; GFX8-NEXT: s_add_i32 s2, s2, s3
413+
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
413414
; GFX8-NEXT: s_add_i32 s0, s0, s1
414-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
415-
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
416-
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
417-
; GFX8-NEXT: s_or_b32 s0, s0, s1
415+
; GFX8-NEXT: s_add_i32 s2, s2, s3
416+
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
417+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
418+
; GFX8-NEXT: s_or_b32 s0, s1, s0
418419
; GFX8-NEXT: ; return to shader part epilog
419420
;
420421
; GFX10-LABEL: s_add_v2i16:
@@ -460,13 +461,14 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
460461
; GFX8: ; %bb.0:
461462
; GFX8-NEXT: s_xor_b32 s0, s0, 0x80008000
462463
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
464+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
463465
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
464-
; GFX8-NEXT: s_add_i32 s2, s2, s3
466+
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
465467
; GFX8-NEXT: s_add_i32 s0, s0, s1
466-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
467-
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
468-
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
469-
; GFX8-NEXT: s_or_b32 s0, s0, s1
468+
; GFX8-NEXT: s_add_i32 s2, s2, s3
469+
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
470+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
471+
; GFX8-NEXT: s_or_b32 s0, s1, s0
470472
; GFX8-NEXT: ; return to shader part epilog
471473
;
472474
; GFX10-LABEL: s_add_v2i16_fneg_lhs:
@@ -515,13 +517,14 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
515517
; GFX8: ; %bb.0:
516518
; GFX8-NEXT: s_xor_b32 s1, s1, 0x80008000
517519
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
520+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
518521
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
519-
; GFX8-NEXT: s_add_i32 s2, s2, s3
522+
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
520523
; GFX8-NEXT: s_add_i32 s0, s0, s1
521-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
522-
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
523-
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
524-
; GFX8-NEXT: s_or_b32 s0, s0, s1
524+
; GFX8-NEXT: s_add_i32 s2, s2, s3
525+
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
526+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
527+
; GFX8-NEXT: s_or_b32 s0, s1, s0
525528
; GFX8-NEXT: ; return to shader part epilog
526529
;
527530
; GFX10-LABEL: s_add_v2i16_fneg_rhs:
@@ -577,13 +580,14 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x ha
577580
; GFX8-NEXT: s_xor_b32 s0, s0, 0x80008000
578581
; GFX8-NEXT: s_xor_b32 s1, s1, 0x80008000
579582
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
583+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
580584
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
581-
; GFX8-NEXT: s_add_i32 s2, s2, s3
585+
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
582586
; GFX8-NEXT: s_add_i32 s0, s0, s1
583-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
584-
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
585-
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
586-
; GFX8-NEXT: s_or_b32 s0, s0, s1
587+
; GFX8-NEXT: s_add_i32 s2, s2, s3
588+
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
589+
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
590+
; GFX8-NEXT: s_or_b32 s0, s1, s0
587591
; GFX8-NEXT: ; return to shader part epilog
588592
;
589593
; GFX10-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 91 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -70,15 +70,30 @@ define i8 @v_ashr_i8_7(i8 %value) {
7070
}
7171

7272
define amdgpu_ps i8 @s_ashr_i8(i8 inreg %value, i8 inreg %amount) {
73-
; GCN-LABEL: s_ashr_i8:
74-
; GCN: ; %bb.0:
75-
; GCN-NEXT: s_sext_i32_i8 s0, s0
76-
; GCN-NEXT: s_ashr_i32 s0, s0, s1
77-
; GCN-NEXT: ; return to shader part epilog
73+
; GFX6-LABEL: s_ashr_i8:
74+
; GFX6: ; %bb.0:
75+
; GFX6-NEXT: s_sext_i32_i8 s0, s0
76+
; GFX6-NEXT: s_ashr_i32 s0, s0, s1
77+
; GFX6-NEXT: ; return to shader part epilog
78+
;
79+
; GFX8-LABEL: s_ashr_i8:
80+
; GFX8: ; %bb.0:
81+
; GFX8-NEXT: s_sext_i32_i8 s0, s0
82+
; GFX8-NEXT: s_sext_i32_i8 s1, s1
83+
; GFX8-NEXT: s_ashr_i32 s0, s0, s1
84+
; GFX8-NEXT: ; return to shader part epilog
85+
;
86+
; GFX9-LABEL: s_ashr_i8:
87+
; GFX9: ; %bb.0:
88+
; GFX9-NEXT: s_sext_i32_i8 s0, s0
89+
; GFX9-NEXT: s_sext_i32_i8 s1, s1
90+
; GFX9-NEXT: s_ashr_i32 s0, s0, s1
91+
; GFX9-NEXT: ; return to shader part epilog
7892
;
7993
; GFX10PLUS-LABEL: s_ashr_i8:
8094
; GFX10PLUS: ; %bb.0:
8195
; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
96+
; GFX10PLUS-NEXT: s_sext_i32_i8 s1, s1
8297
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
8398
; GFX10PLUS-NEXT: ; return to shader part epilog
8499
%result = ashr i8 %value, %amount
@@ -627,15 +642,30 @@ define i16 @v_ashr_i16_15(i16 %value) {
627642
}
628643

629644
define amdgpu_ps i16 @s_ashr_i16(i16 inreg %value, i16 inreg %amount) {
630-
; GCN-LABEL: s_ashr_i16:
631-
; GCN: ; %bb.0:
632-
; GCN-NEXT: s_sext_i32_i16 s0, s0
633-
; GCN-NEXT: s_ashr_i32 s0, s0, s1
634-
; GCN-NEXT: ; return to shader part epilog
645+
; GFX6-LABEL: s_ashr_i16:
646+
; GFX6: ; %bb.0:
647+
; GFX6-NEXT: s_sext_i32_i16 s0, s0
648+
; GFX6-NEXT: s_ashr_i32 s0, s0, s1
649+
; GFX6-NEXT: ; return to shader part epilog
650+
;
651+
; GFX8-LABEL: s_ashr_i16:
652+
; GFX8: ; %bb.0:
653+
; GFX8-NEXT: s_sext_i32_i16 s0, s0
654+
; GFX8-NEXT: s_sext_i32_i16 s1, s1
655+
; GFX8-NEXT: s_ashr_i32 s0, s0, s1
656+
; GFX8-NEXT: ; return to shader part epilog
657+
;
658+
; GFX9-LABEL: s_ashr_i16:
659+
; GFX9: ; %bb.0:
660+
; GFX9-NEXT: s_sext_i32_i16 s0, s0
661+
; GFX9-NEXT: s_sext_i32_i16 s1, s1
662+
; GFX9-NEXT: s_ashr_i32 s0, s0, s1
663+
; GFX9-NEXT: ; return to shader part epilog
635664
;
636665
; GFX10PLUS-LABEL: s_ashr_i16:
637666
; GFX10PLUS: ; %bb.0:
638667
; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
668+
; GFX10PLUS-NEXT: s_sext_i32_i16 s1, s1
639669
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1
640670
; GFX10PLUS-NEXT: ; return to shader part epilog
641671
%result = ashr i16 %value, %amount
@@ -796,15 +826,14 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
796826
;
797827
; GFX8-LABEL: s_ashr_v2i16:
798828
; GFX8: ; %bb.0:
799-
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
800-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
801-
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
829+
; GFX8-NEXT: s_sext_i32_i16 s2, s0
830+
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
831+
; GFX8-NEXT: s_sext_i32_i16 s3, s1
832+
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
833+
; GFX8-NEXT: s_ashr_i32 s2, s2, s3
802834
; GFX8-NEXT: s_ashr_i32 s0, s0, s1
803-
; GFX8-NEXT: s_sext_i32_i16 s1, s2
804-
; GFX8-NEXT: s_ashr_i32 s1, s1, s3
805-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
806-
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
807-
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
835+
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
836+
; GFX8-NEXT: s_and_b32 s1, s2, 0xffff
808837
; GFX8-NEXT: s_or_b32 s0, s0, s1
809838
; GFX8-NEXT: ; return to shader part epilog
810839
;
@@ -999,25 +1028,23 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
9991028
;
10001029
; GFX8-LABEL: s_ashr_v4i16:
10011030
; GFX8: ; %bb.0:
1002-
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1003-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
1004-
; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1031+
; GFX8-NEXT: s_sext_i32_i16 s4, s0
1032+
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1033+
; GFX8-NEXT: s_sext_i32_i16 s5, s1
1034+
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1035+
; GFX8-NEXT: s_sext_i32_i16 s6, s2
1036+
; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1037+
; GFX8-NEXT: s_sext_i32_i16 s7, s3
1038+
; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1039+
; GFX8-NEXT: s_ashr_i32 s4, s4, s6
10051040
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1006-
; GFX8-NEXT: s_sext_i32_i16 s2, s4
1007-
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1008-
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1009-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
1010-
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1041+
; GFX8-NEXT: s_ashr_i32 s2, s5, s7
10111042
; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1012-
; GFX8-NEXT: s_sext_i32_i16 s3, s5
1013-
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1014-
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1015-
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1016-
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1017-
; GFX8-NEXT: s_or_b32 s0, s0, s2
1018-
; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1019-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1020-
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1043+
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1044+
; GFX8-NEXT: s_and_b32 s3, s4, 0xffff
1045+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1046+
; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
1047+
; GFX8-NEXT: s_or_b32 s0, s0, s3
10211048
; GFX8-NEXT: s_or_b32 s1, s1, s2
10221049
; GFX8-NEXT: ; return to shader part epilog
10231050
;
@@ -1208,45 +1235,41 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
12081235
;
12091236
; GFX8-LABEL: s_ashr_v8i16:
12101237
; GFX8: ; %bb.0:
1211-
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1212-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
1213-
; GFX8-NEXT: s_lshr_b32 s12, s4, 16
1238+
; GFX8-NEXT: s_sext_i32_i16 s8, s0
1239+
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1240+
; GFX8-NEXT: s_sext_i32_i16 s9, s1
1241+
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1242+
; GFX8-NEXT: s_sext_i32_i16 s12, s4
1243+
; GFX8-NEXT: s_bfe_i32 s4, s4, 0x100010
1244+
; GFX8-NEXT: s_sext_i32_i16 s13, s5
1245+
; GFX8-NEXT: s_bfe_i32 s5, s5, 0x100010
1246+
; GFX8-NEXT: s_sext_i32_i16 s10, s2
1247+
; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
1248+
; GFX8-NEXT: s_sext_i32_i16 s14, s6
1249+
; GFX8-NEXT: s_bfe_i32 s6, s6, 0x100010
12141250
; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1215-
; GFX8-NEXT: s_sext_i32_i16 s4, s8
1216-
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1217-
; GFX8-NEXT: s_ashr_i32 s4, s4, s12
1218-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
1219-
; GFX8-NEXT: s_lshr_b32 s13, s5, 16
1251+
; GFX8-NEXT: s_ashr_i32 s4, s9, s13
12201252
; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1221-
; GFX8-NEXT: s_sext_i32_i16 s5, s9
1222-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1223-
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1224-
; GFX8-NEXT: s_ashr_i32 s5, s5, s13
1225-
; GFX8-NEXT: s_sext_i32_i16 s2, s2
1226-
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1227-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1228-
; GFX8-NEXT: s_lshr_b32 s14, s6, 16
1253+
; GFX8-NEXT: s_sext_i32_i16 s11, s3
1254+
; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1255+
; GFX8-NEXT: s_sext_i32_i16 s15, s7
1256+
; GFX8-NEXT: s_bfe_i32 s7, s7, 0x100010
1257+
; GFX8-NEXT: s_ashr_i32 s5, s10, s14
12291258
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1230-
; GFX8-NEXT: s_sext_i32_i16 s6, s10
1231-
; GFX8-NEXT: s_or_b32 s0, s0, s4
1232-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1233-
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1234-
; GFX8-NEXT: s_ashr_i32 s6, s6, s14
1235-
; GFX8-NEXT: s_sext_i32_i16 s3, s3
1236-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1237-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1238-
; GFX8-NEXT: s_lshr_b32 s15, s7, 16
1259+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1260+
; GFX8-NEXT: s_and_b32 s4, s4, 0xffff
1261+
; GFX8-NEXT: s_ashr_i32 s8, s8, s12
1262+
; GFX8-NEXT: s_ashr_i32 s6, s11, s15
12391263
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1240-
; GFX8-NEXT: s_sext_i32_i16 s7, s11
12411264
; GFX8-NEXT: s_or_b32 s1, s1, s4
1242-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1243-
; GFX8-NEXT: s_ashr_i32 s7, s7, s15
1244-
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1245-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1265+
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1266+
; GFX8-NEXT: s_and_b32 s4, s5, 0xffff
1267+
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
1268+
; GFX8-NEXT: s_and_b32 s7, s8, 0xffff
12461269
; GFX8-NEXT: s_or_b32 s2, s2, s4
1247-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1248-
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1249-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1270+
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1271+
; GFX8-NEXT: s_and_b32 s4, s6, 0xffff
1272+
; GFX8-NEXT: s_or_b32 s0, s0, s7
12501273
; GFX8-NEXT: s_or_b32 s3, s3, s4
12511274
; GFX8-NEXT: ; return to shader part epilog
12521275
;

0 commit comments

Comments
 (0)