@@ -506,8 +506,8 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
506
506
; GFX908-NEXT: v_accvgpr_write_b32 a3, 0
507
507
; GFX908-NEXT: v_accvgpr_write_b32 a2, 0
508
508
; GFX908-NEXT: v_accvgpr_write_b32 a0, 0
509
- ; GFX908-NEXT: s_mov_b32 s0, 16
510
509
; GFX908-NEXT: v_mov_b32_e32 v0, 1.0
510
+ ; GFX908-NEXT: s_mov_b32 s0, 16
511
511
; GFX908-NEXT: v_mov_b32_e32 v1, 2.0
512
512
; GFX908-NEXT: .LBB2_1: ; %for.cond.preheader
513
513
; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -566,7 +566,6 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
566
566
;
567
567
; GFX90A-LABEL: test_mfma_loop_non_splat:
568
568
; GFX90A: ; %bb.0: ; %entry
569
- ; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
570
569
; GFX90A-NEXT: v_accvgpr_write_b32 a1, 1.0
571
570
; GFX90A-NEXT: v_accvgpr_write_b32 a31, 0
572
571
; GFX90A-NEXT: v_accvgpr_write_b32 a30, 0
@@ -600,6 +599,7 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
600
599
; GFX90A-NEXT: v_accvgpr_write_b32 a2, 0
601
600
; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
602
601
; GFX90A-NEXT: s_mov_b32 s0, 16
602
+ ; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
603
603
; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
604
604
; GFX90A-NEXT: .LBB2_1: ; %for.cond.preheader
605
605
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -626,7 +626,6 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
626
626
;
627
627
; GFX942-LABEL: test_mfma_loop_non_splat:
628
628
; GFX942: ; %bb.0: ; %entry
629
- ; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
630
629
; GFX942-NEXT: v_accvgpr_write_b32 a1, 1.0
631
630
; GFX942-NEXT: v_accvgpr_write_b32 a31, 0
632
631
; GFX942-NEXT: v_accvgpr_write_b32 a30, 0
@@ -660,6 +659,7 @@ define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
660
659
; GFX942-NEXT: v_accvgpr_write_b32 a2, 0
661
660
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
662
661
; GFX942-NEXT: s_mov_b32 s0, 16
662
+ ; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
663
663
; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
664
664
; GFX942-NEXT: .LBB2_1: ; %for.cond.preheader
665
665
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
0 commit comments