@@ -49,7 +49,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x
49
49
; GCN-NEXT: v_mov_b32_e32 v9, s17
50
50
; GCN-NEXT: v_mov_b32_e32 v10, s18
51
51
; GCN-NEXT: v_mov_b32_e32 v11, s19
52
- ; GCN-NEXT: s_nop 3
52
+ ; GCN-NEXT: s_nop 4
53
53
; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
54
54
; GCN-NEXT: s_waitcnt vmcnt(0)
55
55
; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -122,7 +122,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0
122
122
; GCN-NEXT: v_mov_b32_e32 v9, s17
123
123
; GCN-NEXT: v_mov_b32_e32 v10, s18
124
124
; GCN-NEXT: v_mov_b32_e32 v11, s19
125
- ; GCN-NEXT: s_nop 3
125
+ ; GCN-NEXT: s_nop 4
126
126
; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
127
127
; GCN-NEXT: s_waitcnt vmcnt(0)
128
128
; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -179,7 +179,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac(<8 x bfloat> %arg0, <8 x b
179
179
; GCN-NEXT: s_nop 1
180
180
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
181
181
; GCN-NEXT: s_nop 7
182
- ; GCN-NEXT: s_nop 2
182
+ ; GCN-NEXT: s_nop 3
183
183
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
184
184
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
185
185
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -224,7 +224,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac__flags(<8 x bfloat> %arg0,
224
224
; GCN-NEXT: s_nop 1
225
225
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
226
226
; GCN-NEXT: s_nop 7
227
- ; GCN-NEXT: s_nop 2
227
+ ; GCN-NEXT: s_nop 3
228
228
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
229
229
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
230
230
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -417,7 +417,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac(<8 x bfloat>
417
417
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
418
418
; GCN-NEXT: v_mov_b32_e32 v0, 0
419
419
; GCN-NEXT: s_nop 7
420
- ; GCN-NEXT: s_nop 1
420
+ ; GCN-NEXT: s_nop 2
421
421
; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
422
422
; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
423
423
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -459,7 +459,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac_flags(<8 x bf
459
459
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
460
460
; GCN-NEXT: v_mov_b32_e32 v0, 0
461
461
; GCN-NEXT: s_nop 7
462
- ; GCN-NEXT: s_nop 1
462
+ ; GCN-NEXT: s_nop 2
463
463
; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
464
464
; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
465
465
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
0 commit comments