@@ -222,7 +222,7 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
222
222
; GFX8-NEXT: s_not_b64 exec, exec
223
223
; GFX8-NEXT: v_mov_b32_e32 v2, 0
224
224
; GFX8-NEXT: s_not_b64 exec, exec
225
- ; GFX8-NEXT: s_or_saveexec_b64 s[12:13 ], -1
225
+ ; GFX8-NEXT: s_or_saveexec_b64 s[10:11 ], -1
226
226
; GFX8-NEXT: s_nop 0
227
227
; GFX8-NEXT: v_add_u32_dpp v2, vcc, v2, v2 row_shr:1 row_mask:0xf bank_mask:0xf bound_ctrl:0
228
228
; GFX8-NEXT: s_nop 1
@@ -235,19 +235,19 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
235
235
; GFX8-NEXT: v_add_u32_dpp v2, vcc, v2, v2 row_bcast:15 row_mask:0xa bank_mask:0xf
236
236
; GFX8-NEXT: s_nop 1
237
237
; GFX8-NEXT: v_add_u32_dpp v2, vcc, v2, v2 row_bcast:31 row_mask:0xc bank_mask:0xf
238
- ; GFX8-NEXT: v_readlane_b32 s10 , v2, 63
238
+ ; GFX8-NEXT: v_readlane_b32 s12 , v2, 63
239
239
; GFX8-NEXT: s_nop 0
240
240
; GFX8-NEXT: v_mov_b32_dpp v1, v2 wave_shr:1 row_mask:0xf bank_mask:0xf
241
- ; GFX8-NEXT: s_mov_b64 exec, s[12:13 ]
241
+ ; GFX8-NEXT: s_mov_b64 exec, s[10:11 ]
242
242
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
243
243
; GFX8-NEXT: ; implicit-def: $vgpr0
244
- ; GFX8-NEXT: s_and_saveexec_b64 s[12:13 ], vcc
244
+ ; GFX8-NEXT: s_and_saveexec_b64 s[10:11 ], vcc
245
245
; GFX8-NEXT: s_cbranch_execz BB1_3
246
246
; GFX8-NEXT: ; %bb.2:
247
- ; GFX8-NEXT: v_mov_b32_e32 v0, s10
247
+ ; GFX8-NEXT: v_mov_b32_e32 v0, s12
248
248
; GFX8-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
249
249
; GFX8-NEXT: BB1_3:
250
- ; GFX8-NEXT: s_or_b64 exec, exec, s[12:13 ]
250
+ ; GFX8-NEXT: s_or_b64 exec, exec, s[10:11 ]
251
251
; GFX8-NEXT: s_waitcnt vmcnt(0)
252
252
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
253
253
; GFX8-NEXT: v_mov_b32_e32 v0, v1
@@ -279,7 +279,7 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
279
279
; GFX9-NEXT: s_not_b64 exec, exec
280
280
; GFX9-NEXT: v_mov_b32_e32 v2, 0
281
281
; GFX9-NEXT: s_not_b64 exec, exec
282
- ; GFX9-NEXT: s_or_saveexec_b64 s[12:13 ], -1
282
+ ; GFX9-NEXT: s_or_saveexec_b64 s[10:11 ], -1
283
283
; GFX9-NEXT: s_nop 0
284
284
; GFX9-NEXT: v_add_u32_dpp v2, v2, v2 row_shr:1 row_mask:0xf bank_mask:0xf bound_ctrl:0
285
285
; GFX9-NEXT: s_nop 1
@@ -292,19 +292,19 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
292
292
; GFX9-NEXT: v_add_u32_dpp v2, v2, v2 row_bcast:15 row_mask:0xa bank_mask:0xf
293
293
; GFX9-NEXT: s_nop 1
294
294
; GFX9-NEXT: v_add_u32_dpp v2, v2, v2 row_bcast:31 row_mask:0xc bank_mask:0xf
295
- ; GFX9-NEXT: v_readlane_b32 s10 , v2, 63
295
+ ; GFX9-NEXT: v_readlane_b32 s12 , v2, 63
296
296
; GFX9-NEXT: s_nop 0
297
297
; GFX9-NEXT: v_mov_b32_dpp v1, v2 wave_shr:1 row_mask:0xf bank_mask:0xf
298
- ; GFX9-NEXT: s_mov_b64 exec, s[12:13 ]
298
+ ; GFX9-NEXT: s_mov_b64 exec, s[10:11 ]
299
299
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
300
300
; GFX9-NEXT: ; implicit-def: $vgpr0
301
- ; GFX9-NEXT: s_and_saveexec_b64 s[12:13 ], vcc
301
+ ; GFX9-NEXT: s_and_saveexec_b64 s[10:11 ], vcc
302
302
; GFX9-NEXT: s_cbranch_execz BB1_3
303
303
; GFX9-NEXT: ; %bb.2:
304
- ; GFX9-NEXT: v_mov_b32_e32 v0, s10
304
+ ; GFX9-NEXT: v_mov_b32_e32 v0, s12
305
305
; GFX9-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
306
306
; GFX9-NEXT: BB1_3:
307
- ; GFX9-NEXT: s_or_b64 exec, exec, s[12:13 ]
307
+ ; GFX9-NEXT: s_or_b64 exec, exec, s[10:11 ]
308
308
; GFX9-NEXT: s_waitcnt vmcnt(0)
309
309
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
310
310
; GFX9-NEXT: v_mov_b32_e32 v0, v1
@@ -336,36 +336,36 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
336
336
; GFX1064-NEXT: s_not_b64 exec, exec
337
337
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
338
338
; GFX1064-NEXT: s_not_b64 exec, exec
339
- ; GFX1064-NEXT: s_or_saveexec_b64 s[12:13 ], -1
339
+ ; GFX1064-NEXT: s_or_saveexec_b64 s[10:11 ], -1
340
340
; GFX1064-NEXT: v_add_nc_u32_dpp v2, v2, v2 row_shr:1 row_mask:0xf bank_mask:0xf bound_ctrl:0
341
341
; GFX1064-NEXT: v_add_nc_u32_dpp v2, v2, v2 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:0
342
342
; GFX1064-NEXT: v_add_nc_u32_dpp v2, v2, v2 row_shr:4 row_mask:0xf bank_mask:0xf bound_ctrl:0
343
343
; GFX1064-NEXT: v_add_nc_u32_dpp v2, v2, v2 row_shr:8 row_mask:0xf bank_mask:0xf bound_ctrl:0
344
344
; GFX1064-NEXT: v_mov_b32_e32 v3, v2
345
345
; GFX1064-NEXT: v_permlanex16_b32 v3, v3, -1, -1
346
346
; GFX1064-NEXT: v_add_nc_u32_dpp v2, v3, v2 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
347
- ; GFX1064-NEXT: v_readlane_b32 s10 , v2, 31
348
- ; GFX1064-NEXT: v_mov_b32_e32 v3, s10
347
+ ; GFX1064-NEXT: v_readlane_b32 s12 , v2, 31
348
+ ; GFX1064-NEXT: v_mov_b32_e32 v3, s12
349
349
; GFX1064-NEXT: v_add_nc_u32_dpp v2, v3, v2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
350
- ; GFX1064-NEXT: v_readlane_b32 s10 , v2, 15
350
+ ; GFX1064-NEXT: v_readlane_b32 s12 , v2, 15
351
351
; GFX1064-NEXT: v_mov_b32_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf
352
- ; GFX1064-NEXT: v_readlane_b32 s11 , v2, 31
353
- ; GFX1064-NEXT: v_writelane_b32 v1, s10 , 16
354
- ; GFX1064-NEXT: v_readlane_b32 s10 , v2, 63
355
- ; GFX1064-NEXT: v_writelane_b32 v1, s11 , 32
356
- ; GFX1064-NEXT: v_readlane_b32 s11 , v2, 47
357
- ; GFX1064-NEXT: v_writelane_b32 v1, s11 , 48
358
- ; GFX1064-NEXT: s_mov_b64 exec, s[12:13 ]
352
+ ; GFX1064-NEXT: v_readlane_b32 s13 , v2, 31
353
+ ; GFX1064-NEXT: v_writelane_b32 v1, s12 , 16
354
+ ; GFX1064-NEXT: v_readlane_b32 s12 , v2, 63
355
+ ; GFX1064-NEXT: v_writelane_b32 v1, s13 , 32
356
+ ; GFX1064-NEXT: v_readlane_b32 s13 , v2, 47
357
+ ; GFX1064-NEXT: v_writelane_b32 v1, s13 , 48
358
+ ; GFX1064-NEXT: s_mov_b64 exec, s[10:11 ]
359
359
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
360
360
; GFX1064-NEXT: ; implicit-def: $vgpr0
361
- ; GFX1064-NEXT: s_and_saveexec_b64 s[12:13 ], vcc
361
+ ; GFX1064-NEXT: s_and_saveexec_b64 s[30:31 ], vcc
362
362
; GFX1064-NEXT: s_cbranch_execz BB1_3
363
363
; GFX1064-NEXT: ; %bb.2:
364
- ; GFX1064-NEXT: v_mov_b32_e32 v0, s10
364
+ ; GFX1064-NEXT: v_mov_b32_e32 v0, s12
365
365
; GFX1064-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
366
366
; GFX1064-NEXT: BB1_3:
367
367
; GFX1064-NEXT: v_nop
368
- ; GFX1064-NEXT: s_or_b64 exec, exec, s[12:13 ]
368
+ ; GFX1064-NEXT: s_or_b64 exec, exec, s[30:31 ]
369
369
; GFX1064-NEXT: s_waitcnt vmcnt(0)
370
370
; GFX1064-NEXT: v_readfirstlane_b32 s4, v0
371
371
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
@@ -397,29 +397,29 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
397
397
; GFX1032-NEXT: s_not_b32 exec_lo, exec_lo
398
398
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
399
399
; GFX1032-NEXT: s_not_b32 exec_lo, exec_lo
400
- ; GFX1032-NEXT: s_or_saveexec_b32 s10 , -1
400
+ ; GFX1032-NEXT: s_or_saveexec_b32 s9 , -1
401
401
; GFX1032-NEXT: v_add_nc_u32_dpp v2, v2, v2 row_shr:1 row_mask:0xf bank_mask:0xf bound_ctrl:0
402
402
; GFX1032-NEXT: v_add_nc_u32_dpp v2, v2, v2 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:0
403
403
; GFX1032-NEXT: v_add_nc_u32_dpp v2, v2, v2 row_shr:4 row_mask:0xf bank_mask:0xf bound_ctrl:0
404
404
; GFX1032-NEXT: v_add_nc_u32_dpp v2, v2, v2 row_shr:8 row_mask:0xf bank_mask:0xf bound_ctrl:0
405
405
; GFX1032-NEXT: v_mov_b32_e32 v3, v2
406
406
; GFX1032-NEXT: v_permlanex16_b32 v3, v3, -1, -1
407
407
; GFX1032-NEXT: v_add_nc_u32_dpp v2, v3, v2 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
408
- ; GFX1032-NEXT: v_readlane_b32 s9 , v2, 31
408
+ ; GFX1032-NEXT: v_readlane_b32 s10 , v2, 31
409
409
; GFX1032-NEXT: v_readlane_b32 s11, v2, 15
410
410
; GFX1032-NEXT: v_mov_b32_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf
411
411
; GFX1032-NEXT: v_writelane_b32 v1, s11, 16
412
- ; GFX1032-NEXT: s_mov_b32 exec_lo, s10
412
+ ; GFX1032-NEXT: s_mov_b32 exec_lo, s9
413
413
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
414
414
; GFX1032-NEXT: ; implicit-def: $vgpr0
415
- ; GFX1032-NEXT: s_and_saveexec_b32 s14 , vcc_lo
415
+ ; GFX1032-NEXT: s_and_saveexec_b32 s9 , vcc_lo
416
416
; GFX1032-NEXT: s_cbranch_execz BB1_3
417
417
; GFX1032-NEXT: ; %bb.2:
418
- ; GFX1032-NEXT: v_mov_b32_e32 v0, s9
418
+ ; GFX1032-NEXT: v_mov_b32_e32 v0, s10
419
419
; GFX1032-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
420
420
; GFX1032-NEXT: BB1_3:
421
421
; GFX1032-NEXT: v_nop
422
- ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s14
422
+ ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s9
423
423
; GFX1032-NEXT: s_waitcnt vmcnt(0)
424
424
; GFX1032-NEXT: v_readfirstlane_b32 s4, v0
425
425
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
0 commit comments