@@ -476,28 +476,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
476
476
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
477
477
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
478
478
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
479
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
480
+ ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
481
+ ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
479
482
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
480
483
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
481
484
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
482
485
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
483
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
484
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
485
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
486
- ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
487
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
488
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
489
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
490
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
486
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
491
487
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
492
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
493
- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
494
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
488
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
489
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
495
490
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
496
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
497
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
491
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
498
492
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
499
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
500
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
493
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
501
494
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
502
495
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
503
496
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -508,7 +501,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
508
501
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
509
502
; GFX9-O0-NEXT: s_mov_b32 s14, s13
510
503
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
511
- ; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
512
504
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
513
505
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
514
506
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -1042,10 +1034,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
1042
1034
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1043
1035
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1044
1036
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1045
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1046
- ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1047
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1048
- ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1037
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1038
+ ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1039
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1040
+ ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1049
1041
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
1050
1042
; GFX9-O0-NEXT: s_mov_b32 s5, s6
1051
1043
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -2747,28 +2739,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2747
2739
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2748
2740
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
2749
2741
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
2742
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2743
+ ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2744
+ ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2750
2745
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2751
2746
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2752
2747
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
2753
2748
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2754
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v9
2755
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2756
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2757
- ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2758
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2759
- ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
2760
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2761
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2749
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2762
2750
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
2763
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2764
- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2765
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[8:9]
2751
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2752
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
2766
2753
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2767
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2768
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[14:15]
2754
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
2769
2755
; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
2770
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2771
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[14:15]
2756
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
2772
2757
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
2773
2758
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
2774
2759
; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
@@ -2779,7 +2764,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2779
2764
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
2780
2765
; GFX9-O0-NEXT: s_mov_b32 s14, s13
2781
2766
; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
2782
- ; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
2783
2767
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2784
2768
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
2785
2769
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
@@ -3313,10 +3297,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
3313
3297
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3314
3298
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3315
3299
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3316
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3317
- ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3318
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3319
- ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3300
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3301
+ ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3302
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3303
+ ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3320
3304
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
3321
3305
; GFX9-O0-NEXT: s_mov_b32 s5, s6
3322
3306
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
0 commit comments