@@ -171,17 +171,17 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
171
171
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
172
172
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
173
173
; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
174
- ; CHECK-NEXT: v_mul_lo_u32 v1, v0, v2
175
- ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
176
- ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1
177
- ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
178
- ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
179
- ; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v2
180
- ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
181
- ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
182
- ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
183
- ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
184
174
; CHECK-NEXT: v_mov_b32_e32 v1, 0
175
+ ; CHECK-NEXT: v_mul_lo_u32 v3, v0, v2
176
+ ; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v0
177
+ ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v4, v3
178
+ ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
179
+ ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
180
+ ; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v2
181
+ ; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
182
+ ; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v0
183
+ ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
184
+ ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
185
185
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
186
186
; CHECK-NEXT: s_setpc_b64 s[30:31]
187
187
%result = sdiv i64 %num , %den
@@ -335,7 +335,6 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
335
335
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
336
336
; CHECK-NEXT: .LBB1_3: ; %Flow
337
337
; CHECK-NEXT: s_xor_b32 s0, s0, 1
338
- ; CHECK-NEXT: s_and_b32 s0, s0, 1
339
338
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
340
339
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
341
340
; CHECK-NEXT: ; %bb.4:
@@ -809,17 +808,17 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
809
808
; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
810
809
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
811
810
; CGP-NEXT: v_mul_hi_u32 v0, v10, v0
812
- ; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
813
- ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
814
- ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v1
815
- ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
816
- ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
817
- ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v4
818
- ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
819
- ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
820
- ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
821
- ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
822
811
; CGP-NEXT: v_mov_b32_e32 v1, 0
812
+ ; CGP-NEXT: v_mul_lo_u32 v2, v0, v4
813
+ ; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
814
+ ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v10, v2
815
+ ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
816
+ ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
817
+ ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v4
818
+ ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
819
+ ; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
820
+ ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
821
+ ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
823
822
; CGP-NEXT: .LBB2_4:
824
823
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
825
824
; CGP-NEXT: v_or_b32_e32 v3, v9, v7
@@ -981,17 +980,17 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
981
980
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
982
981
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
983
982
; CGP-NEXT: v_mul_hi_u32 v2, v8, v2
984
- ; CGP-NEXT: v_mul_lo_u32 v3, v2, v6
985
- ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
986
- ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v3
987
- ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
988
- ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
989
- ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v6
990
- ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
991
- ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
992
- ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
993
- ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
994
983
; CGP-NEXT: v_mov_b32_e32 v3, 0
984
+ ; CGP-NEXT: v_mul_lo_u32 v4, v2, v6
985
+ ; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
986
+ ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
987
+ ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
988
+ ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
989
+ ; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v4, v6
990
+ ; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
991
+ ; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
992
+ ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
993
+ ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
995
994
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
996
995
; CGP-NEXT: s_setpc_b64 s[30:31]
997
996
%result = sdiv <2 x i64 > %num , %den
@@ -1817,17 +1816,17 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
1817
1816
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
1818
1817
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1819
1818
; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0
1820
- ; CHECK-NEXT: v_mul_lo_u32 v1, v0, v5
1821
- ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
1822
- ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
1823
- ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
1824
- ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1825
- ; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v5
1826
- ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1827
- ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
1828
- ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
1829
- ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1830
1819
; CHECK-NEXT: v_mov_b32_e32 v1, 0
1820
+ ; CHECK-NEXT: v_mul_lo_u32 v2, v0, v5
1821
+ ; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v0
1822
+ ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v3, v2
1823
+ ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
1824
+ ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1825
+ ; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v5
1826
+ ; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1827
+ ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
1828
+ ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
1829
+ ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1831
1830
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
1832
1831
; CHECK-NEXT: s_setpc_b64 s[30:31]
1833
1832
%shl.y = shl i64 4096 , %y
@@ -2279,17 +2278,17 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
2279
2278
; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
2280
2279
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
2281
2280
; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
2282
- ; CGP-NEXT: v_mul_lo_u32 v1, v0, v11
2283
- ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
2284
- ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1
2285
- ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v11
2286
- ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2287
- ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v11
2288
- ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2289
- ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
2290
- ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v11
2291
- ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2292
2281
; CGP-NEXT: v_mov_b32_e32 v1, 0
2282
+ ; CGP-NEXT: v_mul_lo_u32 v2, v0, v11
2283
+ ; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
2284
+ ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
2285
+ ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v11
2286
+ ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2287
+ ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v11
2288
+ ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
2289
+ ; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
2290
+ ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v11
2291
+ ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2293
2292
; CGP-NEXT: .LBB8_4:
2294
2293
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
2295
2294
; CGP-NEXT: v_or_b32_e32 v3, v7, v10
@@ -2453,17 +2452,17 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
2453
2452
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
2454
2453
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
2455
2454
; CGP-NEXT: v_mul_hi_u32 v2, v5, v2
2456
- ; CGP-NEXT: v_mul_lo_u32 v3, v2, v9
2457
- ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
2458
- ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v3
2459
- ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
2460
- ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2461
- ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v9
2462
- ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
2463
- ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
2464
- ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
2465
- ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2466
2455
; CGP-NEXT: v_mov_b32_e32 v3, 0
2456
+ ; CGP-NEXT: v_mul_lo_u32 v4, v2, v9
2457
+ ; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v2
2458
+ ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v5, v4
2459
+ ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v9
2460
+ ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
2461
+ ; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v4, v9
2462
+ ; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
2463
+ ; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
2464
+ ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v9
2465
+ ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
2467
2466
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
2468
2467
; CGP-NEXT: s_setpc_b64 s[30:31]
2469
2468
%shl.y = shl <2 x i64 > <i64 4096 , i64 4096 >, %y
0 commit comments