Skip to content

Commit 07ba9a2

Browse files
authored
merge main into amd-staging (llvm#920)
2 parents 63590f8 + 40051c9 commit 07ba9a2

File tree

7 files changed

+41
-29
lines changed

7 files changed

+41
-29
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -682,11 +682,21 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
682682
if (!CanCommute)
683683
return false;
684684

685+
MachineOperand &Op = MI->getOperand(OpNo);
686+
MachineOperand &CommutedOp = MI->getOperand(CommuteOpNo);
687+
685688
// One of operands might be an Imm operand, and OpNo may refer to it after
686689
// the call of commuteInstruction() below. Such situations are avoided
687690
// here explicitly as OpNo must be a register operand to be a candidate
688691
// for memory folding.
689-
if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg())
692+
if (!Op.isReg() || !CommutedOp.isReg())
693+
return false;
694+
695+
// The same situation with an immediate could reproduce if both inputs are
696+
// the same register.
697+
if (Op.isReg() && CommutedOp.isReg() &&
698+
(Op.getReg() == CommutedOp.getReg() &&
699+
Op.getSubReg() == CommutedOp.getSubReg()))
690700
return false;
691701

692702
if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))

llvm/test/CodeGen/AMDGPU/dag-divergence.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ define amdgpu_kernel void @flat_load_maybe_divergent(ptr addrspace(4) %k, ptr %f
3737
; GCN-LABEL: {{^}}wide_carry_divergence_error:
3838
; GCN: v_sub_u32_e32
3939
; GCN: v_subb_u32_e32
40-
; GCN: v_subbrev_u32_e32
41-
; GCN: v_subbrev_u32_e32
40+
; GCN: v_subb_u32_e32
41+
; GCN: v_subb_u32_e32
4242
define <2 x i128> @wide_carry_divergence_error(i128 %arg) {
4343
%i = call i128 @llvm.ctlz.i128(i128 %arg, i1 false)
4444
%i1 = sub i128 0, %i

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
6565
; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, 0, vcc
6666
; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3
6767
; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v7, vcc
68-
; GFX9-NEXT: v_subbrev_co_u32_e32 v4, vcc, 0, v5, vcc
69-
; GFX9-NEXT: v_subbrev_co_u32_e32 v5, vcc, 0, v5, vcc
68+
; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, 0, v5, vcc
69+
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, 0, v5, vcc
7070
; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f
7171
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
7272
; GFX9-NEXT: v_mov_b32_e32 v18, v16
@@ -2355,8 +2355,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
23552355
; GFX9-NEXT: v_sub_co_u32_e32 v12, vcc, v8, v9
23562356
; GFX9-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v13, vcc
23572357
; GFX9-NEXT: v_mov_b32_e32 v8, 0
2358-
; GFX9-NEXT: v_subbrev_co_u32_e32 v14, vcc, 0, v8, vcc
2359-
; GFX9-NEXT: v_subbrev_co_u32_e32 v15, vcc, 0, v8, vcc
2358+
; GFX9-NEXT: v_subb_co_u32_e32 v14, vcc, 0, v8, vcc
2359+
; GFX9-NEXT: v_subb_co_u32_e32 v15, vcc, 0, v8, vcc
23602360
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[12:13]
23612361
; GFX9-NEXT: v_or_b32_e32 v10, v13, v15
23622362
; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc

llvm/test/CodeGen/AMDGPU/div_v2i128.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,10 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
6666
; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
6767
; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v8, v9, vcc
6868
; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v2
69-
; SDAG-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v18, vcc
69+
; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v18, vcc
7070
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[2:3]
7171
; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
72-
; SDAG-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v18, vcc
72+
; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v18, vcc
7373
; SDAG-NEXT: v_or_b32_e32 v8, v8, v10
7474
; SDAG-NEXT: v_or_b32_e32 v9, v3, v11
7575
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
@@ -263,10 +263,10 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
263263
; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v4, v13
264264
; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v9, v12, vcc
265265
; SDAG-NEXT: v_xor_b32_e32 v9, 0x7f, v4
266-
; SDAG-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v8, vcc
266+
; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v8, vcc
267267
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[4:5]
268268
; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
269-
; SDAG-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v8, vcc
269+
; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc
270270
; SDAG-NEXT: v_or_b32_e32 v8, v9, v10
271271
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
272272
; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
@@ -872,10 +872,10 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
872872
; SDAG-NEXT: v_sub_i32_e32 v22, vcc, v16, v18
873873
; SDAG-NEXT: v_subb_u32_e32 v23, vcc, v20, v17, vcc
874874
; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v22
875-
; SDAG-NEXT: v_subbrev_u32_e32 v24, vcc, 0, v28, vcc
875+
; SDAG-NEXT: v_subb_u32_e32 v24, vcc, 0, v28, vcc
876876
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[22:23]
877877
; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
878-
; SDAG-NEXT: v_subbrev_u32_e32 v25, vcc, 0, v28, vcc
878+
; SDAG-NEXT: v_subb_u32_e32 v25, vcc, 0, v28, vcc
879879
; SDAG-NEXT: v_or_b32_e32 v16, v16, v24
880880
; SDAG-NEXT: v_or_b32_e32 v17, v23, v25
881881
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[24:25]
@@ -1047,10 +1047,10 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
10471047
; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
10481048
; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v8, v1, vcc
10491049
; SDAG-NEXT: v_xor_b32_e32 v2, 0x7f, v0
1050-
; SDAG-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v24, vcc
1050+
; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v24, vcc
10511051
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[0:1]
10521052
; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1053-
; SDAG-NEXT: v_subbrev_u32_e32 v21, vcc, 0, v24, vcc
1053+
; SDAG-NEXT: v_subb_u32_e32 v21, vcc, 0, v24, vcc
10541054
; SDAG-NEXT: v_or_b32_e32 v2, v2, v20
10551055
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21]
10561056
; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
@@ -1619,10 +1619,10 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
16191619
; SDAG-NEXT: v_sub_i32_e32 v10, vcc, v8, v10
16201620
; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v11, v18, vcc
16211621
; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v10
1622-
; SDAG-NEXT: v_subbrev_u32_e32 v18, vcc, 0, v19, vcc
1622+
; SDAG-NEXT: v_subb_u32_e32 v18, vcc, 0, v19, vcc
16231623
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
16241624
; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5]
1625-
; SDAG-NEXT: v_subbrev_u32_e32 v19, vcc, 0, v19, vcc
1625+
; SDAG-NEXT: v_subb_u32_e32 v19, vcc, 0, v19, vcc
16261626
; SDAG-NEXT: v_or_b32_e32 v8, v8, v18
16271627
; SDAG-NEXT: v_or_b32_e32 v9, v11, v19
16281628
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19]
@@ -1814,10 +1814,10 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
18141814
; SDAG-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
18151815
; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v13, v12, vcc
18161816
; SDAG-NEXT: v_xor_b32_e32 v14, 0x7f, v10
1817-
; SDAG-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v18, vcc
1817+
; SDAG-NEXT: v_subb_u32_e32 v12, vcc, 0, v18, vcc
18181818
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
18191819
; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1820-
; SDAG-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v18, vcc
1820+
; SDAG-NEXT: v_subb_u32_e32 v13, vcc, 0, v18, vcc
18211821
; SDAG-NEXT: v_or_b32_e32 v14, v14, v12
18221822
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13]
18231823
; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
@@ -2502,10 +2502,10 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
25022502
; SDAG-NEXT: v_sub_i32_e32 v18, vcc, v16, v18
25032503
; SDAG-NEXT: v_subb_u32_e32 v19, vcc, v20, v17, vcc
25042504
; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v18
2505-
; SDAG-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v28, vcc
2505+
; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v28, vcc
25062506
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[18:19]
25072507
; SDAG-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5]
2508-
; SDAG-NEXT: v_subbrev_u32_e32 v21, vcc, 0, v28, vcc
2508+
; SDAG-NEXT: v_subb_u32_e32 v21, vcc, 0, v28, vcc
25092509
; SDAG-NEXT: v_or_b32_e32 v16, v16, v20
25102510
; SDAG-NEXT: v_or_b32_e32 v17, v19, v21
25112511
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21]
@@ -2677,10 +2677,10 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
26772677
; SDAG-NEXT: v_sub_i32_e32 v16, vcc, v16, v18
26782678
; SDAG-NEXT: v_subb_u32_e32 v17, vcc, v20, v17, vcc
26792679
; SDAG-NEXT: v_xor_b32_e32 v18, 0x7f, v16
2680-
; SDAG-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v28, vcc
2680+
; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v28, vcc
26812681
; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[16:17]
26822682
; SDAG-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5]
2683-
; SDAG-NEXT: v_subbrev_u32_e32 v21, vcc, 0, v28, vcc
2683+
; SDAG-NEXT: v_subb_u32_e32 v21, vcc, 0, v28, vcc
26842684
; SDAG-NEXT: v_or_b32_e32 v18, v18, v20
26852685
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21]
26862686
; SDAG-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc

llvm/test/CodeGen/AMDGPU/rem_i128.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
6666
; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, 0, vcc
6767
; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v7
6868
; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v8, v11, vcc
69-
; GFX9-NEXT: v_subbrev_co_u32_e32 v8, vcc, 0, v9, vcc
70-
; GFX9-NEXT: v_subbrev_co_u32_e32 v9, vcc, 0, v9, vcc
69+
; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v9, vcc
70+
; GFX9-NEXT: v_subb_co_u32_e32 v9, vcc, 0, v9, vcc
7171
; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f
7272
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7]
7373
; GFX9-NEXT: v_or_b32_e32 v12, v7, v9
@@ -1541,8 +1541,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
15411541
; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, v8, v9
15421542
; GFX9-NEXT: v_subb_co_u32_e32 v9, vcc, v10, v12, vcc
15431543
; GFX9-NEXT: v_mov_b32_e32 v11, 0
1544-
; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v11, vcc
1545-
; GFX9-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v11, vcc
1544+
; GFX9-NEXT: v_subb_co_u32_e32 v10, vcc, 0, v11, vcc
1545+
; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v11, vcc
15461546
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
15471547
; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
15481548
; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]

llvm/test/CodeGen/AMDGPU/si-fold-operands-commute-same-operands-assert.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ body: |
4747
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4848
; CHECK-NEXT: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 0, killed [[COPY]], implicit-def $vcc, implicit $exec
4949
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
50-
; CHECK-NEXT: [[V_SUBBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBBREV_U32_e32 0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $vcc, implicit $exec
51-
; CHECK-NEXT: $vgpr4 = COPY [[V_SUBBREV_U32_e32_]]
50+
; CHECK-NEXT: [[V_SUBB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBB_U32_e32 0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $vcc, implicit $exec
51+
; CHECK-NEXT: $vgpr4 = COPY [[V_SUBB_U32_e32_]]
5252
; CHECK-NEXT: SI_RETURN implicit $vgpr4
5353
%0:vgpr_32 = COPY $vgpr0
5454
%1:sreg_64 = S_MOV_B64 0

revert_patches.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,5 @@ revert : breaks build of 5 spec-accel codes
3535
25f29ee377b1 - [flang][OpenMP] Update all `lastprivate` symbols, not just in clauses (#125628)
3636
Kareem, Ron
3737
---
38+
a3165398db07 AMDGPU: Fix overly conservative immediate operand check
39+
---

0 commit comments

Comments
 (0)