Skip to content

Commit 70b7f83

Browse files
committed
[LSV] Fix codegen tests
Also, avoid merging the same class into itself. Change-Id: Ib64fd98de5c908262947648ad14dc53b61814642
1 parent 826c475 commit 70b7f83

37 files changed

+1882
-1908
lines changed

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,8 @@ class Vectorizer {
325325
Instruction *ChainElem, Instruction *ChainBegin,
326326
const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets);
327327

328-
/// Merge the equivalence classes if casts could be inserted in one to match
329-
/// the scalar bitwidth of the instructions in the other class.
328+
/// Merge equivalence classes if casts could be inserted in one to match
329+
/// the total bitwidth of the instructions in the other class.
330330
void insertCastsToMergeClasses(EquivalenceClassMap &EQClasses);
331331

332332
/// Merges the equivalence classes if they have underlying objects that differ
@@ -1346,7 +1346,10 @@ void Vectorizer::insertCastsToMergeClasses(EquivalenceClassMap &EQClasses) {
13461346
DenseSet<EqClassKey> ClassesToErase;
13471347
for (auto EC1 : EQClasses) {
13481348
for (auto EC2 : EQClasses) {
1349-
if (ClassesToErase.contains(EC2.first) || EC1 <= EC2)
1349+
// Skip if EC2 was already merged before, EC1 does not compare greater
1350+
// than EC2 (i.e. EC1 <= EC2), or EC1 and EC2 share the same class key.
1351+
if (ClassesToErase.contains(EC2.first) || EC1 <= EC2 ||
1352+
EC1.first == EC2.first)
13501353
continue;
13511354

13521355
auto [Ptr1, AS1, TySize1, IsLoad1] = EC1.first;

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Lines changed: 671 additions & 648 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/bitop3.ll

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -113,15 +113,10 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) {
113113
}
114114

115115
define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
116-
; GFX950-SDAG-LABEL: and_and_and:
117-
; GFX950-SDAG: ; %bb.0:
118-
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
119-
; GFX950-SDAG-NEXT: ; return to shader part epilog
120-
;
121-
; GFX950-GISEL-LABEL: and_and_and:
122-
; GFX950-GISEL: ; %bb.0:
123-
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
124-
; GFX950-GISEL-NEXT: ; return to shader part epilog
116+
; GCN-LABEL: and_and_and:
117+
; GCN: ; %bb.0:
118+
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
119+
; GCN-NEXT: ; return to shader part epilog
125120
%and1 = and i32 %a, %c
126121
%and2 = and i32 %and1, %b
127122
%ret_cast = bitcast i32 %and2 to float
@@ -131,15 +126,10 @@ define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
131126
; ========= Multi bit functions =========
132127

133128
define amdgpu_ps float @test_12(i32 %a, i32 %b) {
134-
; GFX950-SDAG-LABEL: test_12:
135-
; GFX950-SDAG: ; %bb.0:
136-
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
137-
; GFX950-SDAG-NEXT: ; return to shader part epilog
138-
;
139-
; GFX950-GISEL-LABEL: test_12:
140-
; GFX950-GISEL: ; %bb.0:
141-
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
142-
; GFX950-GISEL-NEXT: ; return to shader part epilog
129+
; GCN-LABEL: test_12:
130+
; GCN: ; %bb.0:
131+
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
132+
; GCN-NEXT: ; return to shader part epilog
143133
%nota = xor i32 %a, -1
144134
%and1 = and i32 %nota, %b
145135
%ret_cast = bitcast i32 %and1 to float

llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll

Lines changed: 72 additions & 73 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/build_vector.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -271,13 +271,13 @@ define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out,
271271
; GFX8: ; %bb.0: ; %entry
272272
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
273273
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
274-
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
275-
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
276-
; GFX8-NEXT: v_mov_b32_e32 v3, s1
277-
; GFX8-NEXT: v_mov_b32_e32 v0, s2
278-
; GFX8-NEXT: v_mov_b32_e32 v1, s3
279-
; GFX8-NEXT: v_mov_b32_e32 v2, s0
280-
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
274+
; GFX8-NEXT: v_mov_b32_e32 v0, s0
275+
; GFX8-NEXT: v_mov_b32_e32 v1, s1
276+
; GFX8-NEXT: s_lshl_b32 s0, s3, 16
277+
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
278+
; GFX8-NEXT: v_mov_b32_e32 v2, s1
279+
; GFX8-NEXT: v_mov_b32_e32 v3, s0
280+
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
281281
; GFX8-NEXT: s_endpgm
282282
;
283283
; GFX10-LABEL: build_v2i32_from_v4i16_shuffle:

llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ define amdgpu_ps i32 @s_or_i32_disjoint(i32 inreg %a, i32 inreg %b) {
1010
; CHECK-NEXT: {{ $}}
1111
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
1212
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
13-
; CHECK-NEXT: %3:sreg_32 = disjoint S_OR_B32 [[COPY1]], [[COPY]], implicit-def dead $scc
14-
; CHECK-NEXT: $sgpr0 = COPY %3
13+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY1]], [[COPY]], implicit-def dead $scc
14+
; CHECK-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
1515
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0
1616
%result = or disjoint i32 %a, %b
1717
ret i32 %result
@@ -26,10 +26,10 @@ define amdgpu_ps <2 x i32> @s_or_v2i32_disjoint(<2 x i32> inreg %a, <2 x i32> in
2626
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
2727
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
2828
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
29-
; CHECK-NEXT: %5:sreg_32 = disjoint S_OR_B32 [[COPY3]], [[COPY1]], implicit-def dead $scc
30-
; CHECK-NEXT: %6:sreg_32 = disjoint S_OR_B32 [[COPY2]], [[COPY]], implicit-def dead $scc
31-
; CHECK-NEXT: $sgpr0 = COPY %5
32-
; CHECK-NEXT: $sgpr1 = COPY %6
29+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY3]], [[COPY1]], implicit-def dead $scc
30+
; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY2]], [[COPY]], implicit-def dead $scc
31+
; CHECK-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
32+
; CHECK-NEXT: $sgpr1 = COPY [[S_OR_B32_1]]
3333
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
3434
%result = or disjoint <2 x i32> %a, %b
3535
ret <2 x i32> %result
@@ -42,8 +42,8 @@ define i32 @v_or_i32_disjoint(i32 %a, i32 %b) {
4242
; CHECK-NEXT: {{ $}}
4343
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4444
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
45-
; CHECK-NEXT: %10:vgpr_32 = disjoint V_OR_B32_e64 [[COPY1]], [[COPY]], implicit $exec
46-
; CHECK-NEXT: $vgpr0 = COPY %10
45+
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY1]], [[COPY]], implicit $exec
46+
; CHECK-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
4747
; CHECK-NEXT: SI_RETURN implicit $vgpr0
4848
%result = or disjoint i32 %a, %b
4949
ret i32 %result
@@ -58,10 +58,10 @@ define <2 x i32> @v_or_v2i32_disjoint(<2 x i32> %a, <2 x i32> %b) {
5858
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
5959
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
6060
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
61-
; CHECK-NEXT: %12:vgpr_32 = disjoint V_OR_B32_e64 [[COPY3]], [[COPY1]], implicit $exec
62-
; CHECK-NEXT: %13:vgpr_32 = disjoint V_OR_B32_e64 [[COPY2]], [[COPY]], implicit $exec
63-
; CHECK-NEXT: $vgpr0 = COPY %12
64-
; CHECK-NEXT: $vgpr1 = COPY %13
61+
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY3]], [[COPY1]], implicit $exec
62+
; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY2]], [[COPY]], implicit $exec
63+
; CHECK-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
64+
; CHECK-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]]
6565
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
6666
%result = or disjoint <2 x i32> %a, %b
6767
ret <2 x i32> %result
@@ -78,9 +78,9 @@ define amdgpu_ps i64 @s_or_i64_disjoint(i64 inreg %a, i64 inreg %b) {
7878
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
7979
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
8080
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
81-
; CHECK-NEXT: %7:sreg_64 = disjoint S_OR_B64 killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], implicit-def dead $scc
82-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %7.sub1
83-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY %7.sub0
81+
; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = disjoint S_OR_B64 killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], implicit-def dead $scc
82+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1
83+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub0
8484
; CHECK-NEXT: $sgpr0 = COPY [[COPY5]]
8585
; CHECK-NEXT: $sgpr1 = COPY [[COPY4]]
8686
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1

llvm/test/CodeGen/AMDGPU/divrem24-assume.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
define amdgpu_kernel void @divrem24_assume(ptr addrspace(1) %arg, i32 %arg1) {
55
; CHECK-LABEL: @divrem24_assume(
66
; CHECK-NEXT: bb:
7-
; CHECK-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
7+
; CHECK-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG0:![0-9]+]]
88
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[ARG1:%.*]], 42
99
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]])
1010
; CHECK-NEXT: [[TMP0:%.*]] = uitofp i32 [[TMP]] to float

llvm/test/CodeGen/AMDGPU/fabs.f16.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -197,26 +197,26 @@ define amdgpu_kernel void @s_fabs_v4f16(ptr addrspace(1) %out, <4 x half> %in) {
197197
; CI: ; %bb.0:
198198
; CI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
199199
; CI-NEXT: s_waitcnt lgkmcnt(0)
200-
; CI-NEXT: s_and_b32 s3, s3, 0x7fff7fff
201-
; CI-NEXT: s_and_b32 s2, s2, 0x7fff7fff
202-
; CI-NEXT: v_mov_b32_e32 v3, s1
203-
; CI-NEXT: v_mov_b32_e32 v0, s2
204-
; CI-NEXT: v_mov_b32_e32 v1, s3
205-
; CI-NEXT: v_mov_b32_e32 v2, s0
206-
; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
200+
; CI-NEXT: v_mov_b32_e32 v0, s0
201+
; CI-NEXT: v_mov_b32_e32 v1, s1
202+
; CI-NEXT: s_and_b32 s0, s3, 0x7fff7fff
203+
; CI-NEXT: s_and_b32 s1, s2, 0x7fff7fff
204+
; CI-NEXT: v_mov_b32_e32 v2, s1
205+
; CI-NEXT: v_mov_b32_e32 v3, s0
206+
; CI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
207207
; CI-NEXT: s_endpgm
208208
;
209209
; VI-LABEL: s_fabs_v4f16:
210210
; VI: ; %bb.0:
211211
; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
212212
; VI-NEXT: s_waitcnt lgkmcnt(0)
213-
; VI-NEXT: s_and_b32 s3, s3, 0x7fff7fff
214-
; VI-NEXT: s_and_b32 s2, s2, 0x7fff7fff
215-
; VI-NEXT: v_mov_b32_e32 v3, s1
216-
; VI-NEXT: v_mov_b32_e32 v0, s2
217-
; VI-NEXT: v_mov_b32_e32 v1, s3
218-
; VI-NEXT: v_mov_b32_e32 v2, s0
219-
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
213+
; VI-NEXT: v_mov_b32_e32 v0, s0
214+
; VI-NEXT: v_mov_b32_e32 v1, s1
215+
; VI-NEXT: s_and_b32 s0, s3, 0x7fff7fff
216+
; VI-NEXT: s_and_b32 s1, s2, 0x7fff7fff
217+
; VI-NEXT: v_mov_b32_e32 v2, s1
218+
; VI-NEXT: v_mov_b32_e32 v3, s0
219+
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
220220
; VI-NEXT: s_endpgm
221221
;
222222
; GFX9-LABEL: s_fabs_v4f16:

llvm/test/CodeGen/AMDGPU/fabs.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,13 @@ define amdgpu_kernel void @fabs_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
115115
; VI: ; %bb.0:
116116
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
117117
; VI-NEXT: s_waitcnt lgkmcnt(0)
118-
; VI-NEXT: s_bitset0_b32 s3, 31
119-
; VI-NEXT: s_bitset0_b32 s2, 31
120-
; VI-NEXT: v_mov_b32_e32 v3, s1
121-
; VI-NEXT: v_mov_b32_e32 v0, s2
122-
; VI-NEXT: v_mov_b32_e32 v1, s3
123-
; VI-NEXT: v_mov_b32_e32 v2, s0
124-
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
118+
; VI-NEXT: v_mov_b32_e32 v0, s0
119+
; VI-NEXT: v_mov_b32_e32 v1, s1
120+
; VI-NEXT: s_and_b32 s0, s3, 0x7fffffff
121+
; VI-NEXT: s_and_b32 s1, s2, 0x7fffffff
122+
; VI-NEXT: v_mov_b32_e32 v2, s1
123+
; VI-NEXT: v_mov_b32_e32 v3, s0
124+
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
125125
; VI-NEXT: s_endpgm
126126
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
127127
store <2 x float> %fabs, ptr addrspace(1) %out

0 commit comments

Comments
 (0)