Skip to content

Commit 2f2ac3d

Browse files
authored
DAG: Avoid stack usage in bitcast operand promotion to legal vector (#125637)
Avoid introducing stack usage when a bitcast's source operand is an illegal integer type that is cast to a legal vector type. This should cover more situations, but this is the first one I noticed.
1 parent de5d588 commit 2f2ac3d

12 files changed

+575
-2590
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2204,9 +2204,43 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
22042204
}
22052205

22062206
// Legalize a BITCAST whose source operand needs operand promotion.
//
// N is the BITCAST node: operand 0 is the value being cast and value type 0 is
// the result type. Returns the SDValue that replaces the node's result.
//
// When the source type's legalization action is integer promotion, the result
// is a vector, and the data layout is little-endian, this tries to bitcast the
// promoted integer to a wider vector with the same element type and then
// extract the subvector starting at index 0. In every other case it falls back
// to CreateStackStoreLoad.
SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
2207+
EVT OutVT = N->getValueType(0);
2208+
SDValue InOp = N->getOperand(0);
2209+
EVT InVT = InOp.getValueType();
2210+
// Type the target lowering says InVT is transformed to.
EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
2211+
SDLoc dl(N);
2212+
2213+
switch (getTypeAction(InVT)) {
2214+
case TargetLowering::TypePromoteInteger: {
2215+
// TODO: Handle big endian
2216+
// NOTE(review): the little-endian restriction presumably guarantees that the
// original integer's bits land in the leading elements of the wide vector;
// confirm before extending this path to big-endian targets.
if (OutVT.isVector() && DAG.getDataLayout().isLittleEndian()) {
2217+
EVT EltVT = OutVT.getVectorElementType();
2218+
TypeSize EltSize = EltVT.getSizeInBits();
2219+
TypeSize NInSize = NInVT.getSizeInBits();
2220+
2221+
// Only proceed when NInSize has a known scalar factor relative to EltSize;
// that factor is used below as the element count of the wide vector.
if (NInSize.hasKnownScalarFactor(EltSize)) {
2222+
unsigned NumEltsWithPadding = NInSize.getKnownScalarFactor(EltSize);
2223+
EVT WideVecVT =
2224+
EVT::getVectorVT(*DAG.getContext(), EltVT, NumEltsWithPadding);
2225+
2226+
// If the wide vector type is not legal, fall through to the stack-based
// fallback at the end of the function.
if (isTypeLegal(WideVecVT)) {
2227+
SDValue Promoted = GetPromotedInteger(InOp);
2228+
SDValue Cast = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Promoted);
2229+
// Take the OutVT-typed subvector that starts at element index 0.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, Cast,
2230+
DAG.getVectorIdxConstant(0, dl));
2231+
}
2232+
}
2233+
}
2234+
2235+
break;
2236+
}
2237+
default:
2238+
break;
2239+
}
2240+
22072241
// This should only occur in unusual situations like bitcasting to an
22082242
// x86_fp80, so just turn it into a store+load
2209-
return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
2243+
return CreateStackStoreLoad(InOp, OutVT);
22102244
}
22112245

22122246
SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {

llvm/test/CodeGen/AMDGPU/bitcast_vector_bigint.ll

Lines changed: 0 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,6 @@ define <5 x i32> @bitcast_i160_to_v5i32(i160 %int) {
8080
; GFX9-LABEL: bitcast_i160_to_v5i32:
8181
; GFX9: ; %bb.0:
8282
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83-
; GFX9-NEXT: s_mov_b32 s4, s33
84-
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
85-
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
86-
; GFX9-NEXT: s_mov_b32 s5, s34
87-
; GFX9-NEXT: s_mov_b32 s34, s32
88-
; GFX9-NEXT: s_addk_i32 s32, 0x1000
89-
; GFX9-NEXT: s_mov_b32 s32, s34
90-
; GFX9-NEXT: s_mov_b32 s34, s5
91-
; GFX9-NEXT: s_mov_b32 s33, s4
9283
; GFX9-NEXT: s_setpc_b64 s[30:31]
9384
;
9485
; GFX12-LABEL: bitcast_i160_to_v5i32:
@@ -98,23 +89,6 @@ define <5 x i32> @bitcast_i160_to_v5i32(i160 %int) {
9889
; GFX12-NEXT: s_wait_samplecnt 0x0
9990
; GFX12-NEXT: s_wait_bvhcnt 0x0
10091
; GFX12-NEXT: s_wait_kmcnt 0x0
101-
; GFX12-NEXT: s_mov_b32 s0, s33
102-
; GFX12-NEXT: s_add_co_i32 s33, s32, 31
103-
; GFX12-NEXT: s_mov_b32 s1, s34
104-
; GFX12-NEXT: s_wait_alu 0xfffe
105-
; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
106-
; GFX12-NEXT: s_clause 0x1
107-
; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
108-
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
109-
; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
110-
; GFX12-NEXT: s_mov_b32 s34, s32
111-
; GFX12-NEXT: s_add_co_i32 s32, s32, 64
112-
; GFX12-NEXT: s_wait_alu 0xfffe
113-
; GFX12-NEXT: s_mov_b32 s32, s34
114-
; GFX12-NEXT: s_mov_b32 s34, s1
115-
; GFX12-NEXT: s_mov_b32 s33, s0
116-
; GFX12-NEXT: s_wait_loadcnt 0x0
117-
; GFX12-NEXT: s_wait_alu 0xfffe
11892
; GFX12-NEXT: s_setpc_b64 s[30:31]
11993
%bitcast = bitcast i160 %int to <5 x i32>
12094
ret <5 x i32> %bitcast
@@ -124,15 +98,6 @@ define <6 x i32> @bitcast_i192_to_v6i32(i192 %int) {
12498
; GFX9-LABEL: bitcast_i192_to_v6i32:
12599
; GFX9: ; %bb.0:
126100
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127-
; GFX9-NEXT: s_mov_b32 s4, s33
128-
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
129-
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
130-
; GFX9-NEXT: s_mov_b32 s5, s34
131-
; GFX9-NEXT: s_mov_b32 s34, s32
132-
; GFX9-NEXT: s_addk_i32 s32, 0x1000
133-
; GFX9-NEXT: s_mov_b32 s32, s34
134-
; GFX9-NEXT: s_mov_b32 s34, s5
135-
; GFX9-NEXT: s_mov_b32 s33, s4
136101
; GFX9-NEXT: s_setpc_b64 s[30:31]
137102
;
138103
; GFX12-LABEL: bitcast_i192_to_v6i32:
@@ -142,23 +107,6 @@ define <6 x i32> @bitcast_i192_to_v6i32(i192 %int) {
142107
; GFX12-NEXT: s_wait_samplecnt 0x0
143108
; GFX12-NEXT: s_wait_bvhcnt 0x0
144109
; GFX12-NEXT: s_wait_kmcnt 0x0
145-
; GFX12-NEXT: s_mov_b32 s0, s33
146-
; GFX12-NEXT: s_add_co_i32 s33, s32, 31
147-
; GFX12-NEXT: s_mov_b32 s1, s34
148-
; GFX12-NEXT: s_wait_alu 0xfffe
149-
; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
150-
; GFX12-NEXT: s_clause 0x1
151-
; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
152-
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
153-
; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
154-
; GFX12-NEXT: s_mov_b32 s34, s32
155-
; GFX12-NEXT: s_add_co_i32 s32, s32, 64
156-
; GFX12-NEXT: s_wait_alu 0xfffe
157-
; GFX12-NEXT: s_mov_b32 s32, s34
158-
; GFX12-NEXT: s_mov_b32 s34, s1
159-
; GFX12-NEXT: s_mov_b32 s33, s0
160-
; GFX12-NEXT: s_wait_loadcnt 0x0
161-
; GFX12-NEXT: s_wait_alu 0xfffe
162110
; GFX12-NEXT: s_setpc_b64 s[30:31]
163111
%bitcast = bitcast i192 %int to <6 x i32>
164112
ret <6 x i32> %bitcast
@@ -168,15 +116,6 @@ define <7 x i32> @bitcast_i224_to_v7i32(i224 %int) {
168116
; GFX9-LABEL: bitcast_i224_to_v7i32:
169117
; GFX9: ; %bb.0:
170118
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171-
; GFX9-NEXT: s_mov_b32 s4, s33
172-
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
173-
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
174-
; GFX9-NEXT: s_mov_b32 s5, s34
175-
; GFX9-NEXT: s_mov_b32 s34, s32
176-
; GFX9-NEXT: s_addk_i32 s32, 0x1000
177-
; GFX9-NEXT: s_mov_b32 s32, s34
178-
; GFX9-NEXT: s_mov_b32 s34, s5
179-
; GFX9-NEXT: s_mov_b32 s33, s4
180119
; GFX9-NEXT: s_setpc_b64 s[30:31]
181120
;
182121
; GFX12-LABEL: bitcast_i224_to_v7i32:
@@ -186,27 +125,6 @@ define <7 x i32> @bitcast_i224_to_v7i32(i224 %int) {
186125
; GFX12-NEXT: s_wait_samplecnt 0x0
187126
; GFX12-NEXT: s_wait_bvhcnt 0x0
188127
; GFX12-NEXT: s_wait_kmcnt 0x0
189-
; GFX12-NEXT: s_mov_b32 s0, s33
190-
; GFX12-NEXT: s_add_co_i32 s33, s32, 31
191-
; GFX12-NEXT: s_mov_b32 s1, s34
192-
; GFX12-NEXT: s_wait_alu 0xfffe
193-
; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
194-
; GFX12-NEXT: s_clause 0x1
195-
; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
196-
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
197-
; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
198-
; GFX12-NEXT: s_clause 0x1
199-
; GFX12-NEXT: scratch_store_b32 off, v6, s33 offset:24
200-
; GFX12-NEXT: scratch_store_b64 off, v[4:5], s33 offset:16
201-
; GFX12-NEXT: scratch_load_b96 v[4:6], off, s33 offset:16
202-
; GFX12-NEXT: s_mov_b32 s34, s32
203-
; GFX12-NEXT: s_add_co_i32 s32, s32, 64
204-
; GFX12-NEXT: s_wait_alu 0xfffe
205-
; GFX12-NEXT: s_mov_b32 s32, s34
206-
; GFX12-NEXT: s_mov_b32 s34, s1
207-
; GFX12-NEXT: s_mov_b32 s33, s0
208-
; GFX12-NEXT: s_wait_loadcnt 0x0
209-
; GFX12-NEXT: s_wait_alu 0xfffe
210128
; GFX12-NEXT: s_setpc_b64 s[30:31]
211129
%bitcast = bitcast i224 %int to <7 x i32>
212130
ret <7 x i32> %bitcast
@@ -252,15 +170,6 @@ define <3 x i64> @bitcast_i192_to_v3i64(i192 %int) {
252170
; GFX9-LABEL: bitcast_i192_to_v3i64:
253171
; GFX9: ; %bb.0:
254172
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255-
; GFX9-NEXT: s_mov_b32 s4, s33
256-
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
257-
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
258-
; GFX9-NEXT: s_mov_b32 s5, s34
259-
; GFX9-NEXT: s_mov_b32 s34, s32
260-
; GFX9-NEXT: s_addk_i32 s32, 0x1000
261-
; GFX9-NEXT: s_mov_b32 s32, s34
262-
; GFX9-NEXT: s_mov_b32 s34, s5
263-
; GFX9-NEXT: s_mov_b32 s33, s4
264173
; GFX9-NEXT: s_setpc_b64 s[30:31]
265174
;
266175
; GFX12-LABEL: bitcast_i192_to_v3i64:
@@ -270,23 +179,6 @@ define <3 x i64> @bitcast_i192_to_v3i64(i192 %int) {
270179
; GFX12-NEXT: s_wait_samplecnt 0x0
271180
; GFX12-NEXT: s_wait_bvhcnt 0x0
272181
; GFX12-NEXT: s_wait_kmcnt 0x0
273-
; GFX12-NEXT: s_mov_b32 s0, s33
274-
; GFX12-NEXT: s_add_co_i32 s33, s32, 31
275-
; GFX12-NEXT: s_mov_b32 s1, s34
276-
; GFX12-NEXT: s_wait_alu 0xfffe
277-
; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
278-
; GFX12-NEXT: s_clause 0x1
279-
; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
280-
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
281-
; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
282-
; GFX12-NEXT: s_mov_b32 s34, s32
283-
; GFX12-NEXT: s_add_co_i32 s32, s32, 64
284-
; GFX12-NEXT: s_wait_alu 0xfffe
285-
; GFX12-NEXT: s_mov_b32 s32, s34
286-
; GFX12-NEXT: s_mov_b32 s34, s1
287-
; GFX12-NEXT: s_mov_b32 s33, s0
288-
; GFX12-NEXT: s_wait_loadcnt 0x0
289-
; GFX12-NEXT: s_wait_alu 0xfffe
290182
; GFX12-NEXT: s_setpc_b64 s[30:31]
291183
%bitcast = bitcast i192 %int to <3 x i64>
292184
ret <3 x i64> %bitcast
@@ -408,15 +300,6 @@ define <5 x float> @bitcast_i160_to_v5f32(i160 %int) {
408300
; GFX9-LABEL: bitcast_i160_to_v5f32:
409301
; GFX9: ; %bb.0:
410302
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
411-
; GFX9-NEXT: s_mov_b32 s4, s33
412-
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
413-
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
414-
; GFX9-NEXT: s_mov_b32 s5, s34
415-
; GFX9-NEXT: s_mov_b32 s34, s32
416-
; GFX9-NEXT: s_addk_i32 s32, 0x1000
417-
; GFX9-NEXT: s_mov_b32 s32, s34
418-
; GFX9-NEXT: s_mov_b32 s34, s5
419-
; GFX9-NEXT: s_mov_b32 s33, s4
420303
; GFX9-NEXT: s_setpc_b64 s[30:31]
421304
;
422305
; GFX12-LABEL: bitcast_i160_to_v5f32:
@@ -426,23 +309,6 @@ define <5 x float> @bitcast_i160_to_v5f32(i160 %int) {
426309
; GFX12-NEXT: s_wait_samplecnt 0x0
427310
; GFX12-NEXT: s_wait_bvhcnt 0x0
428311
; GFX12-NEXT: s_wait_kmcnt 0x0
429-
; GFX12-NEXT: s_mov_b32 s0, s33
430-
; GFX12-NEXT: s_add_co_i32 s33, s32, 31
431-
; GFX12-NEXT: s_mov_b32 s1, s34
432-
; GFX12-NEXT: s_wait_alu 0xfffe
433-
; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
434-
; GFX12-NEXT: s_clause 0x1
435-
; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
436-
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
437-
; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
438-
; GFX12-NEXT: s_mov_b32 s34, s32
439-
; GFX12-NEXT: s_add_co_i32 s32, s32, 64
440-
; GFX12-NEXT: s_wait_alu 0xfffe
441-
; GFX12-NEXT: s_mov_b32 s32, s34
442-
; GFX12-NEXT: s_mov_b32 s34, s1
443-
; GFX12-NEXT: s_mov_b32 s33, s0
444-
; GFX12-NEXT: s_wait_loadcnt 0x0
445-
; GFX12-NEXT: s_wait_alu 0xfffe
446312
; GFX12-NEXT: s_setpc_b64 s[30:31]
447313
%bitcast = bitcast i160 %int to <5 x float>
448314
ret <5 x float> %bitcast
@@ -452,15 +318,6 @@ define <6 x float> @bitcast_i192_to_v6f32(i192 %int) {
452318
; GFX9-LABEL: bitcast_i192_to_v6f32:
453319
; GFX9: ; %bb.0:
454320
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
455-
; GFX9-NEXT: s_mov_b32 s4, s33
456-
; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
457-
; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
458-
; GFX9-NEXT: s_mov_b32 s5, s34
459-
; GFX9-NEXT: s_mov_b32 s34, s32
460-
; GFX9-NEXT: s_addk_i32 s32, 0x1000
461-
; GFX9-NEXT: s_mov_b32 s32, s34
462-
; GFX9-NEXT: s_mov_b32 s34, s5
463-
; GFX9-NEXT: s_mov_b32 s33, s4
464321
; GFX9-NEXT: s_setpc_b64 s[30:31]
465322
;
466323
; GFX12-LABEL: bitcast_i192_to_v6f32:
@@ -470,23 +327,6 @@ define <6 x float> @bitcast_i192_to_v6f32(i192 %int) {
470327
; GFX12-NEXT: s_wait_samplecnt 0x0
471328
; GFX12-NEXT: s_wait_bvhcnt 0x0
472329
; GFX12-NEXT: s_wait_kmcnt 0x0
473-
; GFX12-NEXT: s_mov_b32 s0, s33
474-
; GFX12-NEXT: s_add_co_i32 s33, s32, 31
475-
; GFX12-NEXT: s_mov_b32 s1, s34
476-
; GFX12-NEXT: s_wait_alu 0xfffe
477-
; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
478-
; GFX12-NEXT: s_clause 0x1
479-
; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
480-
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
481-
; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
482-
; GFX12-NEXT: s_mov_b32 s34, s32
483-
; GFX12-NEXT: s_add_co_i32 s32, s32, 64
484-
; GFX12-NEXT: s_wait_alu 0xfffe
485-
; GFX12-NEXT: s_mov_b32 s32, s34
486-
; GFX12-NEXT: s_mov_b32 s34, s1
487-
; GFX12-NEXT: s_mov_b32 s33, s0
488-
; GFX12-NEXT: s_wait_loadcnt 0x0
489-
; GFX12-NEXT: s_wait_alu 0xfffe
490330
; GFX12-NEXT: s_setpc_b64 s[30:31]
491331
%bitcast = bitcast i192 %int to <6 x float>
492332
ret <6 x float> %bitcast

llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3110,17 +3110,8 @@ define void @store_i160(i160 %data, ptr addrspace(8) inreg %buf) {
31103110
; SDAG-LABEL: store_i160:
31113111
; SDAG: ; %bb.0:
31123112
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3113-
; SDAG-NEXT: s_mov_b32 s4, s33
3114-
; SDAG-NEXT: s_add_i32 s33, s32, 0x7c0
3115-
; SDAG-NEXT: s_and_b32 s33, s33, 0xfffff800
3116-
; SDAG-NEXT: s_mov_b32 s5, s34
3117-
; SDAG-NEXT: s_mov_b32 s34, s32
3118-
; SDAG-NEXT: s_addk_i32 s32, 0x1000
31193113
; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0
31203114
; SDAG-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16
3121-
; SDAG-NEXT: s_mov_b32 s32, s34
3122-
; SDAG-NEXT: s_mov_b32 s34, s5
3123-
; SDAG-NEXT: s_mov_b32 s33, s4
31243115
; SDAG-NEXT: s_waitcnt vmcnt(0)
31253116
; SDAG-NEXT: s_setpc_b64 s[30:31]
31263117
;

0 commit comments

Comments
 (0)