Skip to content

Commit 83d9ed2

Browse files
[NVPTX] Emit NVPTXISD::DYNAMIC_STACKALLOC's chain (#101714)
`LowerDYNAMIC_STACKALLOC()` returns the incoming chain operand of the `dynamic_stackalloc` node instead of the chain produced by the new `NVPTXISD::DYNAMIC_STACKALLOC` node. Fix this by returning the `NVPTXISD::DYNAMIC_STACKALLOC` node's own chain result, which also avoids producing an unnecessary `ISD::MERGE_VALUES`.
1 parent 52956b0 commit 83d9ed2

File tree

2 files changed

+33
-10
lines changed

2 files changed

+33
-10
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2232,18 +2232,12 @@ SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
22322232
SDLoc DL(Op.getNode());
22332233

22342234
// The size for ptx alloca instruction is 64-bit for m64 and 32-bit for m32.
2235-
if (nvTM->is64Bit())
2236-
Size = DAG.getZExtOrTrunc(Size, DL, MVT::i64);
2237-
else
2238-
Size = DAG.getZExtOrTrunc(Size, DL, MVT::i32);
2235+
MVT ValueSizeTy = nvTM->is64Bit() ? MVT::i64 : MVT::i32;
22392236

2240-
SDValue AllocOps[] = {Chain, Size,
2237+
SDValue AllocOps[] = {Chain, DAG.getZExtOrTrunc(Size, DL, ValueSizeTy),
22412238
DAG.getTargetConstant(Align, DL, MVT::i32)};
2242-
SDValue Alloca = DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL,
2243-
nvTM->is64Bit() ? MVT::i64 : MVT::i32, AllocOps);
2244-
2245-
SDValue MergeOps[] = {Alloca, Chain};
2246-
return DAG.getMergeValues(MergeOps, DL);
2239+
EVT RetTypes[] = {ValueSizeTy, MVT::Other};
2240+
return DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL, RetTypes, AllocOps);
22472241
}
22482242

22492243
// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s
3+
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
define void @foo(i64 %a, ptr %p0, ptr %p1) {
7+
; CHECK-LABEL: foo(
8+
; CHECK: {
9+
; CHECK-NEXT: .reg .b64 %rd<8>;
10+
; CHECK-EMPTY:
11+
; CHECK-NEXT: // %bb.0:
12+
; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0];
13+
; CHECK-NEXT: add.s64 %rd2, %rd1, 7;
14+
; CHECK-NEXT: and.b64 %rd3, %rd2, -8;
15+
; CHECK-NEXT: alloca.u64 %rd4, %rd3, 16;
16+
; CHECK-NEXT: cvta.local.u64 %rd4, %rd4;
17+
; CHECK-NEXT: ld.param.u64 %rd5, [foo_param_1];
18+
; CHECK-NEXT: alloca.u64 %rd6, %rd3, 16;
19+
; CHECK-NEXT: cvta.local.u64 %rd6, %rd6;
20+
; CHECK-NEXT: ld.param.u64 %rd7, [foo_param_2];
21+
; CHECK-NEXT: st.u64 [%rd5], %rd4;
22+
; CHECK-NEXT: st.u64 [%rd7], %rd6;
23+
; CHECK-NEXT: ret;
24+
%b = alloca i8, i64 %a, align 16
25+
%c = alloca i8, i64 %a, align 16
26+
store ptr %b, ptr %p0, align 8
27+
store ptr %c, ptr %p1, align 8
28+
ret void
29+
}

0 commit comments

Comments
 (0)