Skip to content

Commit cb24422

Browse files
authored
[NVPTX] remove store.params of undef (#96940)
Extend the logic for `NVPTXISD::StoreRetval` to `NVPTXISD::StoreParam` to remove stores of undef values.
1 parent ebc123e commit cb24422

File tree

2 files changed

+118
-7
lines changed

2 files changed

+118
-7
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5345,15 +5345,26 @@ PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
53455345
return SDValue();
53465346
}
53475347

5348+
static SDValue PerformStoreCombineHelper(SDNode *N, std::size_t Front,
5349+
std::size_t Back) {
5350+
if (all_of(N->ops().drop_front(Front).drop_back(Back),
5351+
[](const SDUse &U) { return U.get()->isUndef(); }))
5352+
// Operand 0 is the previous value in the chain. Cannot return EntryToken
5353+
// as the previous value will become unused and eliminated later.
5354+
return N->getOperand(0);
5355+
5356+
return SDValue();
5357+
}
5358+
5359+
static SDValue PerformStoreParamCombine(SDNode *N) {
5360+
// Operands 3 through the second-to-last one are the values to be stored.
5361+
// {Chain, ArgID, Offset, Val, Glue}
5362+
return PerformStoreCombineHelper(N, 3, 1);
5363+
}
5364+
53485365
static SDValue PerformStoreRetvalCombine(SDNode *N) {
53495366
// Operands 2 through the last one are the values to be stored.
5350-
for (std::size_t I = 2, OpsCount = N->ops().size(); I != OpsCount; ++I)
5351-
if (!N->getOperand(I).isUndef())
5352-
return SDValue();
5353-
5354-
// Operand 0 is the previous value in the chain. Cannot return EntryToken
5355-
// as the previous value will become unused and eliminated later.
5356-
return N->getOperand(0);
5367+
return PerformStoreCombineHelper(N, 2, 0);
53575368
}
53585369

53595370
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
@@ -5822,6 +5833,10 @@ static SDValue PerformEXTRACTCombine(SDNode *N,
58225833
VectorVT == MVT::v4i8 || VectorVT == MVT::v8i8)
58235834
return SDValue();
58245835

5836+
// Don't mess with undef values as sra may be simplified to 0, not undef.
5837+
if (Vector->isUndef() || ISD::allOperandsUndef(Vector.getNode()))
5838+
return SDValue();
5839+
58255840
uint64_t VectorBits = VectorVT.getSizeInBits();
58265841
// We only handle the types we can extract in-register.
58275842
if (!(VectorBits == 16 || VectorBits == 32 || VectorBits == 64))
@@ -5950,6 +5965,10 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
59505965
case NVPTXISD::StoreRetvalV2:
59515966
case NVPTXISD::StoreRetvalV4:
59525967
return PerformStoreRetvalCombine(N);
5968+
case NVPTXISD::StoreParam:
5969+
case NVPTXISD::StoreParamV2:
5970+
case NVPTXISD::StoreParamV4:
5971+
return PerformStoreParamCombine(N);
59535972
case ISD::EXTRACT_VECTOR_ELT:
59545973
return PerformEXTRACTCombine(N, DCI);
59555974
case ISD::VSELECT:
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
4+
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
%struct.T = type { i64, <2 x i32>, <4 x i32> }
8+
9+
declare void @test_call(%struct.T)
10+
11+
define void @test_store_param_undef() {
12+
; CHECK-LABEL: test_store_param_undef(
13+
; CHECK: {
14+
; CHECK-EMPTY:
15+
; CHECK-EMPTY:
16+
; CHECK-NEXT: // %bb.0:
17+
; CHECK-NEXT: { // callseq 0, 0
18+
; CHECK-NEXT: .param .align 16 .b8 param0[32];
19+
; CHECK-NEXT: call.uni
20+
; CHECK-NEXT: test_call,
21+
; CHECK-NEXT: (
22+
; CHECK-NEXT: param0
23+
; CHECK-NEXT: );
24+
; CHECK-NEXT: } // callseq 0
25+
; CHECK-NEXT: ret;
26+
call void @test_call(%struct.T undef)
27+
ret void
28+
}
29+
30+
define void @test_store_param_def(i64 %param0, i32 %param1) {
31+
; CHECK-LABEL: test_store_param_def(
32+
; CHECK: {
33+
; CHECK-NEXT: .reg .b32 %r<6>;
34+
; CHECK-NEXT: .reg .b64 %rd<2>;
35+
; CHECK-EMPTY:
36+
; CHECK-NEXT: // %bb.0:
37+
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_param_def_param_0];
38+
; CHECK-NEXT: ld.param.u32 %r1, [test_store_param_def_param_1];
39+
; CHECK-NEXT: { // callseq 1, 0
40+
; CHECK-NEXT: .param .align 16 .b8 param0[32];
41+
; CHECK-NEXT: st.param.b64 [param0+0], %rd1;
42+
; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1};
43+
; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5};
44+
; CHECK-NEXT: call.uni
45+
; CHECK-NEXT: test_call,
46+
; CHECK-NEXT: (
47+
; CHECK-NEXT: param0
48+
; CHECK-NEXT: );
49+
; CHECK-NEXT: } // callseq 1
50+
; CHECK-NEXT: ret;
51+
%V2 = insertelement <2 x i32> undef, i32 %param1, i32 1
52+
%V4 = insertelement <4 x i32> undef, i32 %param1, i32 1
53+
%S0 = insertvalue %struct.T undef, i64 %param0, 0
54+
%S1 = insertvalue %struct.T %S0, <2 x i32> %V2, 1
55+
%S2 = insertvalue %struct.T %S1, <4 x i32> %V4, 2
56+
call void @test_call(%struct.T %S2)
57+
ret void
58+
}
59+
60+
define void @test_store_undef(ptr %out) {
61+
; CHECK-LABEL: test_store_undef(
62+
; CHECK: {
63+
; CHECK-EMPTY:
64+
; CHECK-EMPTY:
65+
; CHECK-NEXT: // %bb.0:
66+
; CHECK-NEXT: ret;
67+
store %struct.T undef, ptr %out
68+
ret void
69+
}
70+
71+
define void @test_store_def(i64 %param0, i32 %param1, ptr %out) {
72+
; CHECK-LABEL: test_store_def(
73+
; CHECK: {
74+
; CHECK-NEXT: .reg .b32 %r<6>;
75+
; CHECK-NEXT: .reg .b64 %rd<3>;
76+
; CHECK-EMPTY:
77+
; CHECK-NEXT: // %bb.0:
78+
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_def_param_0];
79+
; CHECK-NEXT: ld.param.u32 %r1, [test_store_def_param_1];
80+
; CHECK-NEXT: ld.param.u64 %rd2, [test_store_def_param_2];
81+
; CHECK-NEXT: st.v4.u32 [%rd2+16], {%r2, %r1, %r3, %r4};
82+
; CHECK-NEXT: st.v2.u32 [%rd2+8], {%r5, %r1};
83+
; CHECK-NEXT: st.u64 [%rd2], %rd1;
84+
; CHECK-NEXT: ret;
85+
%V2 = insertelement <2 x i32> undef, i32 %param1, i32 1
86+
%V4 = insertelement <4 x i32> undef, i32 %param1, i32 1
87+
%S0 = insertvalue %struct.T undef, i64 %param0, 0
88+
%S1 = insertvalue %struct.T %S0, <2 x i32> %V2, 1
89+
%S2 = insertvalue %struct.T %S1, <4 x i32> %V4, 2
90+
store %struct.T %S2, ptr %out
91+
ret void
92+
}

0 commit comments

Comments
 (0)