[NVPTX] remove store.params of undef #96940

AlexMaclean · 2024-06-27T17:42:45Z

Extend the logic for NVPTXISD::StoreRetval to NVPTXISD::StoreParam to remove stores of undef values.

llvmbot · 2024-06-27T17:43:07Z

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Extend the logic for NVPTXISD::StoreRetval to NVPTXISD::StoreParam to remove stores of undef values.

Full diff: https://github.com/llvm/llvm-project/pull/96940.diff

2 Files Affected:

(modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+26-7)
(added) llvm/test/CodeGen/NVPTX/store-undef.ll (+93)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index e3201516e4e7f..476a532db0a37 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -5345,15 +5345,26 @@ PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
   return SDValue();
 }
 
+static SDValue PerformStoreCombineHelper(SDNode *N, std::size_t Front,
+                                         std::size_t Back) {
+  if (all_of(N->ops().drop_front(Front).drop_back(Back),
+             [](const SDUse &U) { return U.get()->isUndef(); }))
+    // Operand 0 is the previous value in the chain. Cannot return EntryToken
+    // as the previous value will become unused and eliminated later.
+    return N->getOperand(0);
+
+  return SDValue();
+}
+
+static SDValue PerformStoreParamCombine(SDNode *N) {
+  // Operands from the 3rd to the 2nd last one are the values to be stored.
+  //   {Chain, ArgID, Offset, Val, Glue}
+  return PerformStoreCombineHelper(N, 3, 1);
+}
+
 static SDValue PerformStoreRetvalCombine(SDNode *N) {
   // Operands from the 2nd to the last one are the values to be stored
-  for (std::size_t I = 2, OpsCount = N->ops().size(); I != OpsCount; ++I)
-    if (!N->getOperand(I).isUndef())
-      return SDValue();
-
-  // Operand 0 is the previous value in the chain. Cannot return EntryToken
-  // as the previous value will become unused and eliminated later.
-  return N->getOperand(0);
+  return PerformStoreCombineHelper(N, 2, 0);
 }
 
 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
@@ -5822,6 +5833,10 @@ static SDValue PerformEXTRACTCombine(SDNode *N,
       VectorVT == MVT::v4i8 || VectorVT == MVT::v8i8)
     return SDValue();
 
+  // Don't mess with undef values as sra may be simplified to 0, not undef.
+  if (Vector->isUndef() || ISD::allOperandsUndef(Vector.getNode()))
+    return SDValue();
+
   uint64_t VectorBits = VectorVT.getSizeInBits();
   // We only handle the types we can extract in-register.
   if (!(VectorBits == 16 || VectorBits == 32 || VectorBits == 64))
@@ -5950,6 +5965,10 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
     case NVPTXISD::StoreRetvalV2:
     case NVPTXISD::StoreRetvalV4:
       return PerformStoreRetvalCombine(N);
+    case NVPTXISD::StoreParam:
+    case NVPTXISD::StoreParamV2:
+    case NVPTXISD::StoreParamV4:
+      return PerformStoreParamCombine(N);
     case ISD::EXTRACT_VECTOR_ELT:
       return PerformEXTRACTCombine(N, DCI);
     case ISD::VSELECT:
diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll
new file mode 100644
index 0000000000000..e281017e802d1
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/store-undef.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+%struct.T = type { i64, <2 x i32>, <4 x i32> }
+
+declare void @test_call(%struct.T)
+
+define void @test_store_param_undef() {
+; CHECK-LABEL: test_store_param_undef(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 0, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[32];
+; CHECK-NEXT:    st.param.v2.b32 [param0+8], {%r1, 0};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    test_call,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 0
+; CHECK-NEXT:    ret;
+  call void @test_call(%struct.T undef)
+  ret void
+}
+
+define void @test_store_param_def(i64 %param0, i32 %param1) {
+; CHECK-LABEL: test_store_param_def(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<6>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [test_store_param_def_param_0];
+; CHECK-NEXT:    ld.param.u32 %r1, [test_store_param_def_param_1];
+; CHECK-NEXT:    { // callseq 1, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[32];
+; CHECK-NEXT:    st.param.b64 [param0+0], %rd1;
+; CHECK-NEXT:    st.param.v2.b32 [param0+8], {%r2, %r1};
+; CHECK-NEXT:    st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    test_call,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 1
+; CHECK-NEXT:    ret;
+  %V2 = insertelement <2 x i32> undef, i32 %param1, i32 1
+  %V4 = insertelement <4 x i32> undef, i32 %param1, i32 1
+  %S0 = insertvalue %struct.T undef, i64 %param0, 0
+  %S1 = insertvalue %struct.T %S0, <2 x i32> %V2, 1
+  %S2 = insertvalue %struct.T %S1, <4 x i32> %V4, 2
+  call void @test_call(%struct.T %S2)
+  ret void
+}
+
+define void @test_store_undef(ptr %out) {
+; CHECK-LABEL: test_store_undef(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ret;
+  store %struct.T undef, ptr %out
+  ret void
+}
+
+define void @test_store_def(i64 %param0, i32 %param1, ptr %out) {
+; CHECK-LABEL: test_store_def(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<6>;
+; CHECK-NEXT:    .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [test_store_def_param_0];
+; CHECK-NEXT:    ld.param.u32 %r1, [test_store_def_param_1];
+; CHECK-NEXT:    ld.param.u64 %rd2, [test_store_def_param_2];
+; CHECK-NEXT:    st.v4.u32 [%rd2+16], {%r2, %r1, %r3, %r4};
+; CHECK-NEXT:    st.v2.u32 [%rd2+8], {%r5, %r1};
+; CHECK-NEXT:    st.u64 [%rd2], %rd1;
+; CHECK-NEXT:    ret;
+  %V2 = insertelement <2 x i32> undef, i32 %param1, i32 1
+  %V4 = insertelement <4 x i32> undef, i32 %param1, i32 1
+  %S0 = insertvalue %struct.T undef, i64 %param0, 0
+  %S1 = insertvalue %struct.T %S0, <2 x i32> %V2, 1
+  %S2 = insertvalue %struct.T %S1, <4 x i32> %V4, 2
+  store %struct.T %S2, ptr %out
+  ret void
+}

jlebar · 2024-06-27T19:02:37Z

llvm/test/CodeGen/NVPTX/store-undef.ll

@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5


Up to you, but personally I feel like, we might as well write the tests manually instead of using this tool? Or, use the tool, and then delete the lines in the tests that are not relevant?

Like we don't care about the CHECK-EMPTY's and the lines containing comments.

The advantage of this is that, if we ever change the codegen format (and it does change sometimes!), we won't have to regenerate this test and then figure out if the changes are "meaningful".

That's funny, @Artem-B has frequently requested that I use this tool. If it's alright with you, I'll keep this test as is, though it would be nice to have consensus on what type of tests we prefer. I wonder if the auto-generation script could be updated to make the generated checks more flexible.

Heh, okay, I defer to Art. :)

Auto-generator is not perfect, but it has two major benefits:

it's automatic, so the author only needs to write the input code, not the checks

it includes checks for everything, so when something goes wrong I can reason about it without guessing whether the changed output is OK or not, based on hand-written checks that tend to be spotty.

Human-written tests can be good, but usually aren't. The effort of writing a good test falls somewhere between tedious and infeasible. I'll take a good enough automatic test, even if it's overly verbose. It also lowers the review burden as we no longer need to argue about the quality of the checks in the test. Just concentrate on what we're testing and that the generated code is valid.

jlebar · 2024-06-27T19:03:52Z

llvm/test/CodeGen/NVPTX/store-undef.ll

+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 0, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[32];
+; CHECK-NEXT:    st.param.v2.b32 [param0+8], {%r1, 0};


I'm unclear on what this one is testing. If the parameter is undef then why do we store into part of it?

Apologies, this test was out of date; I forgot to add the latest version to the commit. The new version of this test does not include this line.

jlebar · 2024-06-27T19:05:51Z

llvm/test/CodeGen/NVPTX/store-undef.ll

+; CHECK-NEXT:    { // callseq 1, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[32];
+; CHECK-NEXT:    st.param.b64 [param0+0], %rd1;
+; CHECK-NEXT:    st.param.v2.b32 [param0+8], {%r2, %r1};


Similarly I'm unclear on why we expect to have a v2.b32 and v4.b32 store here? It seems like only one value in each of these struct fields has a non-undef value.

That is correct, this is similar to the existing DAG combining behavior for normal stores: only when the value is completely undef is the store removed. I agree the logic could be improved to narrow the store but I think this is beyond the scope of this change.

jlebar · 2024-06-27T19:06:48Z

llvm/test/CodeGen/NVPTX/store-undef.ll

+; CHECK-NEXT:    ld.param.u64 %rd2, [test_store_def_param_2];
+; CHECK-NEXT:    st.v4.u32 [%rd2+16], {%r2, %r1, %r3, %r4};
+; CHECK-NEXT:    st.v2.u32 [%rd2+8], {%r5, %r1};
+; CHECK-NEXT:    st.u64 [%rd2], %rd1;


Same here. Why do we store the full struct when most of it is undef?

llvm-ci · 2024-06-29T01:18:20Z

LLVM Buildbot has detected a new failure on builder clang-armv8-quick running on linaro-clang-armv8-quick while building llvm at step 5 "ninja check 1".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/154/builds/677

Here is the relevant piece of the build log for the reference:

Step 5 (ninja check 1) failure: stage 1 checked (failure)
******************** TEST 'Clangd :: trace.test' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
Content-Length: 3407

{
  "id": 0,
  "jsonrpc": "2.0",
  "result": {
    "capabilities": {
      "astProvider": true,
      "callHierarchyProvider": true,
      "clangdInlayHintsProvider": true,
      "codeActionProvider": true,
      "compilationDatabase": {
        "automaticReload": true
      },
      "completionProvider": {
        "resolveProvider": false,
        "triggerCharacters": [
          ".",
          "<",
          ">",
          ":",
          "\"",
          "/",
          "*"
        ]
      },
      "declarationProvider": true,
      "definitionProvider": true,
      "documentFormattingProvider": true,
      "documentHighlightProvider": true,
      "documentLinkProvider": {
        "resolveProvider": false
      },
      "documentOnTypeFormattingProvider": {
        "firstTriggerCharacter": "\n",
        "moreTriggerCharacter": []
      },
      "documentRangeFormattingProvider": true,
      "documentSymbolProvider": true,
      "executeCommandProvider": {
        "commands": [
          "clangd.applyFix",
          "clangd.applyRename",
          "clangd.applyTweak"
        ]
...

Extend the logic for `NVPTXISD::StoreRetval` to `NVPTXISD::StoreParam` to remove stores of undef values.

AlexMaclean added the backend:NVPTX label Jun 27, 2024

AlexMaclean requested a review from jlebar June 27, 2024 17:42

AlexMaclean self-assigned this Jun 27, 2024

jlebar reviewed Jun 27, 2024

View reviewed changes

[NVPTX] remove store.params of undef

41bf310

AlexMaclean force-pushed the upstream/store-param-undef branch from 6fb7068 to 41bf310 Compare June 28, 2024 16:53

jlebar approved these changes Jun 28, 2024

View reviewed changes

AlexMaclean merged commit cb24422 into llvm:main Jun 29, 2024
7 checks passed

lravenclaw pushed a commit to lravenclaw/llvm-project that referenced this pull request Jul 3, 2024

[NVPTX] remove store.params of undef (llvm#96940)

458b8de

Extend the logic for `NVPTXISD::StoreRetval` to `NVPTXISD::StoreParam` to remove stores of undef values.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[NVPTX] remove store.params of undef #96940

[NVPTX] remove store.params of undef #96940

Uh oh!

AlexMaclean commented Jun 27, 2024

Uh oh!

llvmbot commented Jun 27, 2024

Uh oh!

jlebar Jun 27, 2024

Uh oh!

AlexMaclean Jun 28, 2024

Uh oh!

jlebar Jun 28, 2024

Uh oh!

Artem-B Jul 8, 2024

Uh oh!

jlebar Jun 27, 2024

Uh oh!

AlexMaclean Jun 28, 2024

Uh oh!

jlebar Jun 27, 2024

Uh oh!

AlexMaclean Jun 28, 2024

Uh oh!

jlebar Jun 28, 2024

Uh oh!

jlebar Jun 27, 2024

Uh oh!

AlexMaclean Jun 28, 2024

Uh oh!

Uh oh!

llvm-ci commented Jun 29, 2024

Uh oh!

Uh oh!

		@@ -0,0 +1,93 @@
		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5

[NVPTX] remove store.params of undef #96940

[NVPTX] remove store.params of undef #96940

Uh oh!

Conversation

AlexMaclean commented Jun 27, 2024

Uh oh!

llvmbot commented Jun 27, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Jun 29, 2024

Uh oh!

Uh oh!