-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[NVPTX] remove store.params of undef #96940
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc < %s -march=nvptx64 | FileCheck %s | ||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} | ||
|
||
target triple = "nvptx64-nvidia-cuda" | ||
|
||
%struct.T = type { i64, <2 x i32>, <4 x i32> } | ||
|
||
declare void @test_call(%struct.T) | ||
|
||
define void @test_store_param_undef() { | ||
; CHECK-LABEL: test_store_param_undef( | ||
; CHECK: { | ||
; CHECK-EMPTY: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: { // callseq 0, 0 | ||
; CHECK-NEXT: .param .align 16 .b8 param0[32]; | ||
; CHECK-NEXT: call.uni | ||
; CHECK-NEXT: test_call, | ||
; CHECK-NEXT: ( | ||
; CHECK-NEXT: param0 | ||
; CHECK-NEXT: ); | ||
; CHECK-NEXT: } // callseq 0 | ||
; CHECK-NEXT: ret; | ||
call void @test_call(%struct.T undef) | ||
ret void | ||
} | ||
|
||
define void @test_store_param_def(i64 %param0, i32 %param1) { | ||
; CHECK-LABEL: test_store_param_def( | ||
; CHECK: { | ||
; CHECK-NEXT: .reg .b32 %r<6>; | ||
; CHECK-NEXT: .reg .b64 %rd<2>; | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_param_def_param_0]; | ||
; CHECK-NEXT: ld.param.u32 %r1, [test_store_param_def_param_1]; | ||
; CHECK-NEXT: { // callseq 1, 0 | ||
; CHECK-NEXT: .param .align 16 .b8 param0[32]; | ||
; CHECK-NEXT: st.param.b64 [param0+0], %rd1; | ||
; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similarly I'm unclear on why we expect to have a v2.b32 and v4.b32 store here? It seems like only one value in each of these struct fields has a non-undef value. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That is correct, this is similar to the existing DAG combining behavior for normal stores: only when the value is completely undef is the store removed. I agree the logic could be improved to narrow the store but I think this is beyond the scope of this change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sg |
||
; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5}; | ||
; CHECK-NEXT: call.uni | ||
; CHECK-NEXT: test_call, | ||
; CHECK-NEXT: ( | ||
; CHECK-NEXT: param0 | ||
; CHECK-NEXT: ); | ||
; CHECK-NEXT: } // callseq 1 | ||
; CHECK-NEXT: ret; | ||
%V2 = insertelement <2 x i32> undef, i32 %param1, i32 1 | ||
%V4 = insertelement <4 x i32> undef, i32 %param1, i32 1 | ||
%S0 = insertvalue %struct.T undef, i64 %param0, 0 | ||
%S1 = insertvalue %struct.T %S0, <2 x i32> %V2, 1 | ||
%S2 = insertvalue %struct.T %S1, <4 x i32> %V4, 2 | ||
call void @test_call(%struct.T %S2) | ||
ret void | ||
} | ||
|
||
define void @test_store_undef(ptr %out) { | ||
; CHECK-LABEL: test_store_undef( | ||
; CHECK: { | ||
; CHECK-EMPTY: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: ret; | ||
store %struct.T undef, ptr %out | ||
ret void | ||
} | ||
|
||
define void @test_store_def(i64 %param0, i32 %param1, ptr %out) { | ||
; CHECK-LABEL: test_store_def( | ||
; CHECK: { | ||
; CHECK-NEXT: .reg .b32 %r<6>; | ||
; CHECK-NEXT: .reg .b64 %rd<3>; | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_def_param_0]; | ||
; CHECK-NEXT: ld.param.u32 %r1, [test_store_def_param_1]; | ||
; CHECK-NEXT: ld.param.u64 %rd2, [test_store_def_param_2]; | ||
; CHECK-NEXT: st.v4.u32 [%rd2+16], {%r2, %r1, %r3, %r4}; | ||
; CHECK-NEXT: st.v2.u32 [%rd2+8], {%r5, %r1}; | ||
; CHECK-NEXT: st.u64 [%rd2], %rd1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here. Why do we store the full struct when most of it is undef? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See above. |
||
; CHECK-NEXT: ret; | ||
%V2 = insertelement <2 x i32> undef, i32 %param1, i32 1 | ||
%V4 = insertelement <4 x i32> undef, i32 %param1, i32 1 | ||
%S0 = insertvalue %struct.T undef, i64 %param0, 0 | ||
%S1 = insertvalue %struct.T %S0, <2 x i32> %V2, 1 | ||
%S2 = insertvalue %struct.T %S1, <4 x i32> %V4, 2 | ||
store %struct.T %S2, ptr %out | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Up to you, but personally I feel like, we might as well write the tests manually instead of using this tool? Or, use the tool, and then delete the lines in the tests that are not relevant?
Like we don't care about the CHECK-EMPTY's and the lines containing comments.
The advantage of this is that, if we ever change the codegen format (and it does change sometimes!), we won't have to regenerate this test and then figure out if the changes are "meaningful".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's funny, @Artem-B has frequently requested that I use this tool. If it's alright with you I'll keep this test as is, though it would be nice to have consensus on what type of tests we prefer, I wonder if the auto generation script could be updated to make the checks that are generated more flexible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Heh, okay, I defer to Art. :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Auto-generator is not perfect, but it has two major benefits:
Human-written tests can be good, but usually aren't. The effort of writing a good test falls somewhere between tedious and infeasible. I'll take a good enough automatic test, even if it's overly verbose. It also lowers the review burden as we no longer need to argue about the quality of the checks in the test. Just concentrate on what we're testing and that the generated code is valid.