-
Notifications
You must be signed in to change notification settings - Fork 13.6k
AMDGPU: Add instruction flags when lowering ctor/dtor #111652
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: These should be well-behaved address computations. Full diff: https://github.com/llvm/llvm-project/pull/111652.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index ea11002bb6a5fa..a774ad53b5bede 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -112,9 +112,13 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) {
Type *Int64Ty = IntegerType::getInt64Ty(C);
auto *EndPtr = IRB.CreatePtrToInt(End, Int64Ty);
auto *BeginPtr = IRB.CreatePtrToInt(Begin, Int64Ty);
- auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr);
- auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3));
- auto *Offset = IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1));
+ auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr, "", /*HasNUW=*/true,
+ /*HasNSW=*/true);
+ auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3), "",
+ /*isExact=*/true);
+ auto *Offset =
+ IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1), "", /*HasNUW=*/true,
+ /*HasNSW=*/true);
Start = IRB.CreateInBoundsGEP(
PtrArrayTy, Begin,
ArrayRef<Value *>({ConstantInt::get(Int64Ty, 0), Offset}));
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
index a87e07cb57e05e..968871af2d059a 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
@@ -64,8 +64,8 @@ define void @bar() addrspace(1) {
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
index a423b320db559d..98497a64e3204c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
@@ -79,8 +79,8 @@ define internal void @bar() {
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
diff --git a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
index 309ecb17e79ed1..a137f31c7aeeca 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
@@ -71,8 +71,8 @@ define internal void @bar.5() {
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
|
These should be well-behaved address computations.
61f32fc
to
20bc27e
Compare
auto *Offset = IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1)); | ||
auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr, "", /*HasNUW=*/true, | ||
/*HasNSW=*/true); | ||
auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3), "", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this an ashr, actually? I assume end > start always holds?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's because I typed the GEP, so I had to do arr[size / sizeof(void *)]
or something like that; I don't remember exactly.
These should be well-behaved address computations.