Skip to content

Commit e85fcb7

Browse files
authored
AMDGPU: Add instruction flags when lowering ctor/dtor (#111652)
These should be well-behaved address computations.
1 parent f59b151 commit e85fcb7

File tree

4 files changed: 13 additions (+13) and 9 deletions (-9).

4 files changed: 13 additions (+13) and 9 deletions (-9).

llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -112,9 +112,13 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) {
112112
Type *Int64Ty = IntegerType::getInt64Ty(C);
113113
auto *EndPtr = IRB.CreatePtrToInt(End, Int64Ty);
114114
auto *BeginPtr = IRB.CreatePtrToInt(Begin, Int64Ty);
115-
auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr);
116-
auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3));
117-
auto *Offset = IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1));
115+
auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr, "", /*HasNUW=*/true,
116+
/*HasNSW=*/true);
117+
auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3), "",
118+
/*isExact=*/true);
119+
auto *Offset =
120+
IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1), "", /*HasNUW=*/true,
121+
/*HasNSW=*/true);
118122
Start = IRB.CreateInBoundsGEP(
119123
PtrArrayTy, Begin,
120124
ArrayRef<Value *>({ConstantInt::get(Int64Ty, 0), Offset}));

llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,8 +64,8 @@ define void @bar() addrspace(1) {
6464
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
6565
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
6666
; CHECK-NEXT: entry:
67-
; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
68-
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
67+
; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
68+
; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
6969
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
7070
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start
7171
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]

llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,8 +79,8 @@ define internal void @bar() {
7979
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
8080
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
8181
; CHECK-NEXT: entry:
82-
; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
83-
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
82+
; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
83+
; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
8484
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
8585
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start
8686
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]

llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -71,8 +71,8 @@ define internal void @bar.5() {
7171
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
7272
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
7373
; CHECK-NEXT: entry:
74-
; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
75-
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
74+
; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
75+
; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
7676
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
7777
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start
7878
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]

0 commit comments

Comments
 (0)