Skip to content

Commit 1febd71

Browse files
authored
[NVPTX] Add TTI support for folding isspacep in InferAS (#114486)
This change enables constant folding of the `@llvm.nvvm.isspacep.*` intrinsics when the address space can be propagated by InferAddressSpaces.
1 parent d3177d8 commit 1febd71

File tree

4 files changed

+215
-24
lines changed

4 files changed

+215
-24
lines changed

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,8 @@ void NVPTXPassConfig::addAddressSpaceInferencePasses() {
302302
// be eliminated by SROA.
303303
addPass(createSROAPass());
304304
addPass(createNVPTXLowerAllocaPass());
305+
// TODO: Consider running InferAddressSpaces during opt, earlier in the
306+
// compilation flow.
305307
addPass(createInferAddressSpacesPass());
306308
addPass(createNVPTXAtomicLowerPass());
307309
}

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

Lines changed: 63 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -416,33 +416,38 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
416416
llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
417417
}
418418

419+
// Statically answers an `isspacep` query for a pointer whose address space is
// known to be `AS`. `IID` must be one of the nvvm_isspacep_* intrinsic IDs
// handled in the switch below; any other ID reaches llvm_unreachable.
// Returns true/false when we know the answer, nullopt otherwise. The generic
// and param address spaces always yield nullopt.
420+
static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
421+
if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
422+
AS == NVPTXAS::ADDRESS_SPACE_PARAM)
423+
return std::nullopt; // Got to check at run-time.
424+
switch (IID) {
425+
case Intrinsic::nvvm_isspacep_global:
426+
return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
427+
case Intrinsic::nvvm_isspacep_local:
428+
return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
429+
case Intrinsic::nvvm_isspacep_shared:
430+
return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
431+
case Intrinsic::nvvm_isspacep_shared_cluster:
432+
// We can't tell shared from shared_cluster at compile time from AS alone,
433+
// but it can't be either if AS is not shared.
434+
return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
435+
: std::optional{false};
436+
case Intrinsic::nvvm_isspacep_const:
437+
return AS == NVPTXAS::ADDRESS_SPACE_CONST;
438+
default:
439+
llvm_unreachable("Unexpected intrinsic");
440+
}
441+
}
442+
419443
// Returns an instruction pointer (may be nullptr if we do not know the answer).
420444
// Returns nullopt if `II` is not one of the `isspacep` intrinsics.
445+
//
446+
// TODO: If InferAddressSpaces were run early enough in the pipeline this could
447+
// be removed in favor of the constant folding that occurs there through
448+
// rewriteIntrinsicWithAddressSpace
421449
static std::optional<Instruction *>
422450
handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
423-
// Returns true/false when we know the answer, nullopt otherwise.
424-
auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional<bool> {
425-
if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
426-
AS == NVPTXAS::ADDRESS_SPACE_PARAM)
427-
return std::nullopt; // Got to check at run-time.
428-
switch (IID) {
429-
case Intrinsic::nvvm_isspacep_global:
430-
return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
431-
case Intrinsic::nvvm_isspacep_local:
432-
return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
433-
case Intrinsic::nvvm_isspacep_shared:
434-
return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
435-
case Intrinsic::nvvm_isspacep_shared_cluster:
436-
// We can't tell shared from shared_cluster at compile time from AS alone,
437-
// but it can't be either is AS is not shared.
438-
return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
439-
: std::optional{false};
440-
case Intrinsic::nvvm_isspacep_const:
441-
return AS == NVPTXAS::ADDRESS_SPACE_CONST;
442-
default:
443-
llvm_unreachable("Unexpected intrinsic");
444-
}
445-
};
446451

447452
switch (auto IID = II.getIntrinsicID()) {
448453
case Intrinsic::nvvm_isspacep_global:
@@ -458,7 +463,7 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
458463
if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
459464
AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
460465

461-
if (std::optional<bool> Answer = CheckASMatch(IID, AS))
466+
if (std::optional<bool> Answer = evaluateIsSpace(IID, AS))
462467
return IC.replaceInstUsesWith(II,
463468
ConstantInt::get(II.getType(), *Answer));
464469
return nullptr; // Don't know the answer, got to check at run time.
@@ -525,3 +530,37 @@ void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
525530
TTI::PeelingPreferences &PP) {
526531
BaseT::getPeelingPreferences(L, SE, PP);
527532
}
533+
534+
// Reports which operands of intrinsic `IID` are flat-address pointers.
// For the nvvm_isspacep_* intrinsics listed below, appends operand index 0 to
// `OpIndexes` and returns true. For every other intrinsic, leaves `OpIndexes`
// untouched and returns false.
// NOTE(review): presumably this is the hook InferAddressSpaces queries before
// calling rewriteIntrinsicWithAddressSpace -- confirm against the TTI docs.
bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
535+
Intrinsic::ID IID) const {
536+
switch (IID) {
537+
case Intrinsic::nvvm_isspacep_const:
538+
case Intrinsic::nvvm_isspacep_global:
539+
case Intrinsic::nvvm_isspacep_local:
540+
case Intrinsic::nvvm_isspacep_shared:
541+
case Intrinsic::nvvm_isspacep_shared_cluster: {
542+
OpIndexes.push_back(0);
543+
return true;
544+
}
545+
}
546+
return false;
547+
}
548+
549+
// Tries to fold the isspacep intrinsic call `II` to a constant, using the
// address space of `NewV`'s pointer type.
// Returns a ConstantInt of `II`'s type when evaluateIsSpace knows the answer.
// Returns nullptr when the answer is unknown or when `II` is not one of the
// nvvm_isspacep_* intrinsics listed below. `OldV` is not consulted.
Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
549+
Value *OldV,
550+
Value *NewV) const {
551+
const Intrinsic::ID IID = II->getIntrinsicID();
552+
switch (IID) {
553+
case Intrinsic::nvvm_isspacep_const:
554+
case Intrinsic::nvvm_isspacep_global:
555+
case Intrinsic::nvvm_isspacep_local:
556+
case Intrinsic::nvvm_isspacep_shared:
557+
case Intrinsic::nvvm_isspacep_shared_cluster: {
558+
const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
559+
// nullopt means the address space alone does not decide the query.
560+
if (const auto R = evaluateIsSpace(IID, NewAS))
561+
return ConstantInt::get(II->getType(), *R);
562+
return nullptr;
563+
}
564+
}
565+
return nullptr;
566+
}

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
123123
return true;
124124
}
125125
}
126+
127+
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
128+
Intrinsic::ID IID) const;
129+
130+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
131+
Value *NewV) const;
126132
};
127133

128134
} // end namespace llvm
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces,bdce %s | FileCheck %s
3+
4+
target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline
8+
declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
9+
declare i1 @llvm.nvvm.isspacep.local(ptr) readnone noinline
10+
declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
11+
; Declared name must match the @llvm.nvvm.isspacep.shared.cluster calls in this file.
declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr) readnone noinline
12+
13+
define i1 @test_isspacep_const_true(ptr addrspace(4) %addr) {
14+
; CHECK-LABEL: define i1 @test_isspacep_const_true(
15+
; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
16+
; CHECK-NEXT: [[ENTRY:.*:]]
17+
; CHECK-NEXT: ret i1 true
18+
;
19+
entry:
20+
%addr0 = addrspacecast ptr addrspace(4) %addr to ptr
21+
%addr1 = getelementptr i8, ptr %addr0, i32 10
22+
%val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
23+
ret i1 %val
24+
}
25+
26+
define i1 @test_isspacep_const_false(ptr addrspace(1) %addr) {
27+
; CHECK-LABEL: define i1 @test_isspacep_const_false(
28+
; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
29+
; CHECK-NEXT: [[ENTRY:.*:]]
30+
; CHECK-NEXT: ret i1 false
31+
;
32+
entry:
33+
%addr0 = addrspacecast ptr addrspace(1) %addr to ptr
34+
%addr1 = getelementptr i8, ptr %addr0, i32 10
35+
%val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
36+
ret i1 %val
37+
}
38+
39+
define i1 @test_isspacep_global_true(ptr addrspace(1) %addr) {
40+
; CHECK-LABEL: define i1 @test_isspacep_global_true(
41+
; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
42+
; CHECK-NEXT: [[ENTRY:.*:]]
43+
; CHECK-NEXT: ret i1 true
44+
;
45+
entry:
46+
%addr0 = addrspacecast ptr addrspace(1) %addr to ptr
47+
%addr1 = getelementptr i8, ptr %addr0, i32 10
48+
%val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
49+
ret i1 %val
50+
}
51+
52+
define i1 @test_isspacep_global_false(ptr addrspace(4) %addr) {
53+
; CHECK-LABEL: define i1 @test_isspacep_global_false(
54+
; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
55+
; CHECK-NEXT: [[ENTRY:.*:]]
56+
; CHECK-NEXT: ret i1 false
57+
;
58+
entry:
59+
%addr0 = addrspacecast ptr addrspace(4) %addr to ptr
60+
%addr1 = getelementptr i8, ptr %addr0, i32 10
61+
%val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
62+
ret i1 %val
63+
}
64+
65+
define i1 @test_isspacep_local_true(ptr addrspace(5) %addr) {
66+
; CHECK-LABEL: define i1 @test_isspacep_local_true(
67+
; CHECK-SAME: ptr addrspace(5) [[ADDR:%.*]]) {
68+
; CHECK-NEXT: [[ENTRY:.*:]]
69+
; CHECK-NEXT: ret i1 true
70+
;
71+
entry:
72+
%addr0 = addrspacecast ptr addrspace(5) %addr to ptr
73+
%addr1 = getelementptr i8, ptr %addr0, i32 10
74+
%val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
75+
ret i1 %val
76+
}
77+
78+
define i1 @test_isspacep_local_false(ptr addrspace(1) %addr) {
79+
; CHECK-LABEL: define i1 @test_isspacep_local_false(
80+
; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
81+
; CHECK-NEXT: [[ENTRY:.*:]]
82+
; CHECK-NEXT: ret i1 false
83+
;
84+
entry:
85+
%addr0 = addrspacecast ptr addrspace(1) %addr to ptr
86+
%addr1 = getelementptr i8, ptr %addr0, i32 10
87+
%val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
88+
ret i1 %val
89+
}
90+
91+
define i1 @test_isspacep_shared_true(ptr addrspace(3) %addr) {
92+
; CHECK-LABEL: define i1 @test_isspacep_shared_true(
93+
; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
94+
; CHECK-NEXT: [[ENTRY:.*:]]
95+
; CHECK-NEXT: ret i1 true
96+
;
97+
entry:
98+
%addr0 = addrspacecast ptr addrspace(3) %addr to ptr
99+
%addr1 = getelementptr i8, ptr %addr0, i32 10
100+
%val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
101+
ret i1 %val
102+
}
103+
104+
define i1 @test_isspacep_shared_false(ptr addrspace(1) %addr) {
105+
; CHECK-LABEL: define i1 @test_isspacep_shared_false(
106+
; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
107+
; CHECK-NEXT: [[ENTRY:.*:]]
108+
; CHECK-NEXT: ret i1 false
109+
;
110+
entry:
111+
%addr0 = addrspacecast ptr addrspace(1) %addr to ptr
112+
%addr1 = getelementptr i8, ptr %addr0, i32 10
113+
%val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
114+
ret i1 %val
115+
}
116+
117+
define i1 @test_isspacep_cluster_shared_unsure(ptr addrspace(3) %addr) {
118+
; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_unsure(
119+
; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
120+
; CHECK-NEXT: [[ENTRY:.*:]]
121+
; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr i8, ptr addrspace(3) [[ADDR]], i32 10
122+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[ADDR1]] to ptr
123+
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr [[TMP0]])
124+
; CHECK-NEXT: ret i1 [[VAL]]
125+
;
126+
entry:
127+
%addr0 = addrspacecast ptr addrspace(3) %addr to ptr
128+
%addr1 = getelementptr i8, ptr %addr0, i32 10
129+
%val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
130+
ret i1 %val
131+
}
132+
133+
define i1 @test_isspacep_cluster_shared_false(ptr addrspace(1) %addr) {
134+
; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_false(
135+
; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
136+
; CHECK-NEXT: [[ENTRY:.*:]]
137+
; CHECK-NEXT: ret i1 false
138+
;
139+
entry:
140+
%addr0 = addrspacecast ptr addrspace(1) %addr to ptr
141+
%addr1 = getelementptr i8, ptr %addr0, i32 10
142+
%val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
143+
ret i1 %val
144+
}

0 commit comments

Comments
 (0)