Skip to content

Commit c29b1aa

Browse files
committed
[NVPTX] Bugfix: fix ODR issues in NVPTXSubtarget
1 parent ec2ce73 commit c29b1aa

File tree

2 files changed

+39
-30
lines changed

2 files changed

+39
-30
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -705,12 +705,8 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
705705
AtomicOrdering Ordering = N->getSuccessOrdering();
706706
auto CodeAddrSpace = getCodeAddrSpace(N);
707707

708-
// Supports relaxed, acquire, release, weak:
709-
bool hasAtomics =
710-
Subtarget->getPTXVersion() >= 60 && Subtarget->getSmVersion() >= 70;
711-
// Supports mmio:
712-
bool hasRelaxedMMIO =
713-
Subtarget->getPTXVersion() >= 82 && Subtarget->getSmVersion() >= 70;
708+
bool HasMemoryOrdering = Subtarget->hasMemoryOrdering();
709+
bool HasRelaxedMMIO = Subtarget->hasRelaxedMMIO();
714710

715711
// TODO: lowering for SequentiallyConsistent Operations: for now, we error.
716712
// TODO: lowering for AcquireRelease Operations: for now, we error.
@@ -756,7 +752,7 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
756752
// sm_60 and older.
757753
if (!(Ordering == AtomicOrdering::NotAtomic ||
758754
Ordering == AtomicOrdering::Monotonic) &&
759-
!hasAtomics) {
755+
!HasMemoryOrdering) {
760756
SmallString<256> Msg;
761757
raw_svector_ostream OS(Msg);
762758
OS << "PTX does not support \"atomic\" for orderings different than"
@@ -769,7 +765,7 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
769765
// the volatile semantics and preserve the atomic ones. [4]: TODO: volatile
770766
// atomics with order stronger than relaxed are currently unimplemented in
771767
// sm_60 and older.
772-
if (!hasAtomics && N->isVolatile() &&
768+
if (!HasMemoryOrdering && N->isVolatile() &&
773769
!(Ordering == AtomicOrdering::NotAtomic ||
774770
Ordering == AtomicOrdering::Monotonic)) {
775771
SmallString<256> Msg;
@@ -790,7 +786,7 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
790786
CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL ||
791787
CodeAddrSpace == NVPTX::PTXLdStInstCode::SHARED);
792788
bool useRelaxedMMIO =
793-
hasRelaxedMMIO && CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL;
789+
HasRelaxedMMIO && CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL;
794790

795791
switch (Ordering) {
796792
case AtomicOrdering::NotAtomic:
@@ -803,7 +799,7 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
803799
: addrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
804800
: NVPTX::PTXLdStInstCode::NotAtomic;
805801
else
806-
return hasAtomics ? NVPTX::PTXLdStInstCode::Relaxed
802+
return HasMemoryOrdering ? NVPTX::PTXLdStInstCode::Relaxed
807803
: addrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
808804
: NVPTX::PTXLdStInstCode::NotAtomic;
809805
case AtomicOrdering::Acquire:

llvm/lib/Target/NVPTX/NVPTXSubtarget.h

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -59,42 +59,55 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
5959
NVPTXSubtarget(const Triple &TT, const std::string &CPU,
6060
const std::string &FS, const NVPTXTargetMachine &TM);
6161

62-
const TargetFrameLowering *getFrameLowering() const override {
62+
inline const TargetFrameLowering *getFrameLowering() const override {
6363
return &FrameLowering;
6464
}
65-
const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
66-
const NVPTXRegisterInfo *getRegisterInfo() const override {
65+
inline const NVPTXInstrInfo *getInstrInfo() const override {
66+
return &InstrInfo;
67+
}
68+
inline const NVPTXRegisterInfo *getRegisterInfo() const override {
6769
return &InstrInfo.getRegisterInfo();
6870
}
69-
const NVPTXTargetLowering *getTargetLowering() const override {
71+
inline const NVPTXTargetLowering *getTargetLowering() const override {
7072
return &TLInfo;
7173
}
72-
const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
74+
inline const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
7375
return &TSInfo;
7476
}
7577

76-
bool hasAtomAddF64() const { return SmVersion >= 60; }
77-
bool hasAtomScope() const { return SmVersion >= 60; }
78-
bool hasAtomBitwise64() const { return SmVersion >= 32; }
79-
bool hasAtomMinMax64() const { return SmVersion >= 32; }
80-
bool hasLDG() const { return SmVersion >= 32; }
78+
inline bool hasAtomAddF64() const { return SmVersion >= 60; }
79+
inline bool hasAtomScope() const { return SmVersion >= 60; }
80+
inline bool hasAtomBitwise64() const { return SmVersion >= 32; }
81+
inline bool hasAtomMinMax64() const { return SmVersion >= 32; }
82+
inline bool hasLDG() const { return SmVersion >= 32; }
8183
inline bool hasHWROT32() const { return SmVersion >= 32; }
8284
bool hasImageHandles() const;
83-
bool hasFP16Math() const { return SmVersion >= 53; }
84-
bool hasBF16Math() const { return SmVersion >= 80; }
85+
inline bool hasFP16Math() const { return SmVersion >= 53; }
86+
inline bool hasBF16Math() const { return SmVersion >= 80; }
8587
bool allowFP16Math() const;
86-
bool hasMaskOperator() const { return PTXVersion >= 71; }
87-
bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
88-
unsigned int getFullSmVersion() const { return FullSmVersion; }
89-
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
88+
inline bool hasMaskOperator() const { return PTXVersion >= 71; }
89+
inline bool hasNoReturn() const {
90+
return SmVersion >= 30 && PTXVersion >= 64;
91+
}
92+
// Does SM & PTX support memory orderings (weak and atomic: relaxed, acquire,
93+
// release, acq_rel, sc)?
94+
inline bool hasMemoryOrdering() const {
95+
return SmVersion >= 70 && PTXVersion >= 60;
96+
}
97+
// Does SM & PTX support atomic relaxed MMIO operations?
98+
inline bool hasRelaxedMMIO() const {
99+
return SmVersion >= 70 && PTXVersion >= 82;
100+
}
101+
inline unsigned int getFullSmVersion() const { return FullSmVersion; }
102+
inline unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
90103
// GPUs with "a" suffix include architecture-accelerated features that
91104
// are supported on the specified architecture only, hence such targets do not
92105
// follow the onion layer model. hasAAFeatures() allows distinguishing such
93106
// GPU variants from the base GPU architecture.
94107
// - 0 represents base GPU model,
95108
// - non-zero value identifies particular architecture-accelerated variant.
96-
bool hasAAFeatures() const { return getFullSmVersion() % 10; }
97-
std::string getTargetName() const { return TargetName; }
109+
inline bool hasAAFeatures() const { return getFullSmVersion() % 10; }
110+
inline std::string getTargetName() const { return TargetName; }
98111

99112
// Get maximum value of required alignments among the supported data types.
100113
// From the PTX ISA doc, section 8.2.3:
@@ -103,9 +116,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
103116
// of 64 bits. Memory operations with a vector data-type are modelled as a
104117
// set of equivalent memory operations with a scalar data-type, executed in
105118
// an unspecified order on the elements in the vector.
106-
unsigned getMaxRequiredAlignment() const { return 8; }
119+
inline unsigned getMaxRequiredAlignment() const { return 8; }
107120

108-
unsigned getPTXVersion() const { return PTXVersion; }
121+
inline unsigned getPTXVersion() const { return PTXVersion; }
109122

110123
NVPTXSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
111124
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

0 commit comments

Comments
 (0)