@@ -59,42 +59,55 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
59
59
NVPTXSubtarget (const Triple &TT, const std::string &CPU,
60
60
const std::string &FS, const NVPTXTargetMachine &TM);
61
61
62
- const TargetFrameLowering *getFrameLowering () const override {
62
+ // Returns the address of this subtarget's FrameLowering member.
+ // No `inline` keyword: a member function defined in the class body is
+ // already implicitly inline.
+ const TargetFrameLowering *getFrameLowering() const override {
63
63
return &FrameLowering;
64
64
}
65
- const NVPTXInstrInfo *getInstrInfo () const override { return &InstrInfo; }
66
- const NVPTXRegisterInfo *getRegisterInfo () const override {
65
+ // Returns the address of this subtarget's InstrInfo member (implicitly
+ // inline, since it is defined in the class body).
+ const NVPTXInstrInfo *getInstrInfo() const override {
66
+ return &InstrInfo;
67
+ }
68
+ // Returns the address of the object yielded by InstrInfo.getRegisterInfo().
+ // Implicitly inline, since it is defined in the class body.
+ const NVPTXRegisterInfo *getRegisterInfo() const override {
67
69
return &InstrInfo.getRegisterInfo();
68
70
}
69
- const NVPTXTargetLowering *getTargetLowering () const override {
71
+ // Returns the address of this subtarget's TLInfo member (implicitly inline,
+ // since it is defined in the class body).
+ const NVPTXTargetLowering *getTargetLowering() const override {
70
72
return &TLInfo;
71
73
}
72
- const SelectionDAGTargetInfo *getSelectionDAGInfo () const override {
74
+ // Returns the address of this subtarget's TSInfo member (implicitly inline,
+ // since it is defined in the class body).
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
73
75
return &TSInfo;
74
76
}
75
77
76
- bool hasAtomAddF64 () const { return SmVersion >= 60 ; }
77
- bool hasAtomScope () const { return SmVersion >= 60 ; }
78
- bool hasAtomBitwise64 () const { return SmVersion >= 32 ; }
79
- bool hasAtomMinMax64 () const { return SmVersion >= 32 ; }
80
- bool hasLDG () const { return SmVersion >= 32 ; }
78
+ // True when SmVersion is at least 60.
+ bool hasAtomAddF64() const { return SmVersion >= 60; }
79
+ // True when SmVersion is at least 60.
+ bool hasAtomScope() const { return SmVersion >= 60; }
80
+ // True when SmVersion is at least 32.
+ bool hasAtomBitwise64() const { return SmVersion >= 32; }
81
+ // True when SmVersion is at least 32.
+ bool hasAtomMinMax64() const { return SmVersion >= 32; }
82
+ // True when SmVersion is at least 32.
+ bool hasLDG() const { return SmVersion >= 32; }
81
83
// True when SmVersion is at least 32. The explicit `inline` is dropped: a
// member function defined in the class body is already implicitly inline.
bool hasHWROT32() const { return SmVersion >= 32; }
82
84
bool hasImageHandles () const ;
83
- bool hasFP16Math () const { return SmVersion >= 53 ; }
84
- bool hasBF16Math () const { return SmVersion >= 80 ; }
85
+ // True when SmVersion is at least 53.
+ bool hasFP16Math() const { return SmVersion >= 53; }
86
+ // True when SmVersion is at least 80.
+ bool hasBF16Math() const { return SmVersion >= 80; }
85
87
bool allowFP16Math () const ;
86
- bool hasMaskOperator () const { return PTXVersion >= 71 ; }
87
- bool hasNoReturn () const { return SmVersion >= 30 && PTXVersion >= 64 ; }
88
- unsigned int getFullSmVersion () const { return FullSmVersion; }
89
- unsigned int getSmVersion () const { return getFullSmVersion () / 10 ; }
88
+ // True when PTXVersion is at least 71; SmVersion is not consulted.
+ bool hasMaskOperator() const { return PTXVersion >= 71; }
89
+ // True only when both thresholds are met: SmVersion >= 30 and
+ // PTXVersion >= 64.
+ bool hasNoReturn() const {
90
+ return SmVersion >= 30 && PTXVersion >= 64;
91
+ }
92
+ // Does SM & PTX support memory orderings (weak and atomic: relaxed, acquire,
93
+ // release, acq_rel, sc)?
94
+ // Requires both SmVersion >= 70 and PTXVersion >= 60.
+ bool hasMemoryOrdering() const {
95
+ return SmVersion >= 70 && PTXVersion >= 60;
96
+ }
97
+ // Does SM & PTX support atomic relaxed MMIO operations?
98
+ // Requires both SmVersion >= 70 and PTXVersion >= 82.
+ bool hasRelaxedMMIO() const {
99
+ return SmVersion >= 70 && PTXVersion >= 82;
100
+ }
101
+ // Returns the stored FullSmVersion value unchanged.
+ unsigned int getFullSmVersion() const { return FullSmVersion; }
102
+ // Returns getFullSmVersion() with its last decimal digit dropped (unsigned
+ // integer division by 10).
+ unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
90
103
// GPUs with "a" suffix include architecture-accelerated features that
91
104
// are supported on the specified architecture only, hence such targets do not
92
105
// follow the onion layer model. hasAAFeatures() allows distinguishing such
93
106
// GPU variants from the base GPU architecture.
94
107
// - 0 represents base GPU model,
95
108
// - non-zero value identifies particular architecture-accelerated variant.
96
- bool hasAAFeatures () const { return getFullSmVersion () % 10 ; }
97
- std::string getTargetName () const { return TargetName; }
109
+ // True when the last decimal digit of getFullSmVersion() is non-zero. The
+ // comparison is spelled out rather than relying on the implicit
+ // unsigned-to-bool conversion of the remainder.
+ bool hasAAFeatures() const { return getFullSmVersion() % 10 != 0; }
110
+ // Returns TargetName by value, so each call hands the caller its own
+ // std::string.
+ std::string getTargetName() const { return TargetName; }
98
111
99
112
// Get maximum value of required alignments among the supported data types.
100
113
// From the PTX ISA doc, section 8.2.3:
@@ -103,9 +116,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
103
116
// of 64 bits. Memory operations with a vector data-type are modelled as a
104
117
// set of equivalent memory operations with a scalar data-type, executed in
105
118
// an unspecified order on the elements in the vector.
106
- unsigned getMaxRequiredAlignment () const { return 8 ; }
119
+ // Always returns the constant 8; no subtarget state is consulted.
+ unsigned getMaxRequiredAlignment() const { return 8; }
107
120
108
- unsigned getPTXVersion () const { return PTXVersion; }
121
+ // Returns the stored PTXVersion value unchanged.
+ unsigned getPTXVersion() const { return PTXVersion; }
109
122
110
123
NVPTXSubtarget &initializeSubtargetDependencies (StringRef CPU, StringRef FS);
111
124
void ParseSubtargetFeatures (StringRef CPU, StringRef TuneCPU, StringRef FS);
0 commit comments