Skip to content

Commit 89a2bef

Browse files
[libomptarget][nvptx] Fix build, symbol ordering in target_impl.h
1 parent aaa5a5e commit 89a2bef

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -101,8 +101,6 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
101101
return __lanemask_gt();
102102
}
103103

104-
EXTERN bool __kmpc_impl_is_first_active_thread();
105-
106104
INLINE uint32_t __kmpc_impl_smid() {
107105
return __smid();
108106
}
@@ -126,6 +124,8 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
126124
return __ballot64(1);
127125
}
128126

127+
EXTERN bool __kmpc_impl_is_first_active_thread();
128+
129129
EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
130130
int32_t SrcLane);
131131

openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -94,15 +94,6 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
9494
return res;
9595
}
9696

97-
// Return true if this is the first active thread in the warp.
98-
INLINE bool __kmpc_impl_is_first_active_thread() {
99-
unsigned long long Mask = __kmpc_impl_activemask();
100-
unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
101-
unsigned long long Sh = Mask << ShNum;
102-
// Truncate Sh to the 32 lower bits
103-
return (unsigned)Sh == 0;
104-
}
105-
10697
INLINE uint32_t __kmpc_impl_smid() {
10798
uint32_t id;
10899
asm("mov.u32 %0, %%smid;" : "=r"(id));
@@ -142,6 +133,15 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
142133
#endif
143134
}
144135

136+
// Return true if this is the first active thread in the warp.
137+
INLINE bool __kmpc_impl_is_first_active_thread() {
138+
unsigned long long Mask = __kmpc_impl_activemask();
139+
unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
140+
unsigned long long Sh = Mask << ShNum;
141+
// Truncate Sh to the 32 lower bits
142+
return (unsigned)Sh == 0;
143+
}
144+
145145
// In Cuda 9.0, the *_sync() version takes an extra argument 'mask'.
146146

147147
INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,

0 commit comments

Comments
 (0)