Skip to content

Commit 18b6724

Browse files
committed
[OpenMP][VE] Support OpenMP runtime on VE
Support OpenMP runtime library on VE. This patch makes OpenMP compilable for VE architecture. Almost all tests run correctly on VE. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D159401
1 parent 52b4bec commit 18b6724

12 files changed

+243
-9
lines changed

openmp/runtime/CMakeLists.txt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD})
3030
# If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake
3131
libomp_get_architecture(LIBOMP_DETECTED_ARCH)
3232
set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING
33-
"The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64).")
33+
"The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64/ve).")
3434
# Should assertions be enabled? They are on by default.
3535
set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL
3636
"enable assertions?")
@@ -63,6 +63,8 @@ else() # Part of LLVM build
6363
set(LIBOMP_ARCH riscv64)
6464
elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64")
6565
set(LIBOMP_ARCH loongarch64)
66+
elseif(LIBOMP_NATIVE_ARCH MATCHES "ve")
67+
set(LIBOMP_ARCH ve)
6668
else()
6769
# last ditch effort
6870
libomp_get_architecture(LIBOMP_ARCH)
@@ -83,7 +85,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64")
8385
endif()
8486
endif()
8587

86-
libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64)
88+
libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64 ve)
8789

8890
set(LIBOMP_LIB_TYPE normal CACHE STRING
8991
"Performance,Profiling,Stubs library (normal/profile/stubs)")
@@ -162,6 +164,7 @@ set(MIPS64 FALSE)
162164
set(MIPS FALSE)
163165
set(RISCV64 FALSE)
164166
set(LOONGARCH64 FALSE)
167+
set(VE FALSE)
165168
if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32") # IA-32 architecture
166169
set(IA32 TRUE)
167170
elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e") # Intel(R) 64 architecture
@@ -188,6 +191,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture
188191
set(RISCV64 TRUE)
189192
elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture
190193
set(LOONGARCH64 TRUE)
194+
elseif("${LIBOMP_ARCH}" STREQUAL "ve") # VE architecture
195+
set(VE TRUE)
191196
endif()
192197

193198
# Set some flags based on build_type

openmp/runtime/cmake/LibompGetArchitecture.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ function(libomp_get_architecture return_arch)
4949
#error ARCHITECTURE=riscv64
5050
#elif defined(__loongarch__) && __loongarch_grlen == 64
5151
#error ARCHITECTURE=loongarch64
52+
#elif defined(__ve__)
53+
#error ARCHITECTURE=ve
5254
#else
5355
#error ARCHITECTURE=UnknownArchitecture
5456
#endif

openmp/runtime/cmake/LibompUtils.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ function(libomp_get_legal_arch return_arch_string)
111111
set(${return_arch_string} "RISCV64" PARENT_SCOPE)
112112
elseif(${LOONGARCH64})
113113
set(${return_arch_string} "LOONGARCH64" PARENT_SCOPE)
114+
elseif(${VE})
115+
set(${return_arch_string} "VE" PARENT_SCOPE)
114116
else()
115117
set(${return_arch_string} "${LIBOMP_ARCH}" PARENT_SCOPE)
116118
libomp_warning_say("libomp_get_legal_arch(): Warning: Unknown architecture: Using ${LIBOMP_ARCH}")

openmp/runtime/src/kmp.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,10 @@ extern void __kmp_init_target_task();
11701170
#elif KMP_ARCH_X86_64
11711171
#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
11721172
#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
1173+
#elif KMP_ARCH_VE
1174+
// Minimum stack size for pthread for VE is 4MB.
1175+
// https://www.hpc.nec/documents/veos/en/glibc/Difference_Points_glibc.htm
1176+
#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
11731177
#else
11741178
#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
11751179
#endif

openmp/runtime/src/kmp_affinity.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,17 @@ class KMPHwlocAffinity : public KMPAffinity {
286286
#elif __NR_sched_getaffinity != 123
287287
#error Wrong code for getaffinity system call.
288288
#endif /* __NR_sched_getaffinity */
289+
#elif KMP_ARCH_VE
290+
#ifndef __NR_sched_setaffinity
291+
#define __NR_sched_setaffinity 203
292+
#elif __NR_sched_setaffinity != 203
293+
#error Wrong code for setaffinity system call.
294+
#endif /* __NR_sched_setaffinity */
295+
#ifndef __NR_sched_getaffinity
296+
#define __NR_sched_getaffinity 204
297+
#elif __NR_sched_getaffinity != 204
298+
#error Wrong code for getaffinity system call.
299+
#endif /* __NR_sched_getaffinity */
289300
#else
290301
#error Unknown or unsupported architecture
291302
#endif /* KMP_ARCH_* */

openmp/runtime/src/kmp_os.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ typedef unsigned long long kmp_uint64;
178178
#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
179179
#define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
180180
#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
181-
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
181+
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
182182
#define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
183183
#else
184184
#error "Can't determine size_t printf format specifier."
@@ -1043,7 +1043,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
10431043
#endif /* KMP_OS_WINDOWS */
10441044

10451045
#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \
1046-
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
1046+
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
10471047
#if KMP_OS_WINDOWS
10481048
#undef KMP_MB
10491049
#define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst)

openmp/runtime/src/kmp_platform.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@
9393
#define KMP_ARCH_MIPS64 0
9494
#define KMP_ARCH_RISCV64 0
9595
#define KMP_ARCH_LOONGARCH64 0
96+
#define KMP_ARCH_VE 0
9697

9798
#if KMP_OS_WINDOWS
9899
#if defined(_M_AMD64) || defined(__x86_64)
@@ -142,6 +143,9 @@
142143
#elif defined __loongarch__ && __loongarch_grlen == 64
143144
#undef KMP_ARCH_LOONGARCH64
144145
#define KMP_ARCH_LOONGARCH64 1
146+
#elif defined __ve__
147+
#undef KMP_ARCH_VE
148+
#define KMP_ARCH_VE 1
145149
#endif
146150
#endif
147151

@@ -206,7 +210,7 @@
206210
// TODO: Fixme - This is clever, but really fugly
207211
#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \
208212
KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \
209-
KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64)
213+
KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_VE)
210214
#error Unknown or unsupported architecture
211215
#endif
212216

openmp/runtime/src/kmp_runtime.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8830,7 +8830,7 @@ __kmp_determine_reduction_method(
88308830
int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
88318831

88328832
#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8833-
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
8833+
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
88348834

88358835
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
88368836
KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@
162162
#define ITT_ARCH_ARM64 6
163163
#endif /* ITT_ARCH_ARM64 */
164164

165+
#ifndef ITT_ARCH_VE
166+
#define ITT_ARCH_VE 8
167+
#endif /* ITT_ARCH_VE */
168+
165169
#ifndef ITT_ARCH
166170
#if defined _M_IX86 || defined __i386__
167171
#define ITT_ARCH ITT_ARCH_IA32
@@ -175,6 +179,8 @@
175179
#define ITT_ARCH ITT_ARCH_ARM64
176180
#elif defined __powerpc64__
177181
#define ITT_ARCH ITT_ARCH_PPC64
182+
#elif defined __ve__
183+
#define ITT_ARCH ITT_ARCH_VE
178184
#endif
179185
#endif
180186

openmp/runtime/src/z_Linux_asm.S

Lines changed: 195 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,6 +2060,198 @@ __kmp_invoke_microtask:
20602060

20612061
#endif /* KMP_ARCH_LOONGARCH64 */
20622062

2063+
#if KMP_ARCH_VE
2064+
2065+
//------------------------------------------------------------------------
2066+
//
2067+
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
2068+
//
2069+
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
2070+
// void *p_argv[]
2071+
// #if OMPT_SUPPORT
2072+
// ,
2073+
// void **exit_frame_ptr
2074+
// #endif
2075+
// ) {
2076+
// #if OMPT_SUPPORT
2077+
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
2078+
// #endif
2079+
//
2080+
// (*pkfn)(&gtid, &tid, argv[0], ...);
2081+
//
2082+
// return 1;
2083+
// }
2084+
//
2085+
// Parameters:
2086+
// s0: pkfn
2087+
// s1: gtid
2088+
// s2: tid
2089+
// s3: argc
2090+
// s4: p_argv
2091+
// s5: exit_frame_ptr
2092+
//
2093+
// Locals:
2094+
// __gtid: gtid param pushed on stack so can pass &gtid to pkfn
2095+
// __tid: tid param pushed on stack so can pass &tid to pkfn
2096+
//
2097+
// Temp. registers:
2098+
//
2099+
// s34: used to calculate the dynamic stack size
2100+
// s35: used as temporary for stack placement calculation
2101+
// s36: used as temporary for stack arguments
2102+
// s37: used as temporary for number of remaining pkfn parms
2103+
// s38: used to traverse p_argv array
2104+
//
2105+
// return: s0 (always 1/TRUE)
2106+
//
2107+
2108+
__gtid = -4
2109+
__tid = -8
2110+
2111+
// -- Begin __kmp_invoke_microtask
2112+
// mark_begin;
2113+
.text
2114+
.globl __kmp_invoke_microtask
2115+
// Functions must be aligned to 8 bytes.
2116+
.p2align 3
2117+
.type __kmp_invoke_microtask,@function
2118+
__kmp_invoke_microtask:
2119+
.cfi_startproc
2120+
2121+
// First, save fp and lr. VE stores them in the caller's stack frame.
2122+
st %fp, 0(, %sp)
2123+
st %lr, 8(, %sp)
2124+
or %fp, 0, %sp
2125+
.cfi_def_cfa %fp, 0
2126+
.cfi_offset %lr, 8
2127+
.cfi_offset %fp, 0
2128+
2129+
// Compute the dynamic stack size:
2130+
//
2131+
// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them
2132+
// by reference
2133+
// - We need 8 bytes for each argument. We have 'argc' + 2
2134+
// arguments (consider &gtid and &tid). We need to reserve
2135+
// (argc + 2) * 8 bytes.
2136+
// - We need 176 bytes for RSA and others
2137+
//
2138+
// The total number of bytes is then (argc + 2) * 8 + 8 + 176.
2139+
//
2140+
// |------------------------------|
2141+
// | return address of callee | 8(%fp)
2142+
// |------------------------------|
2143+
// | frame pointer of callee | 0(%fp)
2144+
// |------------------------------| <------------------ %fp
2145+
// | __tid / __gtid | -8(%fp) / -4(%fp)
2146+
// |------------------------------|
2147+
// | argc+2 for arguments | 176(%sp)
2148+
// |------------------------------|
2149+
// | RSA |
2150+
// |------------------------------|
2151+
// | return address |
2152+
// |------------------------------|
2153+
// | frame pointer |
2154+
// |------------------------------| <------------------ %sp
2155+
2156+
adds.w.sx %s34, 2, %s3
2157+
sll %s34, %s34, 3
2158+
lea %s34, 184(, %s34)
2159+
subs.l %sp, %sp, %s34
2160+
2161+
// Align the stack to 16 bytes.
2162+
and %sp, -16, %sp
2163+
2164+
// Save pkfn.
2165+
or %s12, 0, %s0
2166+
2167+
// Call host to allocate stack if it is necessary.
2168+
brge.l %sp, %sl, .L_kmp_pass
2169+
ld %s61, 24(, %tp)
2170+
lea %s63, 0x13b
2171+
shm.l %s63, 0(%s61)
2172+
shm.l %sl, 8(%s61)
2173+
shm.l %sp, 16(%s61)
2174+
monc
2175+
2176+
.L_kmp_pass:
2177+
lea %s35, 176(, %sp)
2178+
adds.w.sx %s37, 0, %s3
2179+
or %s38, 0, %s4
2180+
2181+
#if OMPT_SUPPORT
2182+
// Save frame pointer into exit_frame.
2183+
st %fp, 0(%s5)
2184+
#endif
2185+
2186+
// Prepare arguments for the pkfn function (first 8 using s0-s7
2187+
// registers, but they must also be stored on the stack because of varargs).
2188+
2189+
stl %s1, __gtid(%fp)
2190+
stl %s2, __tid(%fp)
2191+
2192+
adds.l %s0, __gtid, %fp
2193+
st %s0, 0(, %s35)
2194+
adds.l %s1, __tid, %fp
2195+
st %s1, 8(, %s35)
2196+
2197+
breq.l 0, %s37, .L_kmp_call
2198+
ld %s2, 0(, %s38)
2199+
st %s2, 16(, %s35)
2200+
2201+
breq.l 1, %s37, .L_kmp_call
2202+
ld %s3, 8(, %s38)
2203+
st %s3, 24(, %s35)
2204+
2205+
breq.l 2, %s37, .L_kmp_call
2206+
ld %s4, 16(, %s38)
2207+
st %s4, 32(, %s35)
2208+
2209+
breq.l 3, %s37, .L_kmp_call
2210+
ld %s5, 24(, %s38)
2211+
st %s5, 40(, %s35)
2212+
2213+
breq.l 4, %s37, .L_kmp_call
2214+
ld %s6, 32(, %s38)
2215+
st %s6, 48(, %s35)
2216+
2217+
breq.l 5, %s37, .L_kmp_call
2218+
ld %s7, 40(, %s38)
2219+
st %s7, 56(, %s35)
2220+
2221+
breq.l 6, %s37, .L_kmp_call
2222+
2223+
// Prepare any additional argument passed through the stack.
2224+
adds.l %s37, -6, %s37
2225+
lea %s38, 48(, %s38)
2226+
lea %s35, 64(, %s35)
2227+
.L_kmp_loop:
2228+
ld %s36, 0(, %s38)
2229+
st %s36, 0(, %s35)
2230+
adds.l %s37, -1, %s37
2231+
adds.l %s38, 8, %s38
2232+
adds.l %s35, 8, %s35
2233+
brne.l 0, %s37, .L_kmp_loop
2234+
2235+
.L_kmp_call:
2236+
// Call pkfn function.
2237+
bsic %lr, (, %s12)
2238+
2239+
// Return value.
2240+
lea %s0, 1
2241+
2242+
// Restore stack and return.
2243+
or %sp, 0, %fp
2244+
ld %lr, 8(, %sp)
2245+
ld %fp, 0(, %sp)
2246+
b.l.t (, %lr)
2247+
.Lfunc_end0:
2248+
.size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
2249+
.cfi_endproc
2250+
2251+
// -- End __kmp_invoke_microtask
2252+
2253+
#endif /* KMP_ARCH_VE */
2254+
20632255
#if KMP_ARCH_ARM || KMP_ARCH_MIPS
20642256
.data
20652257
COMMON .gomp_critical_user_, 32, 3
@@ -2073,7 +2265,8 @@ __kmp_unnamed_critical_addr:
20732265
#endif
20742266
#endif /* KMP_ARCH_ARM */
20752267

2076-
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
2268+
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \
2269+
KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
20772270
#ifndef KMP_PREFIX_UNDERSCORE
20782271
# define KMP_PREFIX_UNDERSCORE(x) x
20792272
#endif
@@ -2088,7 +2281,7 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
20882281
.size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
20892282
#endif
20902283
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
2091-
KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 */
2284+
KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE */
20922285

20932286
#if KMP_OS_LINUX
20942287
# if KMP_ARCH_ARM || KMP_ARCH_AARCH64

openmp/runtime/src/z_Linux_util.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2456,7 +2456,7 @@ int __kmp_get_load_balance(int max) {
24562456
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \
24572457
((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \
24582458
KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
2459-
KMP_ARCH_ARM)
2459+
KMP_ARCH_ARM || KMP_ARCH_VE)
24602460

24612461
// we really only need the case with 1 argument, because CLANG always build
24622462
// a struct of pointers to shared variables referenced in the outlined function

0 commit comments

Comments
 (0)