Closed
Description
While trying to compile Node.js 21.1.0 with c0a7391, I see the following AVX-512-related errors. My Haswell-EP CPU clearly doesn't support these instructions.
./deps/simdutf/simdutf.cpp:18171:26: error: always_inline function '_mm512_setzero_si512' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
18171 | __m512i currentmax = _mm512_setzero_si512();
| ^
../deps/simdutf/simdutf.cpp:18171:26: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
../deps/simdutf/simdutf.cpp:18173:20: error: always_inline function '_mm512_loadu_si512' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
18173 | __m512i in = _mm512_loadu_si512((__m512i *)buf);
| ^
../deps/simdutf/simdutf.cpp:18173:20: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
../deps/simdutf/simdutf.cpp:18174:43: error: always_inline function '_mm512_set1_epi16' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
18174 | __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
| ^
../deps/simdutf/simdutf.cpp:18174:43: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
../deps/simdutf/simdutf.cpp:18174:22: error: always_inline function '_mm512_sub_epi16' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
18174 | __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
| ^
../deps/simdutf/simdutf.cpp:18174:22: error: AVX vector argument of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
../deps/simdutf/simdutf.cpp:18176:11: error: '__builtin_ia32_ucmpw512_mask' needs target feature avx512bw,evex512
18176 | _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
| ^
/home/marcus/llvm18/lib/clang/18/include/avx512bwintrin.h:367:5: note: expanded from macro '_mm512_cmplt_epu16_mask'
367 | _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
| ^
/home/marcus/llvm18/lib/clang/18/include/avx512bwintrin.h:266:15: note: expanded from macro '_mm512_cmp_epu16_mask'
266 | ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
| ^
../deps/simdutf/simdutf.cpp:18176:41: error: always_inline function '_mm512_set1_epi16' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
18176 | _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
| ^
../deps/simdutf/simdutf.cpp:18176:41: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
../deps/simdutf/simdutf.cpp:18190:38: error: '__builtin_ia32_ucmpw512_mask' needs target feature avx512bw,evex512
18190 | __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(
| ^
/home/marcus/llvm18/lib/clang/18/include/avx512bwintrin.h:367:5: note: expanded from macro '_mm512_cmplt_epu16_mask'
367 | _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
| ^
/home/marcus/llvm18/lib/clang/18/include/avx512bwintrin.h:266:15: note: expanded from macro '_mm512_cmp_epu16_mask'
266 | ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
| ^
../deps/simdutf/simdutf.cpp:18191:21: error: always_inline function '_mm512_set1_epi16' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
18191 | diff, _mm512_set1_epi16(uint16_t(0x0400)));
| ^
../deps/simdutf/simdutf.cpp:18191:21: error: AVX vector return of type '__m512i' (vector of 8 'long long' values) without 'evex512' enabled changes the ABI
../deps/simdutf/simdutf.cpp:18234:20: error: always_inline function '_mm512_max_epu32' requires target feature 'evex512', but would be inlined into function 'detect_encodings' that is compiled without support for 'evex512'
18234 | currentmax = _mm512_max_epu32(in, currentmax);
| ^
Flags used:
export CC=clang
export CXX=clang++
export CC_LD=lld
export CXX_LD=lld
export AR=llvm-ar
export NM=llvm-nm
export STRIP=llvm-strip
export OBJCOPY=llvm-objcopy
export OBJDUMP=llvm-objdump
export READELF=llvm-readelf
export RANLIB=llvm-ranlib
export HOSTCC=clang
export HOSTCXX=clang++
export HOSTAR=llvm-ar
export CPPFLAGS="-D_FORTIFY_SOURCE=0"
export CFLAGS="-O3 -march=native -mtune=native -maes -mbmi2 -mpclmul -mllvm -inline-threshold=1000 -mllvm -polly -mllvm -polly-position=early -mllvm -polly-dependences-computeout=900000000 -mllvm -polly-detect-profitability-min-per-loop-insts=40 -mllvm -polly-tiling=true -mllvm -polly-prevect-width=256 -mllvm -polly-vectorizer=stripmine -mllvm -polly-invariant-load-hoisting -mllvm -polly-loopfusion-greedy -mllvm -polly-run-inliner -mllvm -polly-run-dce -mllvm -polly-enable-delicm=true -mllvm -polly -fmerge-all-constants -mllvm -extra-vectorizer-passes -mllvm -enable-cond-stores-vec -mllvm -slp-vectorize-hor-store -mllvm -enable-loopinterchange -mllvm -enable-loop-distribute -mllvm -enable-unroll-and-jam -mllvm -enable-loop-flatten -mllvm -interleave-small-loop-scalar-reduction -mllvm -unroll-runtime-multi-exit -mllvm -aggressive-ext-opt -fno-math-errno -fno-trapping-math -falign-functions=32 -funroll-loops -fno-semantic-interposition -fcf-protection=none -mharden-sls=none -fomit-frame-pointer -mprefer-vector-width=256 -flto=thin -fwhole-program-vtables -fsplit-lto-unit -mllvm -adce-remove-loops -mllvm -enable-ext-tsp-block-placement=1 -mllvm -enable-gvn-hoist -mllvm -enable-dfa-jump-thread -Wno-error=backend-plugin -fgnuc-version=13.2.1 -ffp-contract=fast -fdata-sections -ffunction-sections -fno-unique-section-names -fsplit-machine-functions -freroll-loops"
export CXXFLAGS="${CFLAGS} -Wp,-U_GLIBCXX_ASSERTIONS"
export LDFLAGS="-Wl,--lto-CGO3 -Wl,--gc-sections -Wl,--icf=all -Wl,--lto-O3,-O3,-Bsymbolic-functions,--as-needed -fcf-protection=none -mharden-sls=none -Wl,-mllvm,-polly -Wl,-mllvm,-polly-invariant-load-hoisting -Wl,-mllvm -Wl,-polly-loopfusion-greedy -Wl,-mllvm -Wl,-polly-run-inliner -Wl,-mllvm -Wl,-polly-run-dce -Wl,-mllvm -Wl,-polly-enable-delicm=true -Wl,-mllvm -Wl,-extra-vectorizer-passes -Wl,-mllvm -Wl,-enable-cond-stores-vec -Wl,-mllvm -Wl,-slp-vectorize-hor-store -Wl,-mllvm -Wl,-enable-loopinterchange -Wl,-mllvm -Wl,-enable-loop-distribute -Wl,-mllvm -Wl,-enable-unroll-and-jam -Wl,-mllvm -Wl,-enable-loop-flatten -Wl,-mllvm -Wl,-interleave-small-loop-scalar-reduction -Wl,-mllvm -Wl,-unroll-runtime-multi-exit -Wl,-mllvm -Wl,-aggressive-ext-opt -Wl,-mllvm -Wl,-enable-interleaved-mem-accesses -Wl,-mllvm -Wl,-enable-masked-interleaved-mem-accesses -march=native -maes -mbmi2 -mpclmul -flto=thin -fwhole-program-vtables -fuse-ld=lld -Wl,-zmax-page-size=0x200000 -freroll-loops -Wl,-mllvm -Wl,-adce-remove-loops -Wl,-mllvm -Wl,-enable-ext-tsp-block-placement=1 -Wl,-mllvm -Wl,-enable-gvn-hoist -Wl,-mllvm -Wl,-enable-dfa-jump-thread -Wl,--push-state -Wl,-whole-archive -ljemalloc_pic -Wl,--pop-state -lpthread -lstdc++ -lm -ldl -Wl,--undefined-version"
CCLDFLAGS="$LDFLAGS"
CXXLDFLAGS="$LDFLAGS"
export ASFLAGS="-D__AVX__=1 -D__AVX2__=1 -msse2avx -D__FMA__=1"
[edit: Removed the PGO profile from the flags; it is not needed to reproduce the issue.]