Skip to content

[builtins][arm64] Build __init_cpu_features_resolver on Apple platforms #73685

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

jroelofs
Copy link
Contributor

@jroelofs jroelofs commented Nov 28, 2023

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Nov 28, 2023

@llvm/pr-subscribers-backend-x86

Author: Jon Roelofs (jroelofs)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/73685.diff

2 Files Affected:

  • (modified) compiler-rt/lib/builtins/cpu_model.c (+76-6)
  • (modified) llvm/lib/Target/X86/X86AsmPrinter.cpp (+2)
diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c
index b0ec5e51e96d491..b0306f5f36baabf 100644
--- a/compiler-rt/lib/builtins/cpu_model.c
+++ b/compiler-rt/lib/builtins/cpu_model.c
@@ -948,6 +948,8 @@ _Bool __aarch64_have_lse_atomics
 #if defined(__has_include)
 #if __has_include(<sys/auxv.h>)
 #include <sys/auxv.h>
+#define HAVE_SYS_AUXV
+#endif
 
 #if __has_include(<sys/ifunc.h>)
 #include <sys/ifunc.h>
@@ -961,6 +963,8 @@ typedef struct __ifunc_arg_t {
 
 #if __has_include(<asm/hwcap.h>)
 #include <asm/hwcap.h>
+#include HAVE_SYS_HWCAP
+#endif
 
 #if defined(__ANDROID__)
 #include <string.h>
@@ -997,6 +1001,9 @@ typedef struct __ifunc_arg_t {
 #ifndef HWCAP_SHA2
 #define HWCAP_SHA2 (1 << 6)
 #endif
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1 << 7)
+#endif
 #ifndef HWCAP_ATOMICS
 #define HWCAP_ATOMICS (1 << 8)
 #endif
@@ -1149,6 +1156,7 @@ typedef struct __ifunc_arg_t {
   if (__system_property_get("ro.arch", arch) > 0 &&                            \
       strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0)
 
+#if !defined(__APPLE__)
 static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
 #if defined(__FreeBSD__)
   unsigned long hwcap;
@@ -1162,7 +1170,7 @@ static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
   zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
   __aarch64_have_lse_atomics =
       status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0;
-#else
+#elif defined(HAVE_SYS_AUXV)
   unsigned long hwcap = getauxval(AT_HWCAP);
   _Bool result = (hwcap & HWCAP_ATOMICS) != 0;
 #if defined(__ANDROID__)
@@ -1180,8 +1188,11 @@ static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
   }
 #endif // defined(__ANDROID__)
   __aarch64_have_lse_atomics = result;
+#else
+#error No support for checking for lse atomics on this platfrom yet.
 #endif // defined(__FreeBSD__)
 }
+#endif // !defined(__APPLE__)
 
 #if !defined(DISABLE_AARCH64_FMV)
 // CPUFeatures must correspond to the same AArch64 features in
@@ -1259,6 +1270,64 @@ struct {
   // As features grows new fields could be added
 } __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
 
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#if TARGET_OS_OSX || TARGET_OS_IPHONE
+#include <dispatch/dispatch.h>
+#include <sys/sysctl.h>
+
+static bool isKnownAndSupported(const char *name) {
+  int32_t val = 0;
+  size_t size = sizeof(val);
+  if (sysctlbyname(name, &val, &size, NULL, 0))
+    return false;
+  return val;
+}
+
+void __init_cpu_features_resolver(void) {
+  static dispatch_once_t onceToken = 0;
+  dispatch_once(&onceToken, ^{
+    // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
+    static struct {
+      const char *sysctl_name;
+      enum CPUFeatures feature;
+    } Features[] = {
+        {"hw.optional.arm.FEAT_FlagM", FEAT_FLAGM},
+        {"hw.optional.arm.FEAT_FlagM2", FEAT_FLAGM2},
+        {"hw.optional.arm.FEAT_FHM", FEAT_FP16FML},
+        {"hw.optional.arm.FEAT_DotProd", FEAT_DOTPROD},
+        {"hw.optional.arm.FEAT_RDM", FEAT_RDM},
+        {"hw.optional.arm.FEAT_LSE", FEAT_LSE},
+        {"hw.optional.floatingpoint", FEAT_FP},
+        {"hw.optional.AdvSIMD", FEAT_SIMD},
+        {"hw.optional.armv8_crc32", FEAT_CRC},
+        {"hw.optional.arm.FEAT_SHA1", FEAT_SHA1},
+        {"hw.optional.arm.FEAT_SHA256", FEAT_SHA2},
+        {"hw.optional.armv8_2_sha3", FEAT_SHA3},
+        {"hw.optional.arm.FEAT_AES", FEAT_AES},
+        {"hw.optional.arm.FEAT_PMULL", FEAT_PMULL},
+        {"hw.optional.arm.FEAT_FP16", FEAT_FP16},
+        {"hw.optional.arm.FEAT_JSCVT", FEAT_JSCVT},
+        {"hw.optional.arm.FEAT_FCMA", FEAT_FCMA},
+        {"hw.optional.arm.FEAT_LRCPC", FEAT_RCPC},
+        {"hw.optional.arm.FEAT_LRCPC2", FEAT_RCPC2},
+        {"hw.optional.arm.FEAT_FRINTTS", FEAT_FRINTTS},
+        {"hw.optional.arm.FEAT_I8MM", FEAT_I8MM},
+        {"hw.optional.arm.FEAT_BF16", FEAT_BF16},
+        {"hw.optional.arm.FEAT_SB", FEAT_SB},
+        {"hw.optional.arm.FEAT_SSBS", FEAT_SSBS2},
+        {"hw.optional.arm.FEAT_BTI", FEAT_BTI},
+    };
+
+    for (size_t I = 0, E = sizeof(Features) / sizeof(Features[0]); I != E; ++I)
+      if (isKnownAndSupported(Features[I].sysctl_name))
+        __aarch64_cpu_features.features |= (1ULL << Features[I].feature);
+
+    __aarch64_cpu_features.features |= (1ULL << FEAT_INIT);
+  });
+}
+#endif // TARGET_OS_OSX || TARGET_OS_IPHONE
+#else  // defined(__APPLE__)
 static void __init_cpu_features_constructor(unsigned long hwcap,
                                             const __ifunc_arg_t *arg) {
 #define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
@@ -1467,8 +1536,8 @@ void __init_cpu_features_resolver(unsigned long hwcap,
 }
 
 void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
-  unsigned long hwcap;
-  unsigned long hwcap2;
+  unsigned long hwcap = 0;
+  unsigned long hwcap2 = 0;
   // CPU features already initialized.
   if (__aarch64_cpu_features.features)
     return;
@@ -1478,7 +1547,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
   res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2);
   if (res)
     return;
-#else
+#elif defined(HAVE_SYS_AUXV)
 #if defined(__ANDROID__)
   // Don't set any CPU features,
   // detection could be wrong on Exynos 9810.
@@ -1486,6 +1555,8 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
 #endif // defined(__ANDROID__)
   hwcap = getauxval(AT_HWCAP);
   hwcap2 = getauxval(AT_HWCAP2);
+#else
+#error No support for checking hwcap on this platform yet.
 #endif // defined(__FreeBSD__)
   __ifunc_arg_t arg;
   arg._size = sizeof(__ifunc_arg_t);
@@ -1497,8 +1568,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
 #undef setCPUFeature
 #undef IF_EXYNOS9810
 }
+#endif // defined(__APPLE__)
 #endif // !defined(DISABLE_AARCH64_FMV)
 #endif // defined(__has_include)
-#endif // __has_include(<sys/auxv.h>)
-#endif // __has_include(<asm/hwcap.h>)
 #endif // defined(__aarch64__)
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 5241aa6e1c0eade..37158900d2404dd 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -556,6 +556,8 @@ void X86AsmPrinter::emitGlobalIFunc(Module &M, const GlobalIFunc &GI) {
   JMP.setOpcode(X86::JMP_4);
   JMP.addOperand(MCOperand::createExpr(lowerConstant(GI.getResolver())));
   OutStreamer->emitInstruction(JMP, *Subtarget);
+
+  // FIXME: do the manual .symbol_resolver lowering that we did in AArch64AsmPrinter.
 }
 
 static bool printAsmMRegister(const X86AsmPrinter &P, const MachineOperand &MO,

Copy link

github-actions bot commented Nov 28, 2023

:white_check_mark: With the latest revision this PR passed the C/C++ code formatter.

Created using spr 1.3.4

[skip ci]
Created using spr 1.3.4
Created using spr 1.3.4

[skip ci]
Created using spr 1.3.4
Created using spr 1.3.4

[skip ci]
Created using spr 1.3.4
Created using spr 1.3.4

[skip ci]
Created using spr 1.3.4

void __init_cpu_features_resolver(void) {
static dispatch_once_t onceToken = 0;
dispatch_once(&onceToken, ^{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not familiar with blocks. Does this introduce any dependency on libcalls? Can this reuse if (__aarch64_cpu_features.features) used by ELF?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only dependencies from the block + the dispatch_once should be on things in compiler-rt and in libSystem (which we need anyway for the sysctls). I am not sure whether we can get away with doing the if (__aarch64_cpu_features.features) check that ELF does, since the resolvers that will be calling this will happen lazily at runtime on the Darwin implementation, and therefore potentially by separate threads (unlike ELF, which would run them at load time). This function is idempotent, but I don't think that is enough of an atomicity guarantee, especially if __aarch64_cpu_features grows more fields.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation. If __init_cpu_features_resolver can be called concurrently, I think it is worth a comment, since the other __init_* functions are guaranteed to be very early in the program startup sequence and is serial.

Created using spr 1.3.4

[skip ci]
Created using spr 1.3.4
Created using spr 1.3.4

[skip ci]
Created using spr 1.3.4
Created using spr 1.3.4

[skip ci]
Created using spr 1.3.4
Created using spr 1.3.4

[skip ci]
Created using spr 1.3.4
Created using spr 1.3.4

[skip ci]
Created using spr 1.3.4
@jroelofs jroelofs changed the base branch from users/jroelofs/spr/main.builtinsarm64-build-__init_cpu_features_resolver-on-apple-platforms to main December 14, 2023 21:57
@jroelofs jroelofs merged commit 212a5e1 into main Dec 14, 2023
@jroelofs jroelofs deleted the users/jroelofs/spr/builtinsarm64-build-__init_cpu_features_resolver-on-apple-platforms branch December 14, 2023 21:57
@mstorsjo
Copy link
Member

This commit broken building compiler-rt builtins for Windows on aarch64; building now hits these errors:

llvm-project/compiler-rt/lib/builtins/cpu_model.c:1192:2: error: No support for checking for lse atomics on this platfrom yet.
 1192 | #error No support for checking for lse atomics on this platfrom yet.
      |  ^
llvm-project/compiler-rt/lib/builtins/cpu_model.c:1571:2: error: No support for checking hwcap on this platform yet.
 1571 | #error No support for checking hwcap on this platform yet.
      |  ^
2 errors generated.

Before this change, most of this whole file was ifdeffed out when building on Windows (and Apple platforms, I would presume), but now most of it is included, then hitting this #error.

I guess it could work to just remove the #error cases, but this file suffers from a pretty deep ifdef nesting jungle, so I'm not sure if that's the best solution. (FWIW, if we wanted to add aarch64 CPU feature detection for Windows here, the code would be more of a separate codepath just like the Apple case, it doesn't share the linux/BSD HWCAP style.)

I can push a quick fix, either removing the #error or reverting this commit, later during the day.

BTW, when compiling the file I also get a bunch of warnings in this style:

llvm-project/compiler-rt/lib/builtins/cpu_model.c:1448:36: warning: value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths]
 1448 |     getCPUFeature(ID_AA64PFR1_EL1, ftr);
      |                                    ^
llvm-project/compiler-rt/lib/builtins/cpu_model.c:1448:5: note: use constraint modifier "w"
 1448 |     getCPUFeature(ID_AA64PFR1_EL1, ftr);
      |     ^
llvm-project/compiler-rt/lib/builtins/cpu_model.c:1345:45: note: expanded from macro 'getCPUFeature'
 1345 | #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
      |

@DavidSpickett
Copy link
Collaborator

Also seen on Linaro's Windows on Arm 2 stage bot: https://lab.llvm.org/buildbot/#/builders/120/builds/5990

DavidSpickett added a commit that referenced this pull request Dec 15, 2023
… platforms (#73685)"

And its follow up "fixup! [builtins][arm64] Build __init_cpu_features_resolver on Apple platforms (#73685)".

This reverts commit 5b47052 and
212a5e1.

Due to build failures on Windows:
```
C:\Users\Tcwg\llvm-worker\clang-arm64-windows-msvc-2stage\llvm\compiler-rt\lib\builtins\cpu_model.c(1571,2): error: No support for checking hwcap on this platform yet.
 1571 | #error No support for checking hwcap on this platform yet.
      |  ^
```
https://lab.llvm.org/buildbot/#/builders/120/builds/5990
@jroelofs
Copy link
Contributor Author

Sorry about that... thanks!

@jroelofs
Copy link
Contributor Author

BTW, when compiling the file I also get a bunch of warnings in this style:

@mstorsjo maybe unsigned long is 32 bits on that platform... what's the target triple?

@mstorsjo
Copy link
Member

BTW, when compiling the file I also get a bunch of warnings in this style:

@mstorsjo maybe unsigned long is 32 bits on that platform... what's the target triple?

Ah, indeed - yes, Windows has 32 bit longs. The triples are aarch64-windows-gnu or aarch64-windows-msvc.

@jroelofs
Copy link
Contributor Author

this file suffers from a pretty deep ifdef nesting jungle, so I'm not sure if that's the best solution

I'll refactor, and put up a new PR.

@jroelofs
Copy link
Contributor Author

Refactor here: #75635

jroelofs added a commit that referenced this pull request Dec 19, 2023
jroelofs added a commit that referenced this pull request Dec 19, 2023
jroelofs added a commit to swiftlang/llvm-project that referenced this pull request Jan 10, 2024
jroelofs added a commit to swiftlang/llvm-project that referenced this pull request Jan 10, 2024
qihangkong pushed a commit to rvgpu/rvgpu-llvm that referenced this pull request Apr 23, 2024
qihangkong pushed a commit to rvgpu/rvgpu-llvm that referenced this pull request Apr 23, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

7 participants