-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[llvm][AArch64] Support -mcpu=apple-m4 #95478
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-aarch64 Author: Jon Roelofs (jroelofs) ChangesFull diff: https://github.com/llvm/llvm-project/pull/95478.diff 4 Files Affected:
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index df8e685eb6667..c1a68a0ec5c19 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -521,7 +521,14 @@ inline constexpr CpuInfo CpuInfos[] = {
AArch64::ExtensionBitset({AArch64::AEK_AES, AArch64::AEK_SHA2,
AArch64::AEK_SHA3, AArch64::AEK_FP16,
AArch64::AEK_FP16FML})},
-
+ // Technically apple-m4 is ARMv9.2a, but a quirk of LLVM defines v9.0 as
+ // requiring SVE, which is optional according to the Arm ARM and not
+ // supported by the core. ARMv8.7a is the next closest choice.
+ {"apple-m4", ARMV8_7A,
+ AArch64::ExtensionBitset(
+ {AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_SHA3,
+ AArch64::AEK_FP16, AArch64::AEK_FP16FML, AArch64::AEK_SME,
+ AArch64::AEK_SME2, AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64})},
{"apple-s4", ARMV8_3A,
AArch64::ExtensionBitset(
{AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_FP16})},
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index c04c20c78e8eb..57df6b85ab11d 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -398,6 +398,22 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
FeatureZCRegMove,
FeatureZCZeroing]>;
+def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
+ "Apple M4", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseCryptoEOR,
+ FeatureFuseLiterals,
+ FeatureZCRegMove,
+ FeatureZCZeroing
+ ]>;
+
def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
[FeatureExynosCheapAsMoveHandling,
@@ -784,6 +800,14 @@ def ProcessorFeatures {
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
FeatureHCX];
+ // Technically apple-m4 is ARMv9.2. See the corresponding comment in
+ // AArch64TargetParser.h.
+ list<SubtargetFeature> AppleM4 = [HasV8_7aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureSHA3,
+ FeatureFullFP16, FeatureFP16FML,
+ FeatureAES, FeatureBF16,
+ FeatureSME2,
+ FeatureSMEF64F64, FeatureSMEI16I64];
list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
FeaturePerfMon];
list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
@@ -1010,6 +1034,9 @@ def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16,
[TuneAppleA16]>;
def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17,
[TuneAppleA17]>;
+def : ProcessorModel<"apple-m4", CycloneModel, ProcessorFeatures.AppleM4,
+ [TuneAppleM4]>;
+
// Mac CPUs
def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
[TuneAppleA14]>;
@@ -1025,8 +1052,8 @@ def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12,
[TuneAppleA12]>;
// Alias for the latest Apple processor model supported by LLVM.
-def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA16,
- [TuneAppleA16]>;
+def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleM4,
+ [TuneAppleM4]>;
// Fujitsu A64FX
def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 2920066cfdcff..1fad1d5ca6d7d 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -182,6 +182,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
case AppleA15:
case AppleA16:
case AppleA17:
+ case AppleM4:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
@@ -191,6 +192,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
case AppleA15:
case AppleA16:
case AppleA17:
+ case AppleM4:
MaxInterleaveFactor = 4;
break;
default:
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 23555dfc68dc0..ccc101e907441 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1644,6 +1644,21 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_I8MM, AArch64::AEK_JSCVT, AArch64::AEK_FCMA,
AArch64::AEK_PAUTH}),
"8.6-A"),
+ ARMCPUTestParams<AArch64::ExtensionBitset>(
+ "apple-m4", "armv8.7-a", "crypto-neon-fp-armv8",
+ AArch64::ExtensionBitset(
+ {AArch64::AEK_CRC, AArch64::AEK_AES,
+ AArch64::AEK_SHA2, AArch64::AEK_SHA3,
+ AArch64::AEK_FP, AArch64::AEK_SIMD,
+ AArch64::AEK_LSE, AArch64::AEK_RAS,
+ AArch64::AEK_RDM, AArch64::AEK_RCPC,
+ AArch64::AEK_DOTPROD, AArch64::AEK_FP16,
+ AArch64::AEK_FP16FML, AArch64::AEK_BF16,
+ AArch64::AEK_I8MM, AArch64::AEK_JSCVT,
+ AArch64::AEK_FCMA, AArch64::AEK_PAUTH,
+ AArch64::AEK_SME, AArch64::AEK_SME2,
+ AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64}),
+ "8.7-A"),
ARMCPUTestParams<AArch64::ExtensionBitset>(
"apple-s4", "armv8.3-a", "crypto-neon-fp-armv8",
AArch64::ExtensionBitset(
@@ -1872,7 +1887,7 @@ INSTANTIATE_TEST_SUITE_P(
ARMCPUTestParams<AArch64::ExtensionBitset>::PrintToStringParamName);
// Note: number of CPUs includes aliases.
-static constexpr unsigned NumAArch64CPUArchs = 79;
+static constexpr unsigned NumAArch64CPUArchs = 80;
TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;
|
@jroelofs So does the actual spec define 9.0 as requiring SVE, or just LLVM? if just llvm, can we fix this within llvm? |
As I explained in the comment, it is just an llvm quirk, and not the Arm ARM that requires this. Fixing it might be painful. Depends on how many projects are incorrectly assuming v9.0 implies SVE support at this point. |
@@ -1010,6 +1034,9 @@ def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16, | |||
[TuneAppleA16]>; | |||
def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17, | |||
[TuneAppleA17]>; | |||
def : ProcessorModel<"apple-m4", CycloneModel, ProcessorFeatures.AppleM4, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't this go below, under // Mac CPUs
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, it is an iPad CPU:
https://www.apple.com/newsroom/2024/05/apple-introduces-m4-chip/
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should probably rearrange them to remove that distinction though, so that aliases that share the same ProcessorFeatures.AppleA*
are together. I'll do that in a follow-up commit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, right - sorry I forgot about that distinction. (I guess it'll end up in macs at some point too, and I know you can't comment on that - so until then this is indeed the right sorting.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is a problem that is worth fixing long term. How to correctly model release versions and their optional dependencies. Furthermore, for the BTI example how remove features. |
|
||
// Technically apple-m4 is ARMv9.2a, but a quirk of LLVM defines v9.0 as | ||
// requiring SVE, which is optional according to the Arm ARM and not | ||
// supported by the core. ARMv8.7a is the next closest choice. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From the Arm ARM:
FEAT_SVE2 is OPTIONAL from Armv9.0.
In LLVM, SVE2 is an Implied
(read: mandatory) feature of 9.0-a (wrong), and SVE and SVE2 are both on by default for the architecture:
def HasV9_0aOps : Architecture64<9, 0, "a", "v9a",
[HasV8_5aOps, FeatureMEC, FeatureSVE2],
!listconcat(HasV8_5aOps.DefaultExts, [FeatureFullFP16, FeatureSVE,
FeatureSVE2])>;
It should be possible to remove SVE2 from the Implied
list while keeping it in the list of default extensions, which would avoid any user-facing changes.
I'm not sure why FEAT_MEC is enabled there either.
FEAT_MEC is OPTIONAL from Armv9.2.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's do that as a separate change after this one. That order will ensure this is still correct (as it can be) even after the revert, without the author of the revert having to think about this.
No description provided.