Skip to content

Commit 6424abc

Browse files
authored
[AArch64] Enable AvoidLDAPUR for cpu=generic between armv8.4 and armv9.3. (#125261)
As added in #124274, CPUs in this range can suffer from performance issues with ldapur. As the gain from ldar->ldapr is expected to be greater than the minor gain from ldapr->ldapur, this opts to avoid the instruction under the default -mcpu=generic when the -march is less than armv8.8 / armv9.3. I renamed AArch64Subtarget::Others to AArch64Subtarget::Generic to make its meaning clearer.
1 parent 3872e55 commit 6424abc

File tree

4 files changed

+14
-6
lines changed

4 files changed

+14
-6
lines changed

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,12 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
130130
// this in the future so we can specify it together with the subtarget
131131
// features.
132132
switch (ARMProcFamily) {
133-
case Others:
133+
case Generic:
134+
// Using TuneCPU=generic we avoid ldapur instructions to line up with the
135+
// cpus that use the AvoidLDAPUR feature. We don't want this to be on
136+
// forever, so it is enabled between armv8.4 and armv8.7/armv9.2.
137+
if (hasV8_4aOps() && !hasV8_8aOps())
138+
AvoidLDAPUR = true;
134139
break;
135140
case Carmel:
136141
CacheLineSize = 64;

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,15 @@ class Triple;
3838
class AArch64Subtarget final : public AArch64GenSubtargetInfo {
3939
public:
4040
enum ARMProcFamilyEnum : uint8_t {
41-
Others,
41+
Generic,
4242
#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
4343
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
4444
#undef ARM_PROCESSOR_FAMILY
4545
};
4646

4747
protected:
4848
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
49-
ARMProcFamilyEnum ARMProcFamily = Others;
49+
ARMProcFamilyEnum ARMProcFamily = Generic;
5050

5151
// Enable 64-bit vectorization in SLP.
5252
unsigned MinVectorRegisterBitWidth = 64;

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4282,7 +4282,7 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
42824282
// If mcpu is omitted, getProcFamily() returns AArch64Subtarget::Generic, so by
42834283
// checking for that case, we can ensure that the default behaviour is
42844284
// unchanged
4285-
if (ST->getProcFamily() != AArch64Subtarget::Others &&
4285+
if (ST->getProcFamily() != AArch64Subtarget::Generic &&
42864286
!ST->getSchedModel().isOutOfOrder()) {
42874287
UP.Runtime = true;
42884288
UP.Partial = true;

llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "(?!^\s*lda.*\bsp\b)^\s*.*\bsp\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
2-
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL
3-
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
2+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.8a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL
3+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
44
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo,avoid-ldapur -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
55
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=neoverse-v2 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
66
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=neoverse-v3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
77
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
88
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x4 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
99
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x925 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
10+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
11+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.8a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
12+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9.3a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
1013

1114
define i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
1215
; CHECK-LABEL: load_atomic_i8_aligned_unordered:

0 commit comments

Comments
 (0)