Skip to content

Commit 6e1ea7e

Browse files
authored
[AArch64] Set the default streaming hazard size to 1024 for +sme,+sve (#123753)
The default for all other feature combinations remains at zero (i.e. no streaming hazards). This value may be adjusted in the future (e.g. based on the processor family); for now, it is set conservatively.
1 parent 4df6d3d commit 6e1ea7e

18 files changed

+90
-27
lines changed

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,10 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
358358
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
359359
IsLittle(LittleEndian), IsStreaming(IsStreaming),
360360
IsStreamingCompatible(IsStreamingCompatible),
361-
StreamingHazardSize(AArch64StreamingHazardSize),
361+
StreamingHazardSize(
362+
AArch64StreamingHazardSize.getNumOccurrences() > 0
363+
? std::optional<unsigned>(AArch64StreamingHazardSize)
364+
: std::nullopt),
362365
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
363366
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
364367
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
8585

8686
bool IsStreaming;
8787
bool IsStreamingCompatible;
88-
unsigned StreamingHazardSize;
88+
std::optional<unsigned> StreamingHazardSize;
8989
unsigned MinSVEVectorSizeInBits;
9090
unsigned MaxSVEVectorSizeInBits;
9191
unsigned VScaleForTuning = 1;
@@ -179,7 +179,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
179179

180180
/// Returns the size of memory region that if accessed by both the CPU and
181181
/// the SME unit could result in a hazard. 0 = disabled.
182-
unsigned getStreamingHazardSize() const { return StreamingHazardSize; }
182+
unsigned getStreamingHazardSize() const {
183+
return StreamingHazardSize.value_or(
184+
!hasSMEFA64() && hasSME() && hasSVE() ? 1024 : 0);
185+
}
183186

184187
/// Returns true if the target has NEON and the function at runtime is known
185188
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE

llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -verify-machineinstrs < %s | FileCheck %s
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -verify-machineinstrs < %s | FileCheck %s -check-prefix=OUTLINER
1+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -aarch64-streaming-hazard-size=0 -verify-machineinstrs < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -enable-machine-outliner -aarch64-streaming-hazard-size=0 -verify-machineinstrs < %s | FileCheck %s -check-prefix=OUTLINER
33

44
declare void @callee();
55

llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve -aarch64-disable-multivector-spill-fill -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
3-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
4-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=PAIR
2+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sme2 -mattr=+sve -aarch64-disable-multivector-spill-fill -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
3+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sme -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NOPAIR
4+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sme2 -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=PAIR
55

66
declare void @my_func()
77
declare void @my_func2(<vscale x 16 x i8> %v)

llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc -mtriple=aarch64-darwin -mattr=+sve -mattr=+sme -enable-aarch64-sme-peephole-opt=false -verify-machineinstrs < %s | FileCheck %s
1+
; RUN: llc -mtriple=aarch64-darwin -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -enable-aarch64-sme-peephole-opt=false -verify-machineinstrs < %s | FileCheck %s
22

33
declare void @normal_callee();
44

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
2+
; RUN: llc -fast-isel=true -aarch64-streaming-hazard-size=0 -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
33
; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FISEL
4-
; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
4+
; RUN: llc -fast-isel=false -aarch64-streaming-hazard-size=0 -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
55
; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-GISEL
66

77

llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+sme < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme < %s | FileCheck %s
33

44
declare void @private_za_callee()
55
declare float @llvm.cos.f32(float)

llvm/test/CodeGen/AArch64/sme-peephole-opts.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2 < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme2 < %s | FileCheck %s
33

44
declare void @callee()
55
declare void @callee_farg(float)

llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc < %s | FileCheck %s
2+
; RUN: llc -aarch64-streaming-hazard-size=0 < %s | FileCheck %s
33

44
target triple = "aarch64-unknown-unknown-eabi-elf"
55

llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
33

44
declare void @normal_callee();
55
declare void @streaming_callee() "aarch64_pstate_sm_enabled";

llvm/test/CodeGen/AArch64/sme-streaming-body.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
33

44
declare void @normal_callee();
55
declare void @streaming_callee() "aarch64_pstate_sm_enabled";

llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -mattr=+sme < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -verify-machineinstrs -mattr=+sve -mattr=+sme < %s | FileCheck %s
33

44
; This file tests the following combinations related to streaming-enabled functions:
55
; [ ] N -> SC (Normal -> Streaming-compatible)

llvm/test/CodeGen/AArch64/sme-streaming-interface.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
33

44
; This file tests the following combinations related to streaming-enabled functions:
55
; [ ] N -> S (Normal -> Streaming)

llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc < %s | FileCheck %s
2+
; RUN: llc -aarch64-streaming-hazard-size=0 < %s | FileCheck %s
33

44
target triple = "aarch64"
55

llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=FP-CHECK
1+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=FP-CHECK
33
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=NO-SVE-CHECK
4-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs -enable-machine-outliner < %s | FileCheck %s --check-prefix=OUTLINER-CHECK
4+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs -enable-machine-outliner < %s | FileCheck %s --check-prefix=OUTLINER-CHECK
55

66
declare void @callee();
77
declare void @fixed_callee(<4 x i32>);
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefix=CHECK0
3+
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefix=CHECK1024
4+
5+
;; The following run lines check the default values for aarch64-stack-hazard-size/aarch64-streaming-hazard-size.
6+
7+
;; When +sme,+sve is set the hazard size should default to 1024.
8+
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -mattr=+sve | FileCheck %s --check-prefix=CHECK1024
9+
10+
;; The hazard size can still be overridden/disabled when +sme,+sve is set.
11+
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme -mattr=+sve -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefix=CHECK0
12+
13+
;; When +sme-fa64 is set alongside +sme,+sve the default hazard size should be 0.
14+
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme-fa64 -mattr=+sme -mattr=+sve | FileCheck %s --check-prefix=CHECK0
15+
16+
;; When +sme is set (without +sve) the default hazard size should be 0.
17+
; RUN: llc < %s -mtriple=aarch64 -mattr=+sme | FileCheck %s --check-prefix=CHECK0
18+
19+
define i32 @spill_fpr_with_gpr_stack_object(i64 %d) "aarch64_pstate_sm_compatible" {
20+
; CHECK0-LABEL: spill_fpr_with_gpr_stack_object:
21+
; CHECK0: // %bb.0: // %entry
22+
; CHECK0-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
23+
; CHECK0-NEXT: .cfi_def_cfa_offset 16
24+
; CHECK0-NEXT: .cfi_offset b8, -16
25+
; CHECK0-NEXT: mov x8, x0
26+
; CHECK0-NEXT: mov w0, wzr
27+
; CHECK0-NEXT: //APP
28+
; CHECK0-NEXT: //NO_APP
29+
; CHECK0-NEXT: str x8, [sp, #8]
30+
; CHECK0-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
31+
; CHECK0-NEXT: ret
32+
;
33+
; CHECK1024-LABEL: spill_fpr_with_gpr_stack_object:
34+
; CHECK1024: // %bb.0: // %entry
35+
; CHECK1024-NEXT: sub sp, sp, #1040
36+
; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill
37+
; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill
38+
; CHECK1024-NEXT: sub sp, sp, #1040
39+
; CHECK1024-NEXT: .cfi_def_cfa_offset 2080
40+
; CHECK1024-NEXT: .cfi_offset w29, -8
41+
; CHECK1024-NEXT: .cfi_offset b8, -1040
42+
; CHECK1024-NEXT: mov x8, x0
43+
; CHECK1024-NEXT: mov w0, wzr
44+
; CHECK1024-NEXT: //APP
45+
; CHECK1024-NEXT: //NO_APP
46+
; CHECK1024-NEXT: str x8, [sp, #8]
47+
; CHECK1024-NEXT: add sp, sp, #1040
48+
; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload
49+
; CHECK1024-NEXT: ldr d8, [sp] // 8-byte Folded Reload
50+
; CHECK1024-NEXT: add sp, sp, #1040
51+
; CHECK1024-NEXT: ret
52+
entry:
53+
%a = alloca i64
54+
tail call void asm sideeffect "", "~{d8}"() #1
55+
store i64 %d, ptr %a
56+
ret i32 0
57+
}

llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
3-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=CHECK-NO-SME-ROUTINES
4-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-MOPS
2+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK
3+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=CHECK-NO-SME-ROUTINES
4+
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-MOPS
55

66
@dst = global [512 x i8] zeroinitializer, align 1
77
@src = global [512 x i8] zeroinitializer, align 1

llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 | FileCheck %s --check-prefixes=CHECK
3-
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT
2+
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-streaming-hazard-size=0 | FileCheck %s --check-prefixes=CHECK
3+
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-streaming-hazard-size=0 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT
44

55
; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64
66
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8

0 commit comments

Comments
 (0)