Skip to content

Commit 2ed87db

Browse files
committed
[AArch64][SME] Add remarks to flag lazy ZA saves, and SMSTART/SMSTOP transitions
1 parent d688816 commit 2ed87db

File tree

3 files changed

+162
-1
lines changed

3 files changed

+162
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+40-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "llvm/Analysis/LoopInfo.h"
3232
#include "llvm/Analysis/MemoryLocation.h"
3333
#include "llvm/Analysis/ObjCARCUtil.h"
34+
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
3435
#include "llvm/Analysis/TargetTransformInfo.h"
3536
#include "llvm/Analysis/ValueTracking.h"
3637
#include "llvm/Analysis/VectorUtils.h"
@@ -7362,6 +7363,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
73627363
else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
73637364
CalleeAttrs = SMEAttrs(ES->getSymbol());
73647365

7366+
// Streams a description of the call site being lowered onto remark R, in the
// form "call from '<caller>' to '<callee>'", and returns R so that callers
// can keep appending text to the same remark.
//
// The caller name comes from MF.getName(). The callee name is the external
// symbol when CLI.Callee is an ExternalSymbolSDNode; otherwise it is the name
// of the function directly called by CLI.CB, when there is one. If neither is
// available the literal text "unknown callee" is emitted instead.
auto DescribeCallsite =
7367+
[&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
7368+
R << "call from '" << ore::NV("Caller", MF.getName()) << "' to '";
7369+
if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
7370+
R << ore::NV("Callee", ES->getSymbol());
7371+
else if (CLI.CB && CLI.CB->getCalledFunction())
7372+
R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
7373+
else
7374+
R << "unknown callee";
7375+
R << "'";
7376+
return R;
7377+
};
7378+
73657379
bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
73667380
if (RequiresLazySave) {
73677381
SDValue NumZaSaveSlices;
@@ -7388,13 +7402,38 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
73887402
ISD::INTRINSIC_VOID, DL, MVT::Other, Chain,
73897403
DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
73907404
TPIDR2ObjAddr);
7405+
OptimizationRemarkEmitter ORE(&MF.getFunction());
7406+
ORE.emit([&]() {
7407+
auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
7408+
CLI.CB)
7409+
: OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
7410+
&MF.getFunction());
7411+
DescribeCallsite(R) << " sets up a lazy save for ZA";
7412+
if (CalleeAttrs.preservesZA())
7413+
R << ", but callee preserves ZA, so we request 0 slices to be saved";
7414+
else
7415+
R << ", and we request that all slices be saved";
7416+
R << ore::setExtraArgs()
7417+
<< ore::NV("CalleePreservesZA", CalleeAttrs.preservesZA());
7418+
return R;
7419+
});
73917420
}
73927421

73937422
SDValue PStateSM;
73947423
std::optional<bool> RequiresSMChange =
73957424
CallerAttrs.requiresSMChange(CalleeAttrs);
7396-
if (RequiresSMChange)
7425+
if (RequiresSMChange) {
73977426
PStateSM = getPStateSM(DAG, Chain, CallerAttrs, DL, MVT::i64);
7427+
OptimizationRemarkEmitter ORE(&MF.getFunction());
7428+
ORE.emit([&]() {
7429+
auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMETransition",
7430+
CLI.CB)
7431+
: OptimizationRemarkAnalysis("sme", "SMETransition",
7432+
&MF.getFunction());
7433+
DescribeCallsite(R) << " requires a streaming mode transition";
7434+
return R;
7435+
});
7436+
}
73987437

73997438
// Adjust the stack pointer for the new arguments...
74007439
// These operations are automatically eliminated by the prolog/epilog pass
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s
3+
4+
declare void @private_za_callee()
5+
declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved"
6+
declare float @llvm.cos.f32(float)
7+
8+
; A caller with shared ZA state makes a single call to @private_za_callee,
; which is declared without any ZA attribute. One lazy-save remark asking for
; all slices to be saved is expected for that call.
define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
9+
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
10+
call void @private_za_callee()
11+
ret void
12+
}
13+
14+
; Same setup as the single-call case, but with two consecutive calls to
; @private_za_callee. A separate lazy-save remark is expected for each call.
define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
15+
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
16+
call void @private_za_callee()
17+
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
18+
call void @private_za_callee()
19+
ret void
20+
}
21+
22+
; The callee here is declared with "aarch64_pstate_za_preserved". A lazy save
; is still reported, but the expected remark uses the alternative wording
; stating that 0 slices are requested to be saved.
define void @test_lazy_save_preserved_callee() nounwind "aarch64_pstate_za_shared" {
23+
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_preserved_callee' to 'private_za_preserved_callee' sets up a lazy save for ZA, but callee preserves ZA, so we request 0 slices to be saved
24+
call void @private_za_preserved_callee()
25+
ret void
26+
}
27+
28+
; The call is to the @llvm.cos.f32 intrinsic rather than a declared function.
; The expected remark names the callee as 'cosf', i.e. the remark reports the
; symbol actually called after the intrinsic has been expanded.
define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" {
29+
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA, and we request that all slices be saved
30+
%res = call float @llvm.cos.f32(float %a)
31+
ret float %res
32+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve -verify-machineinstrs --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s
2+
3+
declare void @normal_callee()
4+
declare void @streaming_callee() "aarch64_pstate_sm_enabled"
5+
declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"
6+
7+
; CHECK: remark: <unknown>:0:0: call from 'normal_caller_streaming_callee' to 'streaming_callee' requires a streaming mode transition
8+
; Caller has no streaming attribute; the callee is declared with
; "aarch64_pstate_sm_enabled". A streaming mode transition remark is expected.
define void @normal_caller_streaming_callee() nounwind {
9+
call void @streaming_callee()
10+
ret void;
11+
}
12+
13+
; CHECK: remark: <unknown>:0:0: call from 'streaming_caller_normal_callee' to 'normal_callee' requires a streaming mode transition
14+
; Reverse direction: the caller is "aarch64_pstate_sm_enabled" and the callee
; is declared without any streaming attribute. A transition remark is expected.
define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" {
15+
call void @normal_callee()
16+
ret void;
17+
}
18+
19+
; CHECK-NOT: streaming_caller_streaming_callee
20+
; Negative case: caller and callee are both "aarch64_pstate_sm_enabled", and
; the test requires that no output mentions this function.
define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" {
21+
call void @streaming_callee()
22+
ret void;
23+
}
24+
25+
; CHECK-NOT: streaming_caller_streaming_compatible_callee
26+
; Negative case: a streaming caller calls a callee declared with
; "aarch64_pstate_sm_compatible". The test requires that no output mentions
; this function.
define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" {
27+
call void @streaming_compatible_callee()
28+
ret void;
29+
}
30+
31+
; CHECK: remark: <unknown>:0:0: call from 'call_to_function_pointer_streaming_enabled' to 'unknown callee' requires a streaming mode transition
32+
; Indirect call through %p, with "aarch64_pstate_sm_enabled" given on the call
; site itself. No callee name is available, so the expected remark reports
; 'unknown callee'.
define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind {
33+
call void %p() "aarch64_pstate_sm_enabled"
34+
ret void
35+
}
36+
37+
; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_simdfp' to 'streaming_callee' requires a streaming mode transition
38+
; The <4 x i32> argument is returned unchanged after the call, so it is used
; on both sides of the call to the streaming callee. Only the transition
; remark is checked here.
; NOTE(review): the name suggests this was written to exercise SIMD&FP
; register clobbering around the mode switch -- confirm against the test
; this was derived from.
define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind {
39+
call void @streaming_callee()
40+
ret <4 x i32> %x;
41+
}
42+
43+
; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve' to 'streaming_callee' requires a streaming mode transition
44+
; Scalable-vector variant: the <vscale x 4 x i32> argument is returned
; unchanged after the call to the streaming callee. Only the transition remark
; is checked here.
define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) nounwind {
45+
call void @streaming_callee()
46+
ret <vscale x 4 x i32> %x;
47+
}
48+
49+
; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve_duplicate' to 'streaming_callee' requires a streaming mode transition
50+
; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve_duplicate' to 'streaming_callee' requires a streaming mode transition
51+
; Two back-to-back calls to the streaming callee from a non-streaming caller.
; The test expects the transition remark twice, once per call.
define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) nounwind {
52+
call void @streaming_callee()
53+
call void @streaming_callee()
54+
ret <vscale x 4 x i32> %x;
55+
}
56+
57+
; CHECK: remark: <unknown>:0:0: call from 'call_to_intrinsic_without_chain' to 'cos' requires a streaming mode transition
58+
; A streaming caller invokes the @llvm.cos.f64 intrinsic with fast-math flags
; and adds the result to %x. The expected remark names the callee as 'cos',
; so the transition is reported against the symbol the intrinsic becomes.
define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" {
59+
entry:
60+
%res = call fast double @llvm.cos.f64(double %x)
61+
%res.fadd = fadd fast double %res, %x
62+
ret double %res.fadd
63+
}
64+
65+
declare double @llvm.cos.f64(double)
66+
67+
; CHECK: remark: <unknown>:0:0: call from 'disable_tailcallopt' to 'streaming_callee' requires a streaming mode transition
68+
; The call is marked `tail`, and a transition remark is still expected for it.
; NOTE(review): the name implies the tail call is not performed when a
; streaming mode change is needed -- this file only verifies the remark.
define void @disable_tailcallopt() nounwind {
69+
tail call void @streaming_callee()
70+
ret void;
71+
}
72+
73+
; CHECK: remark: <unknown>:0:0: call from 'call_to_non_streaming_pass_sve_objects' to 'foo' requires a streaming mode transition
74+
; Attribute group #0 makes this caller "aarch64_pstate_sm_enabled"; @foo is
; declared without streaming attributes. The caller passes @foo the addresses
; of three <vscale x 16 x i8> stack objects plus the value returned by
; @llvm.aarch64.sme.cntsb, then reads element 0 of the first object back.
; The expected remark names 'foo' as the callee needing the transition.
define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 {
75+
entry:
76+
%Data1 = alloca <vscale x 16 x i8>, align 16
77+
%Data2 = alloca <vscale x 16 x i8>, align 16
78+
%Data3 = alloca <vscale x 16 x i8>, align 16
79+
%0 = tail call i64 @llvm.aarch64.sme.cntsb()
80+
call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
81+
%1 = load <vscale x 16 x i8>, ptr %Data1, align 16
82+
%vecext = extractelement <vscale x 16 x i8> %1, i64 0
83+
ret i8 %vecext
84+
}
85+
86+
declare i64 @llvm.aarch64.sme.cntsb()
87+
88+
declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
89+
90+
attributes #0 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }

0 commit comments

Comments
 (0)