Skip to content

Commit 340054e

Browse files
[AArch64][SME] Remove combination of private-ZA and preserves_za. (#78563)
The new Clang attributes no longer support the combination of a private-ZA function that preserves ZA; using __arm_preserves("za") now means that ZA is both shared and preserved. The special handling of this combination brought little benefit: in practice it only avoided restoring the lazy save after the call, while a lazy save still had to be set up (possibly with a 0-sized buffer). If a new attribute is added in the future to support this case, some of the code removed in this patch can be restored. For now, removing this code simplifies things.
1 parent 42fb1fa commit 340054e

File tree

3 files changed

+34
-103
lines changed

3 files changed

+34
-103
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 30 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -7609,23 +7609,15 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
76097609

76107610
bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
76117611
if (RequiresLazySave) {
7612-
SDValue NumZaSaveSlices;
7613-
if (!CalleeAttrs.preservesZA()) {
7614-
// Set up a lazy save mechanism by storing the runtime live slices
7615-
// (worst-case SVL) to the TPIDR2 stack object.
7616-
NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
7617-
DAG.getConstant(1, DL, MVT::i32));
7618-
} else if (CalleeAttrs.preservesZA()) {
7619-
NumZaSaveSlices = DAG.getConstant(0, DL, MVT::i64);
7620-
}
7621-
76227612
unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj();
76237613
MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj);
76247614
SDValue TPIDR2ObjAddr = DAG.getFrameIndex(TPIDR2Obj,
76257615
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
76267616
SDValue NumZaSaveSlicesAddr =
76277617
DAG.getNode(ISD::ADD, DL, TPIDR2ObjAddr.getValueType(), TPIDR2ObjAddr,
76287618
DAG.getConstant(8, DL, TPIDR2ObjAddr.getValueType()));
7619+
SDValue NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
7620+
DAG.getConstant(1, DL, MVT::i32));
76297621
Chain = DAG.getTruncStore(Chain, DL, NumZaSaveSlices, NumZaSaveSlicesAddr,
76307622
MPI, MVT::i16);
76317623
Chain = DAG.getNode(
@@ -7638,14 +7630,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
76387630
CLI.CB)
76397631
: OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
76407632
&MF.getFunction());
7641-
DescribeCallsite(R) << " sets up a lazy save for ZA";
7642-
if (CalleeAttrs.preservesZA())
7643-
R << ", but callee preserves ZA, so we request 0 slices to be saved";
7644-
else
7645-
R << ", and we request that all slices be saved";
7646-
R << ore::setExtraArgs()
7647-
<< ore::NV("CalleePreservesZA", CalleeAttrs.preservesZA());
7648-
return R;
7633+
return DescribeCallsite(R) << " sets up a lazy save for ZA";
76497634
});
76507635
}
76517636

@@ -8081,34 +8066,33 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
80818066
}
80828067

80838068
if (RequiresLazySave) {
8084-
if (!CalleeAttrs.preservesZA()) {
8085-
// Unconditionally resume ZA.
8086-
Result = DAG.getNode(
8087-
AArch64ISD::SMSTART, DL, MVT::Other, Result,
8088-
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
8089-
DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
8090-
8091-
// Conditionally restore the lazy save using a pseudo node.
8092-
unsigned FI = FuncInfo->getLazySaveTPIDR2Obj();
8093-
SDValue RegMask = DAG.getRegisterMask(
8094-
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
8095-
SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
8096-
"__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout()));
8097-
SDValue TPIDR2_EL0 = DAG.getNode(
8098-
ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result,
8099-
DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
8100-
8101-
// Copy the address of the TPIDR2 block into X0 before 'calling' the
8102-
// RESTORE_ZA pseudo.
8103-
SDValue Glue;
8104-
SDValue TPIDR2Block = DAG.getFrameIndex(
8105-
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
8106-
Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue);
8107-
Result = DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other,
8108-
{Result, TPIDR2_EL0,
8109-
DAG.getRegister(AArch64::X0, MVT::i64),
8110-
RestoreRoutine, RegMask, Result.getValue(1)});
8111-
}
8069+
// Unconditionally resume ZA.
8070+
Result = DAG.getNode(
8071+
AArch64ISD::SMSTART, DL, MVT::Other, Result,
8072+
DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32),
8073+
DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64));
8074+
8075+
// Conditionally restore the lazy save using a pseudo node.
8076+
unsigned FI = FuncInfo->getLazySaveTPIDR2Obj();
8077+
SDValue RegMask = DAG.getRegisterMask(
8078+
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
8079+
SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
8080+
"__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout()));
8081+
SDValue TPIDR2_EL0 = DAG.getNode(
8082+
ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result,
8083+
DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
8084+
8085+
// Copy the address of the TPIDR2 block into X0 before 'calling' the
8086+
// RESTORE_ZA pseudo.
8087+
SDValue Glue;
8088+
SDValue TPIDR2Block = DAG.getFrameIndex(
8089+
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
8090+
Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue);
8091+
Result =
8092+
DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other,
8093+
{Result, TPIDR2_EL0, DAG.getRegister(AArch64::X0, MVT::i64),
8094+
RestoreRoutine, RegMask, Result.getValue(1)});
8095+
81128096
// Finally reset the TPIDR2_EL0 register to 0.
81138097
Result = DAG.getNode(
81148098
ISD::INTRINSIC_VOID, DL, MVT::Other, Result,

llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,24 @@
22
; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s
33

44
declare void @private_za_callee()
5-
declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved"
65
declare float @llvm.cos.f32(float)
76

87
define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
9-
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
8+
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA
109
call void @private_za_callee()
1110
ret void
1211
}
1312

1413
define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
15-
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
14+
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
1615
call void @private_za_callee()
17-
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
16+
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
1817
call void @private_za_callee()
1918
ret void
2019
}
2120

22-
define void @test_lazy_save_preserved_callee() nounwind "aarch64_pstate_za_shared" {
23-
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_preserved_callee' to 'private_za_preserved_callee' sets up a lazy save for ZA, but callee preserves ZA, so we request 0 slices to be saved
24-
call void @private_za_preserved_callee()
25-
ret void
26-
}
27-
2821
define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" {
29-
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA, and we request that all slices be saved
22+
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA
3023
%res = call float @llvm.cos.f32(float %a)
3124
ret float %res
3225
}

llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
; RUN: llc -mtriple=aarch64 -mattr=+sme < %s | FileCheck %s
33

44
declare void @private_za_callee()
5-
declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved"
65
declare float @llvm.cos.f32(float)
76

87
; Test lazy-save mechanism for a single callee.
@@ -170,48 +169,3 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z
170169
call void @private_za_callee()
171170
ret void
172171
}
173-
174-
175-
; Test lazy-save mechanism for an aarch64_pstate_za_shared caller
176-
; calling a callee with aarch64_pstate_za_preserved.
177-
define void @za_shared_caller_za_preserved_callee() nounwind "aarch64_pstate_za_shared" "aarch64_pstate_sm_compatible" {
178-
; CHECK-LABEL: za_shared_caller_za_preserved_callee:
179-
; CHECK: // %bb.0:
180-
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
181-
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
182-
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
183-
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
184-
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
185-
; CHECK-NEXT: add x29, sp, #64
186-
; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
187-
; CHECK-NEXT: sub sp, sp, #16
188-
; CHECK-NEXT: rdsvl x8, #1
189-
; CHECK-NEXT: mov x9, sp
190-
; CHECK-NEXT: msub x8, x8, x8, x9
191-
; CHECK-NEXT: mov sp, x8
192-
; CHECK-NEXT: sub x9, x29, #80
193-
; CHECK-NEXT: stp x8, xzr, [x29, #-80]
194-
; CHECK-NEXT: msr TPIDR2_EL0, x9
195-
; CHECK-NEXT: bl __arm_sme_state
196-
; CHECK-NEXT: and x19, x0, #0x1
197-
; CHECK-NEXT: tbz w19, #0, .LBB4_2
198-
; CHECK-NEXT: // %bb.1:
199-
; CHECK-NEXT: smstop sm
200-
; CHECK-NEXT: .LBB4_2:
201-
; CHECK-NEXT: bl private_za_preserved_callee
202-
; CHECK-NEXT: tbz w19, #0, .LBB4_4
203-
; CHECK-NEXT: // %bb.3:
204-
; CHECK-NEXT: smstart sm
205-
; CHECK-NEXT: .LBB4_4:
206-
; CHECK-NEXT: msr TPIDR2_EL0, xzr
207-
; CHECK-NEXT: sub sp, x29, #64
208-
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
209-
; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
210-
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
211-
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
212-
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
213-
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
214-
; CHECK-NEXT: ret
215-
call void @private_za_preserved_callee()
216-
ret void
217-
}

0 commit comments

Comments
 (0)