Skip to content

Commit 070797b

Browse files
committed
[AArch64][PAC] Eliminate excessive MOVs when computing blend
As function calls do not generally preserve X16 and X17, it is beneficial to allow the AddrDisc operand of the B(L)RA pseudo instructions to reside in these registers and to take advantage of this when computing the discriminator. This can save up to two MOVs in cases such as loading a (signed) virtual function pointer via a (signed) pointer to the vtable. For example,

    ldr   x9, [x16]
    mov   x8, x16
    mov   x17, x8
    movk  x17, #34646, lsl #48
    blraa x9, x17

can be simplified to

    ldr   x8, [x16]
    movk  x16, #34646, lsl #48
    blraa x8, x16
1 parent 6fe7ad8 commit 070797b

File tree

3 files changed

+89
-50
lines changed

3 files changed

+89
-50
lines changed

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

Lines changed: 53 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,15 @@ class AArch64AsmPrinter : public AsmPrinter {
162162
// Emit the sequence for AUT or AUTPAC.
163163
void emitPtrauthAuthResign(const MachineInstr *MI);
164164

165-
// Emit the sequence to compute a discriminator into x17, or reuse AddrDisc.
166-
unsigned emitPtrauthDiscriminator(uint16_t Disc, unsigned AddrDisc);
165+
// Emit the sequence to compute the discriminator.
166+
// ScratchReg should be x16/x17.
167+
// The returned register is either unmodified AddrDisc or x16/x17.
168+
// If the expanded pseudo is allowed to clobber AddrDisc register, setting
169+
// MayUseAddrAsScratch may save one MOV instruction, provided the address
170+
// is already in x16/x17.
171+
Register emitPtrauthDiscriminator(uint16_t Disc, Register AddrDisc,
172+
Register ScratchReg,
173+
bool MayUseAddrAsScratch = false);
167174

168175
// Emit the sequence for LOADauthptrstatic
169176
void LowerLOADauthptrstatic(const MachineInstr &MI);
@@ -1726,8 +1733,10 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
17261733
}
17271734
}
17281735

1729-
unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
1730-
unsigned AddrDisc) {
1736+
Register AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
1737+
Register AddrDisc,
1738+
Register ScratchReg,
1739+
bool MayUseAddrAsScratch) {
17311740
// So far we've used NoRegister in pseudos. Now we need real encodings.
17321741
if (AddrDisc == AArch64::NoRegister)
17331742
AddrDisc = AArch64::XZR;
@@ -1737,16 +1746,24 @@ unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
17371746
if (!Disc)
17381747
return AddrDisc;
17391748

1740-
// If there's only a constant discriminator, MOV it into x17.
1749+
// If there's only a constant discriminator, MOV it into the scratch register.
17411750
if (AddrDisc == AArch64::XZR) {
1742-
emitMOVZ(AArch64::X17, Disc, 0);
1743-
return AArch64::X17;
1751+
emitMOVZ(ScratchReg, Disc, 0);
1752+
return ScratchReg;
17441753
}
17451754

1746-
// If there are both, emit a blend into x17.
1747-
emitMovXReg(AArch64::X17, AddrDisc);
1748-
emitMOVK(AArch64::X17, Disc, 48);
1749-
return AArch64::X17;
1755+
// If there are both, emit a blend into the scratch register.
1756+
1757+
// Check if we can save one MOV instruction.
1758+
assert(MayUseAddrAsScratch || ScratchReg != AddrDisc);
1759+
bool AddrDiscIsSafe = AddrDisc == AArch64::X16 || AddrDisc == AArch64::X17;
1760+
if (MayUseAddrAsScratch && AddrDiscIsSafe)
1761+
ScratchReg = AddrDisc;
1762+
else
1763+
emitMovXReg(ScratchReg, AddrDisc);
1764+
1765+
emitMOVK(ScratchReg, Disc, 48);
1766+
return ScratchReg;
17501767
}
17511768

17521769
/// Emits a code sequence to check an authenticated pointer value.
@@ -1963,7 +1980,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
19631980

19641981
// Compute aut discriminator into x17
19651982
assert(isUInt<16>(AUTDisc));
1966-
unsigned AUTDiscReg = emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc);
1983+
Register AUTDiscReg =
1984+
emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc, AArch64::X17);
19671985
bool AUTZero = AUTDiscReg == AArch64::XZR;
19681986
unsigned AUTOpc = getAUTOpcodeForKey(AUTKey, AUTZero);
19691987

@@ -2004,7 +2022,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
20042022

20052023
// Compute pac discriminator into x17
20062024
assert(isUInt<16>(PACDisc));
2007-
unsigned PACDiscReg = emitPtrauthDiscriminator(PACDisc, PACAddrDisc);
2025+
Register PACDiscReg =
2026+
emitPtrauthDiscriminator(PACDisc, PACAddrDisc, AArch64::X17);
20082027
bool PACZero = PACDiscReg == AArch64::XZR;
20092028
unsigned PACOpc = getPACOpcodeForKey(PACKey, PACZero);
20102029

@@ -2036,8 +2055,17 @@ void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
20362055

20372056
unsigned AddrDisc = MI->getOperand(3).getReg();
20382057

2039-
// Compute discriminator into x17
2040-
unsigned DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc);
2058+
// Make sure AddrDisc is solely used to compute the discriminator.
2059+
// While hardly meaningful, it is still possible to describe an authentication
2060+
// of a pointer against its own value (instead of storage address) with
2061+
// intrinsics, so use report_fatal_error instead of assert.
2062+
if (BrTarget == AddrDisc)
2063+
report_fatal_error("Branch target is signed with its own value");
2064+
2065+
// x16 and x17 are implicit-def'ed by MI, and AddrDisc is not used as any
2066+
// other input, so try to save one MOV by setting MayUseAddrAsScratch.
2067+
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, AArch64::X17,
2068+
/*MayUseAddrAsScratch=*/true);
20412069
bool IsZeroDisc = DiscReg == AArch64::XZR;
20422070

20432071
unsigned Opc;
@@ -2331,16 +2359,7 @@ void AArch64AsmPrinter::LowerMOVaddrPAC(const MachineInstr &MI) {
23312359
}
23322360
}
23332361

2334-
unsigned DiscReg = AddrDisc;
2335-
if (Disc != 0) {
2336-
if (AddrDisc != AArch64::XZR) {
2337-
emitMovXReg(AArch64::X17, AddrDisc);
2338-
emitMOVK(AArch64::X17, Disc, 48);
2339-
} else {
2340-
emitMOVZ(AArch64::X17, Disc, 0);
2341-
}
2342-
DiscReg = AArch64::X17;
2343-
}
2362+
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, AArch64::X17);
23442363

23452364
auto MIB = MCInstBuilder(getPACOpcodeForKey(Key, DiscReg == AArch64::XZR))
23462365
.addReg(AArch64::X16)
@@ -2608,6 +2627,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
26082627
// instruction here.
26092628
case AArch64::AUTH_TCRETURN:
26102629
case AArch64::AUTH_TCRETURN_BTI: {
2630+
Register Callee = MI->getOperand(0).getReg();
26112631
const uint64_t Key = MI->getOperand(2).getImm();
26122632
assert((Key == AArch64PACKey::IA || Key == AArch64PACKey::IB) &&
26132633
"Invalid auth key for tail-call return");
@@ -2617,31 +2637,23 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
26172637

26182638
Register AddrDisc = MI->getOperand(4).getReg();
26192639

2620-
Register ScratchReg = MI->getOperand(0).getReg() == AArch64::X16
2621-
? AArch64::X17
2622-
: AArch64::X16;
2640+
Register ScratchReg = Callee == AArch64::X16 ? AArch64::X17 : AArch64::X16;
26232641

26242642
emitPtrauthTailCallHardening(MI);
26252643

2626-
unsigned DiscReg = AddrDisc;
2627-
if (Disc) {
2628-
if (AddrDisc != AArch64::NoRegister) {
2629-
if (ScratchReg != AddrDisc)
2630-
emitMovXReg(ScratchReg, AddrDisc);
2631-
emitMOVK(ScratchReg, Disc, 48);
2632-
} else {
2633-
emitMOVZ(ScratchReg, Disc, 0);
2634-
}
2635-
DiscReg = ScratchReg;
2636-
}
2644+
// See the comments in emitPtrauthBranch.
2645+
if (Callee == AddrDisc)
2646+
report_fatal_error("Call target is signed with its own value");
2647+
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, ScratchReg,
2648+
/*MayUseAddrAsScratch=*/true);
26372649

2638-
const bool IsZero = DiscReg == AArch64::NoRegister;
2650+
const bool IsZero = DiscReg == AArch64::XZR;
26392651
const unsigned Opcodes[2][2] = {{AArch64::BRAA, AArch64::BRAAZ},
26402652
{AArch64::BRAB, AArch64::BRABZ}};
26412653

26422654
MCInst TmpInst;
26432655
TmpInst.setOpcode(Opcodes[Key][IsZero]);
2644-
TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
2656+
TmpInst.addOperand(MCOperand::createReg(Callee));
26452657
if (!IsZero)
26462658
TmpInst.addOperand(MCOperand::createReg(DiscReg));
26472659
EmitToStreamer(*OutStreamer, TmpInst);

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1840,36 +1840,36 @@ let Predicates = [HasPAuth] in {
18401840
// materialization here), in part because they're handled in a safer way by
18411841
// the kernel, notably on Darwin.
18421842
def BLRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1843-
GPR64noip:$AddrDisc),
1843+
GPR64:$AddrDisc),
18441844
[(AArch64authcall GPR64noip:$Rn, timm:$Key, timm:$Disc,
1845-
GPR64noip:$AddrDisc)]>, Sched<[]> {
1845+
GPR64:$AddrDisc)]>, Sched<[]> {
18461846
let isCodeGenOnly = 1;
18471847
let hasSideEffects = 1;
18481848
let mayStore = 0;
18491849
let mayLoad = 0;
18501850
let isCall = 1;
18511851
let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1852-
let Defs = [X17,LR];
1852+
let Defs = [X16,X17,LR];
18531853
let Uses = [SP];
18541854
}
18551855

18561856
def BLRA_RVMARKER : Pseudo<
18571857
(outs), (ins i64imm:$rvfunc, GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1858-
GPR64noip:$AddrDisc),
1858+
GPR64:$AddrDisc),
18591859
[(AArch64authcall_rvmarker tglobaladdr:$rvfunc,
18601860
GPR64noip:$Rn, timm:$Key, timm:$Disc,
1861-
GPR64noip:$AddrDisc)]>, Sched<[]> {
1861+
GPR64:$AddrDisc)]>, Sched<[]> {
18621862
let isCodeGenOnly = 1;
18631863
let isCall = 1;
1864-
let Defs = [X17,LR];
1864+
let Defs = [X16,X17,LR];
18651865
let Uses = [SP];
18661866
}
18671867

18681868
// BRA pseudo, generalized version of BRAA/BRAB/Z.
18691869
// This directly manipulates x16/x17, which are the only registers the OS
18701870
// guarantees are safe to use for sensitive operations.
18711871
def BRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1872-
GPR64noip:$AddrDisc), []>, Sched<[]> {
1872+
GPR64:$AddrDisc), []>, Sched<[]> {
18731873
let isCodeGenOnly = 1;
18741874
let hasNoSchedulingInfo = 1;
18751875
let hasSideEffects = 1;
@@ -1880,7 +1880,7 @@ let Predicates = [HasPAuth] in {
18801880
let isBarrier = 1;
18811881
let isIndirectBranch = 1;
18821882
let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1883-
let Defs = [X17];
1883+
let Defs = [X16,X17];
18841884
}
18851885

18861886
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
@@ -1971,7 +1971,7 @@ let Predicates = [HasPAuth] in {
19711971
// make sure at least one register is usable as a scratch one - for that
19721972
// purpose, use tcGPRnotx16x17 register class for one of the operands.
19731973
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Size = 16,
1974-
Uses = [SP] in {
1974+
Defs = [X16,X17], Uses = [SP] in {
19751975
def AUTH_TCRETURN
19761976
: Pseudo<(outs), (ins tcGPRnotx16x17:$dst, i32imm:$FPDiff, i32imm:$Key,
19771977
i64imm:$Disc, tcGPR64:$AddrDisc),

llvm/test/CodeGen/AArch64/ptrauth-call.ll

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,33 @@ define void @test_tailcall_omit_mov_x16_x16(ptr %objptr) #0 {
188188
ret void
189189
}
190190

191+
define i32 @test_call_omit_extra_moves(ptr %objptr) #0 {
192+
; CHECK-LABEL: test_call_omit_extra_moves:
193+
; DARWIN-NEXT: stp x29, x30, [sp, #-16]!
194+
; ELF-NEXT: str x30, [sp, #-16]!
195+
; CHECK-NEXT: ldr x16, [x0]
196+
; CHECK-NEXT: mov x17, x0
197+
; CHECK-NEXT: movk x17, #6503, lsl #48
198+
; CHECK-NEXT: autda x16, x17
199+
; CHECK-NEXT: ldr x8, [x16]
200+
; CHECK-NEXT: movk x16, #34646, lsl #48
201+
; CHECK-NEXT: blraa x8, x16
202+
; CHECK-NEXT: mov w0, #42
203+
; DARWIN-NEXT: ldp x29, x30, [sp], #16
204+
; ELF-NEXT: ldr x30, [sp], #16
205+
; CHECK-NEXT: ret
206+
%vtable.signed = load ptr, ptr %objptr
207+
%objptr.int = ptrtoint ptr %objptr to i64
208+
%vtable.discr = tail call i64 @llvm.ptrauth.blend(i64 %objptr.int, i64 6503)
209+
%vtable.signed.int = ptrtoint ptr %vtable.signed to i64
210+
%vtable.int = tail call i64 @llvm.ptrauth.auth(i64 %vtable.signed.int, i32 2, i64 %vtable.discr)
211+
%vtable = inttoptr i64 %vtable.int to ptr
212+
%callee.signed = load ptr, ptr %vtable
213+
%callee.discr = tail call i64 @llvm.ptrauth.blend(i64 %vtable.int, i64 34646)
214+
%call.result = tail call i32 %callee.signed(ptr %objptr) [ "ptrauth"(i32 0, i64 %callee.discr) ]
215+
ret i32 42
216+
}
217+
191218
define i32 @test_call_ia_arg(ptr %arg0, i64 %arg1) #0 {
192219
; DARWIN-LABEL: test_call_ia_arg:
193220
; DARWIN-NEXT: stp x29, x30, [sp, #-16]!

0 commit comments

Comments
 (0)