-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[BOLT] Gadget scanner: detect address materialization and arithmetic #132540
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BOLT] Gadget scanner: detect address materialization and arithmetic #132540
Conversation
8f1221b
to
b79f18b
Compare
@llvm/pr-subscribers-bolt Author: Anatoly Trosinenko (atrosinenko) ChangesIn addition to authenticated pointers, consider the contents of a
Full diff: https://github.com/llvm/llvm-project/pull/132540.diff 6 Files Affected:
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index b3d54ccd5955d..50137a9137951 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
return getNoRegister();
}
+ virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return getNoRegister();
+ }
+
+ /// Analyzes if this instruction can safely perform address arithmetics.
+ ///
+ /// If the first element of the returned pair is no-register, this instruction
+ /// is considered unknown. Otherwise, (output, input) pair is returned,
+ /// so that output is as trusted as input is.
+ virtual std::pair<MCPhysReg, MCPhysReg>
+ analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return std::make_pair(getNoRegister(), getNoRegister());
+ }
+
virtual bool isTerminator(const MCInst &Inst) const;
virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 08b55bb55d0dc..10545347a6711 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -325,18 +325,7 @@ class PacRetAnalysis
});
}
- State computeNext(const MCInst &Point, const State &Cur) {
- PacStatePrinter P(BC);
- LLVM_DEBUG({
- dbgs() << " PacRetAnalysis::ComputeNext(";
- BC.InstPrinter->printInst(&const_cast<MCInst &>(Point), 0, "", *BC.STI,
- dbgs());
- dbgs() << ", ";
- P.print(dbgs(), Cur);
- dbgs() << ")\n";
- });
-
- State Next = Cur;
+ BitVector getClobberedRegs(const MCInst &Point) const {
BitVector Clobbered(NumRegs, false);
// Assume a call can clobber all registers, including callee-saved
// registers. There's a good chance that callee-saved registers will be
@@ -349,6 +338,62 @@ class PacRetAnalysis
Clobbered.set();
else
BC.MIB->getClobberedRegs(Point, Clobbered);
+ return Clobbered;
+ }
+
+ // Returns all registers that can be treated as if they are written by an
+ // authentication instruction.
+ SmallVector<MCPhysReg> getAuthenticatedRegs(const MCInst &Point,
+ const State &Cur) const {
+ SmallVector<MCPhysReg> Regs;
+ const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+ // A signed pointer can be authenticated, or
+ ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
+ if (AutReg && *AutReg != NoReg)
+ Regs.push_back(*AutReg);
+
+ // ... a safe address can be materialized, or
+ MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+ if (NewAddrReg != NoReg)
+ Regs.push_back(NewAddrReg);
+
+ // ... address can be updated in a safe manner, producing the result
+ // which is as trusted as the input address.
+ MCPhysReg ArithResult, ArithSrc;
+ std::tie(ArithResult, ArithSrc) =
+ BC.MIB->analyzeSafeAddressArithmetics(Point);
+ if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+ Regs.push_back(ArithResult);
+
+ return Regs;
+ }
+
+ State computeNext(const MCInst &Point, const State &Cur) {
+ PacStatePrinter P(BC);
+ LLVM_DEBUG({
+ dbgs() << " PacRetAnalysis::ComputeNext(";
+ BC.InstPrinter->printInst(&const_cast<MCInst &>(Point), 0, "", *BC.STI,
+ dbgs());
+ dbgs() << ", ";
+ P.print(dbgs(), Cur);
+ dbgs() << ")\n";
+ });
+
+ // First, compute various properties of the instruction, taking the state
+ // before its execution into account, if necessary.
+
+ BitVector Clobbered = getClobberedRegs(Point);
+ // Compute the set of registers that can be considered as written by
+ // an authentication instruction. This includes operations that are
+ // *strictly better* than authentication, such as materializing a
+ // PC-relative constant.
+ SmallVector<MCPhysReg> AuthenticatedOrBetter =
+ getAuthenticatedRegs(Point, Cur);
+
+ // Then, compute the state after this instruction is executed.
+ State Next = Cur;
+
Next.SafeToDerefRegs.reset(Clobbered);
// Keep track of this instruction if it writes to any of the registers we
// need to track that for:
@@ -356,17 +401,18 @@ class PacRetAnalysis
if (Clobbered[Reg])
lastWritingInsts(Next, Reg) = {&Point};
- ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
- if (AutReg && *AutReg != BC.MIB->getNoRegister()) {
- // The sub-registers of *AutReg are also trusted now, but not its
- // super-registers (as they retain untrusted register units).
- BitVector AuthenticatedSubregs =
- BC.MIB->getAliases(*AutReg, /*OnlySmaller=*/true);
- for (MCPhysReg Reg : AuthenticatedSubregs.set_bits()) {
- Next.SafeToDerefRegs.set(Reg);
- if (RegsToTrackInstsFor.isTracked(Reg))
- lastWritingInsts(Next, Reg).clear();
- }
+ // After accounting for clobbered registers in general, override the state
+ // according to authentication and other *special cases* of clobbering.
+
+ // The sub-registers of each authenticated register are also trusted now,
+ // but not their super-registers (as they retain untrusted register units).
+ BitVector AuthenticatedSubregs(NumRegs);
+ for (MCPhysReg AutReg : AuthenticatedOrBetter)
+ AuthenticatedSubregs |= BC.MIB->getAliases(AutReg, /*OnlySmaller=*/true);
+ for (MCPhysReg Reg : AuthenticatedSubregs.set_bits()) {
+ Next.SafeToDerefRegs.set(Reg);
+ if (RegsToTrackInstsFor.isTracked(Reg))
+ lastWritingInsts(Next, Reg).clear();
}
LLVM_DEBUG({
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 9ce1514639f95..d5e92fb47a39b 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -319,6 +319,36 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
}
+ MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const override {
+ switch (Inst.getOpcode()) {
+ case AArch64::ADR:
+ case AArch64::ADRP:
+ return Inst.getOperand(0).getReg();
+ default:
+ return getNoRegister();
+ }
+ }
+
+ std::pair<MCPhysReg, MCPhysReg>
+ analyzeSafeAddressArithmetics(const MCInst &Inst) const override {
+ switch (Inst.getOpcode()) {
+ default:
+ return std::make_pair(getNoRegister(), getNoRegister());
+ case AArch64::ADDXri:
+ case AArch64::SUBXri:
+ return std::make_pair(Inst.getOperand(0).getReg(),
+ Inst.getOperand(1).getReg());
+ case AArch64::ORRXrs:
+ // "mov Xd, Xm" is equivalent to "orr Xd, XZR, Xm, lsl #0"
+ if (Inst.getOperand(1).getReg() != AArch64::XZR ||
+ Inst.getOperand(3).getImm() != 0)
+ return std::make_pair(getNoRegister(), getNoRegister());
+
+ return std::make_pair(Inst.getOperand(0).getReg(),
+ Inst.getOperand(2).getReg());
+ }
+ }
+
bool isADRP(const MCInst &Inst) const override {
return Inst.getOpcode() == AArch64::ADRP;
}
diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
index 586da6d2a92e4..d835f218b3d25 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
@@ -141,24 +141,9 @@ f_nonx30_ret_ok:
stp x29, x30, [sp, #-16]!
mov x29, sp
bl g
- add x0, x0, #3
ldp x29, x30, [sp], #16
- // FIXME: Should the scanner understand that an authenticated register (below x30,
- // after the autiasp instruction), is OK to be moved to another register
- // and then that register being used to return?
- // This respects that pac-ret hardening intent, but the scanner currently
- // will produce a false positive for this.
- // Is it worthwhile to make the scanner more complex for this case?
- // So far, scanning many millions of instructions across a linux distro,
- // I haven't encountered such an example.
- // The ".if 0" block below tests this case and currently fails.
-.if 0
autiasp
mov x16, x30
-.else
- mov x16, x30
- autia x16, sp
-.endif
// CHECK-NOT: function f_nonx30_ret_ok
ret x16
.size f_nonx30_ret_ok, .-f_nonx30_ret_ok
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
new file mode 100644
index 0000000000000..b7559f15d6bb9
--- /dev/null
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
@@ -0,0 +1,228 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pacret %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mentions the instructions that are used incorrectly.
+//
+// FIXME: Switch to PAC* instructions instead of indirect tail call for testing
+// if a register is considered safe when detection of signing oracles is
+// implemented, as it is more traditional usage of PC-relative constants.
+// Moreover, using PAC instructions would improve test robustness, as
+// handling of *calls* can be influenced by what BOLT classifies as a
+// tail call, for example.
+
+ .text
+
+// Define a function that is reachable by ADR instruction.
+ .type sym,@function
+sym:
+ ret
+ .size sym, .-sym
+
+ .globl good_adr
+ .type good_adr,@function
+good_adr:
+// CHECK-NOT: good_adr
+ adr x0, sym
+ br x0
+ .size good_adr, .-good_adr
+
+ .globl good_adrp
+ .type good_adrp,@function
+good_adrp:
+// CHECK-NOT: good_adrp
+ adrp x0, sym
+ br x0
+ .size good_adrp, .-good_adrp
+
+ .globl good_adrp_add
+ .type good_adrp_add,@function
+good_adrp_add:
+// CHECK-NOT: good_adrp_add
+ adrp x0, sym
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adrp_add, .-good_adrp_add
+
+ .globl good_adrp_add_with_const_offset
+ .type good_adrp_add_with_const_offset,@function
+good_adrp_add_with_const_offset:
+// CHECK-NOT: good_adrp_add_with_const_offset
+ adrp x0, sym
+ add x0, x0, :lo12:sym
+ add x0, x0, #8
+ br x0
+ .size good_adrp_add_with_const_offset, .-good_adrp_add_with_const_offset
+
+ .globl bad_adrp_with_nonconst_offset
+ .type bad_adrp_with_nonconst_offset,@function
+bad_adrp_with_nonconst_offset:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, x1
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{.*}}
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, x1
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ adrp x0, sym
+ add x0, x0, x1
+ br x0
+ .size bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset
+
+ .globl bad_split_adrp
+ .type bad_split_adrp,@function
+bad_split_adrp:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW
+ cbz x2, 1f
+ adrp x0, sym
+1:
+ add x0, x0, :lo12:sym
+ br x0
+ .size bad_split_adrp, .-bad_split_adrp
+
+// Materialization of absolute addresses is not expected.
+
+ .globl bad_immediate_constant
+ .type bad_immediate_constant,@function
+bad_immediate_constant:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_immediate_constant, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, #{{.*}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, #{{.*}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ movz x0, #1234
+ br x0
+ .size bad_immediate_constant, .-bad_immediate_constant
+
+// Any ADR or ADRP instruction followed by any number of increments/decrements
+// by constant is considered safe.
+
+ .globl good_adr_with_add
+ .type good_adr_with_add,@function
+good_adr_with_add:
+// CHECK-NOT: good_adr_with_add
+ adr x0, sym
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adr_with_add, .-good_adr_with_add
+
+ .globl good_adrp_with_add_non_consecutive
+ .type good_adrp_with_add_non_consecutive,@function
+good_adrp_with_add_non_consecutive:
+// CHECK-NOT: good_adrp_with_add_non_consecutive
+ adrp x0, sym
+ mul x1, x2, x3
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adrp_with_add_non_consecutive, .-good_adrp_with_add_non_consecutive
+
+ .globl good_many_offsets
+ .type good_many_offsets,@function
+good_many_offsets:
+// CHECK-NOT: good_many_offsets
+ adrp x0, sym
+ add x1, x0, #8
+ add x2, x1, :lo12:sym
+ br x2
+ .size good_many_offsets, .-good_many_offsets
+
+// MOV Xd, Xm (which is an alias of ORR Xd, XZR, Xm) is handled as part of
+// support for address arithmetics, but ORR in general is not.
+
+ .globl good_mov_reg
+ .type good_mov_reg,@function
+good_mov_reg:
+// CHECK-NOT: good_mov_reg
+ adrp x0, sym
+ mov x1, x0
+ orr x2, xzr, x1 // the same as "mov x2, x1"
+ br x2
+ .size good_mov_reg, .-good_mov_reg
+
+ .globl bad_orr_not_xzr
+ .type bad_orr_not_xzr,@function
+bad_orr_not_xzr:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_xzr, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, x1, x0
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x1, #0
+// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, x1, x0
+// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL
+ adrp x0, sym
+ movz x1, #0
+ orr x2, x1, x0
+ br x2
+ .size bad_orr_not_xzr, .-bad_orr_not_xzr
+
+ .globl bad_orr_not_lsl0
+ .type bad_orr_not_lsl0,@function
+bad_orr_not_lsl0:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_lsl0, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1
+// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL
+ adrp x0, sym
+ orr x2, xzr, x0, lsl #1
+ br x2
+ .size bad_orr_not_lsl0, .-bad_orr_not_lsl0
+
+// Check that the input register operands of `add`/`mov` is correct.
+
+ .globl bad_add_input_reg
+ .type bad_add_input_reg,@function
+bad_add_input_reg:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_add_input_reg, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x1, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x1, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ adrp x0, sym
+ add x0, x1, :lo12:sym
+ br x0
+ .size bad_add_input_reg, .-bad_add_input_reg
+
+ .globl bad_mov_input_reg
+ .type bad_mov_input_reg,@function
+bad_mov_input_reg:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_mov_input_reg, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, x1
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, x1
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ adrp x0, sym
+ mov x0, x1
+ br x0
+ .size bad_mov_input_reg, .-bad_mov_input_reg
+
+ .globl main
+ .type main,@function
+main:
+ mov x0, 0
+ ret
+ .size main, .-main
diff --git a/bolt/test/binary-analysis/AArch64/lit.local.cfg b/bolt/test/binary-analysis/AArch64/lit.local.cfg
index 6ac7d3cc0fec7..54e093566dea9 100644
--- a/bolt/test/binary-analysis/AArch64/lit.local.cfg
+++ b/bolt/test/binary-analysis/AArch64/lit.local.cfg
@@ -1,7 +1,8 @@
if "AArch64" not in config.root.targets:
config.unsupported = True
-flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding"
+# -Wl,--no-relax prevents converting ADRP+ADD pairs into NOP+ADR.
+flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding -Wl,--no-relax"
config.substitutions.insert(0, ("%cflags", f"%cflags {flags}"))
config.substitutions.insert(0, ("%cxxflags", f"%cxxflags {flags}"))
|
bbb379d
to
126800a
Compare
b79f18b
to
03bff7c
Compare
126800a
to
2d82b35
Compare
72e3de5
to
53f6310
Compare
bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
Outdated
Show resolved
Hide resolved
movz x0, #1234 | ||
br x0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the classification of good and bad sequences is probably a bit tricky in general. For example, the #1234
is not attacker-controlled, and in some real code we might use movz
and movk
to materialise a constant address.
We can surely update these tests as other cases come up, so I don't think this needs to change, but I wanted to acknowledge it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right, updated the comment to clarify this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess it would be straightforward to add support for movz
and movk
by adding a few lines to getMaterializedAddressRegForPtrAuth
and analyzeAddressArithmeticsForPtrAuth
?
Anyway, I agree with @jacobbramley , this can be implemented in a later patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't have a strong opinion for or against trying to allow every instruction that is expected to be safe, but it should probably be harmless to treat movz
as yet another address-materializing instruction (in addition to adr
and adrp
) and movk
as yet another case of address arithmetics.
On the other hand, when testing a prototype, I saw a few resign-with-offset instruction sequences that looked completely harmless, but they were reported as signing or authentication oracles. The reason was that the offset did not fit into the immediate operand of add Xd, Xn, imm
, so it was first placed to register and then add Xd, Xm, Xn
was emitted. There were very few such reports, but if we would like to eliminate such false-positives, it may be useful to handle "address constants" and "small constant numbers" separately.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That makes sense.
I seem to remember that as part of the stack-clash scanner prototype, I also implemented a dataflow analysis that keeps track of whether a particular register contains a constant.
I'm just mentioning it here in case it would come in handy to avoid such false-positive where constants are in registers, rather than as an immediate in the instruction encoding.
Of course, that would not be for this PR, but for a later improvement.
5a43ad9
to
e611f6c
Compare
e611f6c
to
eebc0ca
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@kbeyls Thank you for the comments, my wording was indeed unclear.
Considering the threat model, there are two documents on Pointer Authentication in LLVM, one under llvm/docs
and another one under clang/docs
. As far as I remember, one of these documents contained a detailed description of the threat model in the original ptrauth branch, but it seems to get lost at some point.
Tagging @ahmedbougacha.
eebc0ca
to
a5d8504
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
In addition to authenticated pointers, consider the contents of a register safe if it was * written by PC-relative address computation * updated by an arithmetic instruction whose input address is safe
9ddb906
to
2bf9f89
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
just a few more minor comments on the test cases
movz x0, #1234 | ||
br x0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess it would be straightforward to add support for movz
and movk
by adding a few lines to getMaterializedAddressRegForPtrAuth
and analyzeAddressArithmeticsForPtrAuth
?
Anyway, I agree with @jacobbramley , this can be implemented in a later patch.
bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, this now looks good to merge to me.
movz x0, #1234 | ||
br x0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That makes sense.
I seem to remember that as part of the stack-clash scanner prototype, I also implemented a dataflow analysis that keeps track of whether a particular register contains a constant.
I'm just mentioning it here in case it would come in handy to avoid such false-positive where constants are in registers, rather than as an immediate in the instruction encoding.
Of course, that would not be for this PR, but for a later improvement.
bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
Outdated
Show resolved
Hide resolved
Local branch origin/amd-gfx bc9dded Merged main:be6ccc98f382 into origin/amd-gfx:943bd63b166f Remote branch main 0fc7aec [BOLT] Gadget scanner: detect address materialization and arithmetic (llvm#132540)
In addition to authenticated pointers, consider the contents of a
register safe if it was