[AArch64][PAC] Lower jump-tables using hardened pseudo. #97666

Merged
139 changes: 139 additions & 0 deletions llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -104,6 +104,8 @@ class AArch64AsmPrinter : public AsmPrinter {

void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);

void LowerHardenedBRJumpTable(const MachineInstr &MI);

void LowerMOPS(MCStreamer &OutStreamer, const MachineInstr &MI);

void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
@@ -1310,6 +1312,139 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
.addImm(Size == 4 ? 0 : 2));
}

void AArch64AsmPrinter::LowerHardenedBRJumpTable(const MachineInstr &MI) {
unsigned InstsEmitted = 0;

const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
assert(MJTI && "Can't lower jump-table dispatch without JTI");

const std::vector<MachineJumpTableEntry> &JTs = MJTI->getJumpTables();
assert(!JTs.empty() && "Invalid JT index for jump-table dispatch");

// Emit:
// mov x17, #<size of table> ; depending on table size, with MOVKs
// cmp x16, x17 ; or #imm if table size fits in 12-bit
// csel x16, x16, xzr, ls ; check for index overflow
//
// adrp x17, Ltable@PAGE ; materialize table address
// add x17, x17, Ltable@PAGEOFF
// ldrsw x16, [x17, x16, lsl #2] ; load table entry
//
// Lanchor:
// adr x17, Lanchor ; compute target address
// add x16, x17, x16
// br x16 ; branch to target

MachineOperand JTOp = MI.getOperand(0);

unsigned JTI = JTOp.getIndex();
assert(!AArch64FI->getJumpTableEntryPCRelSymbol(JTI) &&
"unsupported compressed jump table");

const uint64_t NumTableEntries = JTs[JTI].MBBs.size();

// cmp only supports a 12-bit immediate. If we need more, materialize the
// immediate, using x17 as a scratch register.
uint64_t MaxTableEntry = NumTableEntries - 1;
if (isUInt<12>(MaxTableEntry)) {
EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXri)
.addReg(AArch64::XZR)
.addReg(AArch64::X16)
.addImm(MaxTableEntry)
.addImm(0));
++InstsEmitted;
Contributor:

Nit: you probably want to create a helper lambda which will execute both EmitToStreamer and ++InstsEmitted - these are always used together in AArch64AsmPrinter::LowerHardenedBRJumpTable. You can also use this lambda to get rid of *OutStreamer in each EmitToStreamer invocation (just set this argument inside the lambda, since it's always the same).

Feel free to ignore.

Member Author:

Yes, let's do that for the whole file someplace else.
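
A minimal sketch of the suggested helper (hypothetical, not part of this patch; the name is illustrative):

    auto EmitAndCount = [&](const MCInst &Inst) {
      EmitToStreamer(*OutStreamer, Inst); // stream the lowered instruction
      ++InstsEmitted;                     // keep the size accounting in sync
    };
    // usage: EmitAndCount(MCInstBuilder(AArch64::BR).addReg(AArch64::X16));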

} else {
EmitToStreamer(*OutStreamer,
MCInstBuilder(AArch64::MOVZXi)
.addReg(AArch64::X17)
.addImm(static_cast<uint16_t>(MaxTableEntry))
.addImm(0));
++InstsEmitted;
// It's sad that we have to manually materialize instructions, but we can't
// trivially reuse the main pseudo expansion logic.
// A MOVK sequence is easy enough to generate and handles the general case.
for (int Offset = 16; Offset < 64; Offset += 16) {
if ((MaxTableEntry >> Offset) == 0)
break;
EmitToStreamer(*OutStreamer,
MCInstBuilder(AArch64::MOVKXi)
.addReg(AArch64::X17)
.addReg(AArch64::X17)
.addImm(static_cast<uint16_t>(MaxTableEntry >> Offset))
.addImm(Offset));
++InstsEmitted;
}
EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXrs)
.addReg(AArch64::XZR)
.addReg(AArch64::X16)
.addReg(AArch64::X17)
.addImm(0));
++InstsEmitted;
}
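// Worked example (illustrative, not emitted verbatim by the patch): for a
// hypothetical MaxTableEntry of 0x23456, which exceeds the 12-bit cmp range,
// the sequence above produces:
//   movz x17, #0x3456          ; bits [15:0]
//   movk x17, #0x2, lsl #16    ; bits [31:16]; loop exits, upper bits are zero
//   subs xzr, x16, x17         ; i.e. cmp x16, x17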

// This picks entry #0 on failure.
// We might want to trap instead.
EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::CSELXr)
.addReg(AArch64::X16)
.addReg(AArch64::X16)
.addReg(AArch64::XZR)
.addImm(AArch64CC::LS));
++InstsEmitted;

// Prepare the @PAGE/@PAGEOFF low/high operands.
MachineOperand JTMOHi(JTOp), JTMOLo(JTOp);
MCOperand JTMCHi, JTMCLo;

JTMOHi.setTargetFlags(AArch64II::MO_PAGE);
JTMOLo.setTargetFlags(AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

MCInstLowering.lowerOperand(JTMOHi, JTMCHi);
MCInstLowering.lowerOperand(JTMOLo, JTMCLo);

EmitToStreamer(
*OutStreamer,
MCInstBuilder(AArch64::ADRP).addReg(AArch64::X17).addOperand(JTMCHi));
++InstsEmitted;

EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXri)
.addReg(AArch64::X17)
.addReg(AArch64::X17)
.addOperand(JTMCLo)
.addImm(0));
++InstsEmitted;

EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
.addReg(AArch64::X16)
.addReg(AArch64::X17)
.addReg(AArch64::X16)
.addImm(0)
.addImm(1));
++InstsEmitted;

MCSymbol *AdrLabel = MF->getContext().createTempSymbol();
const auto *AdrLabelE = MCSymbolRefExpr::create(AdrLabel, MF->getContext());
AArch64FI->setJumpTableEntryInfo(JTI, 4, AdrLabel);

OutStreamer->emitLabel(AdrLabel);
EmitToStreamer(
*OutStreamer,
MCInstBuilder(AArch64::ADR).addReg(AArch64::X17).addExpr(AdrLabelE));
++InstsEmitted;

EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
.addReg(AArch64::X16)
.addReg(AArch64::X17)
.addReg(AArch64::X16)
.addImm(0));
++InstsEmitted;

EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::BR).addReg(AArch64::X16));
++InstsEmitted;

(void)InstsEmitted;
assert(STI->getInstrInfo()->getInstSizeInBytes(MI) >= InstsEmitted * 4);
}

void AArch64AsmPrinter::LowerMOPS(llvm::MCStreamer &OutStreamer,
const llvm::MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
@@ -2177,6 +2312,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
LowerJumpTableDest(*OutStreamer, *MI);
return;

case AArch64::BR_JumpTable:
LowerHardenedBRJumpTable(*MI);
return;

case AArch64::FMOVH0:
case AArch64::FMOVS0:
case AArch64::FMOVD0:
24 changes: 24 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10678,6 +10678,30 @@ SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
AFI->setJumpTableEntryInfo(JTI, 4, nullptr);

// With aarch64-jump-table-hardening, we only expand the jump table dispatch
// sequence later, to guarantee the integrity of the intermediate values.
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
"aarch64-jump-table-hardening")) {
CodeModel::Model CM = getTargetMachine().getCodeModel();
Contributor:

Isn't the check against arm64e duplicating the check against the function attribute? I'm not aware of the full arm64e spec, but I suppose the frontend will set the "jump-table-hardening" attribute when compiling for arm64e, so there is probably no need for this check here. The same applies to GlobalISel.

Feel free to ignore - I'm OK with such Apple-specific stuff if it's considered essential.

if (Subtarget->isTargetMachO()) {
Contributor:

Could you please explain what is MachO-specific here? It looks like at least Linux+ELF could also be supported without changing the code logic, just by deleting this assertion. ELF tests will require a slight change since assembly syntax and label names are a bit different, so I'm happy to add them myself later if it's out of scope for this patch.

If there is definitely something MachO-specific, shouldn't it also be checked in GlobalISel? Alternatively, the check could be moved to the pseudo expansion.

if (CM != CodeModel::Small && CM != CodeModel::Large)
report_fatal_error("Unsupported code-model for hardened jump-table");
} else {
// Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
assert(Subtarget->isTargetELF() &&
"jump table hardening only supported on MachO/ELF");
if (CM != CodeModel::Small)
report_fatal_error("Unsupported code-model for hardened jump-table");
}

SDValue X16Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X16,
Entry, SDValue());
SDNode *B = DAG.getMachineNode(AArch64::BR_JumpTable, DL, MVT::Other,
DAG.getTargetJumpTable(JTI, MVT::i32),
X16Copy.getValue(0), X16Copy.getValue(1));
return SDValue(B, 0);
}

SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
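
For reference, the hardened path above is gated on a per-function string attribute. A minimal IR-level opt-in looks like the tests at the end of this patch (which also pass -aarch64-min-jump-table-entries=1 so that small switches still form a jump table); function and label names here are illustrative:

    define i32 @dispatch(i32 %in) "aarch64-jump-table-hardening" {
      switch i32 %in, label %def [
        i32 0, label %a
        i32 1, label %b
      ]
    def:
      ret i32 0
    a:
      ret i32 1
    b:
      ret i32 2
    }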
27 changes: 27 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1143,6 +1143,33 @@ def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
Sched<[]>;
}

// A hardened but more expensive version of jump-table dispatch.
// This combines the target address computation (otherwise done using the
// JumpTableDest pseudos above) with the branch itself (otherwise done using
// a plain BR) in a single non-attackable sequence.
//
// We take the final entry index as an operand to allow isel freedom. This does
// mean that the index can be attacker-controlled. To address that, we also do
// limited checking of the offset, mainly ensuring it still points within the
// jump-table array. When it doesn't, this branches to the first entry.
// We might want to trap instead.
//
// This is intended for use in conjunction with ptrauth for other code pointers,
// to avoid signing jump-table entries and turning them into pointers.
//
// Entry index is passed in x16. Clobbers x16/x17/nzcv.
def BR_JumpTable : Pseudo<(outs), (ins i32imm:$jti), []>, Sched<[]> {
let isBranch = 1;
let isTerminator = 1;
let isIndirectBranch = 1;
let isBarrier = 1;
let isNotDuplicable = 1;
let Defs = [X16,X17,NZCV];
let Uses = [X16];
let Size = 44; // 28 fixed + 16 variable, for table size materialization
}
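// A rough size accounting, derived from the AsmPrinter expansion (a sketch,
// not normative): the trailing csel/adrp/add/ldrsw/adr/add/br are always
// emitted (7 instructions, 28 bytes), while the leading bounds check ranges
// from a single cmp up to movz + movk(s) + cmp (16 bytes covers table sizes
// below 2^48 entries). The assert at the end of the expansion cross-checks
// this against the Size above.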

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
25 changes: 24 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -3597,10 +3597,33 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
unsigned JTI = I.getOperand(1).getIndex();
Register Index = I.getOperand(2).getReg();

MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);

// With aarch64-jump-table-hardening, we only expand the jump table dispatch
// sequence later, to guarantee the integrity of the intermediate values.
if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
CodeModel::Model CM = TM.getCodeModel();
if (STI.isTargetMachO()) {
if (CM != CodeModel::Small && CM != CodeModel::Large)
report_fatal_error("Unsupported code-model for hardened jump-table");
} else {
// Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
assert(STI.isTargetELF() &&
"jump table hardening only supported on MachO/ELF");
if (CM != CodeModel::Small)
report_fatal_error("Unsupported code-model for hardened jump-table");
}

MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
MIB.buildInstr(AArch64::BR_JumpTable)
.addJumpTableIndex(I.getOperand(1).getIndex());
I.eraseFromParent();
return true;
}

Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);

MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
{TargetReg, ScratchReg}, {JTAddr, Index})
.addJumpTableIndex(JTI);
133 changes: 133 additions & 0 deletions llvm/test/CodeGen/AArch64/hardened-br-jump-table.ll
@@ -0,0 +1,133 @@
; RUN: rm -rf %t && split-file %s %t

;--- err1.ll

; RUN: not --crash llc %t/err1.ll -mtriple=aarch64-elf \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -code-model=large \
; RUN: -o - -verify-machineinstrs 2>&1 | FileCheck %s --check-prefix=ERR1

; RUN: not --crash llc %t/err1.ll -mtriple=aarch64-elf \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -global-isel -global-isel-abort=1 \
; RUN: -code-model=large \
; RUN: -o - -verify-machineinstrs 2>&1 | FileCheck %s --check-prefix=ERR1

; ERR1: LLVM ERROR: Unsupported code-model for hardened jump-table
define i32 @test_jumptable(i32 %in) "aarch64-jump-table-hardening" {

switch i32 %in, label %def [
i32 0, label %lbl1
i32 1, label %lbl2
]

def:
ret i32 0

lbl1:
ret i32 1

lbl2:
ret i32 2
}

;--- test.ll

; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO

; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -global-isel -global-isel-abort=1 \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO

; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -code-model=large \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO

; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -global-isel -global-isel-abort=1 \
; RUN: -code-model=large \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO

; RUN: llc %t/test.ll -mtriple=aarch64-elf \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=ELF

; RUN: llc %t/test.ll -mtriple=aarch64-elf \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -global-isel -global-isel-abort=1 \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=ELF

; MACHO-LABEL: test_jumptable:
; MACHO: mov w16, w0
; MACHO: cmp x16, #5
; MACHO: csel x16, x16, xzr, ls
; MACHO-NEXT: adrp x17, LJTI0_0@PAGE
; MACHO-NEXT: add x17, x17, LJTI0_0@PAGEOFF
; MACHO-NEXT: ldrsw x16, [x17, x16, lsl #2]
; MACHO-NEXT: Ltmp0:
; MACHO-NEXT: adr x17, Ltmp0
; MACHO-NEXT: add x16, x17, x16
; MACHO-NEXT: br x16

; ELF-LABEL: test_jumptable:
; ELF: mov w16, w0
; ELF: cmp x16, #5
; ELF: csel x16, x16, xzr, ls
; ELF-NEXT: adrp x17, .LJTI0_0
; ELF-NEXT: add x17, x17, :lo12:.LJTI0_0
; ELF-NEXT: ldrsw x16, [x17, x16, lsl #2]
; ELF-NEXT: .Ltmp0:
; ELF-NEXT: adr x17, .Ltmp0
; ELF-NEXT: add x16, x17, x16
; ELF-NEXT: br x16

define i32 @test_jumptable(i32 %in) "aarch64-jump-table-hardening" {

switch i32 %in, label %def [
i32 0, label %lbl1
i32 1, label %lbl2
i32 2, label %lbl3
i32 4, label %lbl4
i32 5, label %lbl5
]

def:
ret i32 0

lbl1:
ret i32 1

lbl2:
ret i32 2

lbl3:
ret i32 4

lbl4:
ret i32 8

lbl5:
ret i32 10

}

; MACHO-LABEL: LJTI0_0:
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0

; ELF-LABEL: .LJTI0_0:
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0