Skip to content

Commit 042eb37

Browse files
committed
Implement register allocation for inline assembly
1 parent fd87c6d commit 042eb37

File tree

1 file changed

+207
-70
lines changed

1 file changed

+207
-70
lines changed

src/inline_asm.rs

Lines changed: 207 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::fmt::Write;
66

77
use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
88
use rustc_middle::mir::InlineAsmOperand;
9+
use rustc_span::Symbol;
910
use rustc_target::asm::*;
1011

1112
pub(crate) fn codegen_inline_asm<'tcx>(
@@ -115,11 +116,21 @@ pub(crate) fn codegen_inline_asm<'tcx>(
115116
offset
116117
};
117118

119+
let mut asm_gen = InlineAssemblyGenerator {
120+
tcx: fx.tcx,
121+
arch: InlineAsmArch::X86_64,
122+
template,
123+
operands,
124+
options,
125+
registers: Vec::new(),
126+
};
127+
asm_gen.allocate_registers();
128+
118129
// FIXME overlap input and output slots to save stack space
119-
for operand in operands {
130+
for (i, operand) in operands.iter().enumerate() {
120131
match *operand {
121132
InlineAsmOperand::In { reg, ref value } => {
122-
let reg = expect_reg(reg);
133+
let reg = asm_gen.registers[i].unwrap();
123134
clobbered_regs.push((reg, new_slot(reg.reg_class())));
124135
inputs.push((
125136
reg,
@@ -128,7 +139,7 @@ pub(crate) fn codegen_inline_asm<'tcx>(
128139
));
129140
}
130141
InlineAsmOperand::Out { reg, late: _, place } => {
131-
let reg = expect_reg(reg);
142+
let reg = asm_gen.registers[i].unwrap();
132143
clobbered_regs.push((reg, new_slot(reg.reg_class())));
133144
if let Some(place) = place {
134145
outputs.push((
@@ -139,7 +150,7 @@ pub(crate) fn codegen_inline_asm<'tcx>(
139150
}
140151
}
141152
InlineAsmOperand::InOut { reg, late: _, ref in_value, out_place } => {
142-
let reg = expect_reg(reg);
153+
let reg = asm_gen.registers[i].unwrap();
143154
clobbered_regs.push((reg, new_slot(reg.reg_class())));
144155
inputs.push((
145156
reg,
@@ -164,94 +175,220 @@ pub(crate) fn codegen_inline_asm<'tcx>(
164175
fx.inline_asm_index += 1;
165176
let asm_name = format!("{}__inline_asm_{}", fx.symbol_name, inline_asm_index);
166177

167-
let generated_asm = generate_asm_wrapper(
168-
&asm_name,
169-
InlineAsmArch::X86_64,
170-
options,
171-
template,
172-
clobbered_regs,
173-
&inputs,
174-
&outputs,
175-
);
178+
let generated_asm = asm_gen.generate_asm_wrapper(&asm_name, clobbered_regs, &inputs, &outputs);
176179
fx.cx.global_asm.push_str(&generated_asm);
177180

178181
call_inline_asm(fx, &asm_name, slot_size, inputs, outputs);
179182
}
180183

181-
fn generate_asm_wrapper(
182-
asm_name: &str,
184+
/// State shared by the steps that lower a single inline asm block: the template and operands
/// being compiled, the options that apply to them, and the register chosen for each operand.
///
/// NOTE(review): this span is rendered from a diff. The lines whose marker ends in `-` further
/// down belong to the removed free function `generate_asm_wrapper`, not to this struct.
struct InlineAssemblyGenerator<'a, 'tcx> {
185+
// Used by `allocate_registers` to reach the session's target features and target description.
tcx: TyCtxt<'tcx>,
183186
// Architecture handed to the register query and to register save/restore/emit calls.
arch: InlineAsmArch,
187+
template: &'a [InlineAsmTemplatePiece],
188+
operands: &'a [InlineAsmOperand<'tcx>],
184189
options: InlineAsmOptions,
185-
template: &[InlineAsmTemplatePiece],
186-
clobbered_regs: Vec<(InlineAsmReg, Size)>,
187-
inputs: &[(InlineAsmReg, Size, Value)],
188-
outputs: &[(InlineAsmReg, Size, CPlace<'_>)],
189-
) -> String {
190-
let mut generated_asm = String::new();
191-
writeln!(generated_asm, ".globl {}", asm_name).unwrap();
192-
writeln!(generated_asm, ".type {},@function", asm_name).unwrap();
193-
writeln!(generated_asm, ".section .text.{},\"ax\",@progbits", asm_name).unwrap();
194-
writeln!(generated_asm, "{}:", asm_name).unwrap();
195-
196-
generated_asm.push_str(".intel_syntax noprefix\n");
197-
generated_asm.push_str(" push rbp\n");
198-
generated_asm.push_str(" mov rbp,rdi\n");
199-
200-
// Save clobbered registers
201-
if !options.contains(InlineAsmOptions::NORETURN) {
202-
// FIXME skip registers saved by the calling convention
203-
for &(reg, offset) in &clobbered_regs {
204-
save_register(&mut generated_asm, arch, reg, offset);
205-
}
206-
}
190+
// Created empty; `allocate_registers` replaces it with one entry per operand, in the same
// order as `operands`. An entry stays `None` for operands that no allocation arm matches.
registers: Vec<Option<InlineAsmReg>>,
191+
}
207192

208-
// Write input registers
209-
for &(reg, offset, _value) in inputs {
210-
restore_register(&mut generated_asm, arch, reg, offset);
211-
}
193+
impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
194+
/// Chooses a concrete register for every register operand and stores the per-operand result in
/// `self.registers`.
///
/// Runs three passes over `self.operands`:
/// 1. operands that name an explicit register keep it and mark it as occupied;
/// 2. register-class `out` (non-late) and `inout` operands take the first register of their
///    class with no overlapping register occupied in any way;
/// 3. register-class `in` and `lateout` operands take the first register of their class whose
///    overlapping registers are free for their own phase only.
///
/// Operands matched by none of the arms keep a `None` entry.
///
/// # Panics
///
/// Panics with "cannot allocate registers" when every candidate register of a requested class
/// is already occupied.
fn allocate_registers(&mut self) {
195+
let sess = self.tcx.sess;
196+
// Candidate registers per register class, filtered by the session's target features.
let map = allocatable_registers(
197+
self.arch,
198+
|feature| sess.target_features.contains(&Symbol::intern(feature)),
199+
&sess.target,
200+
);
201+
// Occupancy flags per register: `.0` is set by operands holding the register as an input
// (`in`), `.1` by late outputs (`lateout`); `out` and `inout` operands set both.
let mut allocated = FxHashMap::<_, (bool, bool)>::default();
202+
// Result slots, one per operand, filled in by the passes below.
let mut regs = vec![None; self.operands.len()];
203+
204+
// Add explicit registers to the allocated set.
205+
for (i, operand) in self.operands.iter().enumerate() {
206+
match *operand {
207+
InlineAsmOperand::In { reg: InlineAsmRegOrRegClass::Reg(reg), .. } => {
208+
regs[i] = Some(reg);
209+
allocated.entry(reg).or_default().0 = true;
210+
}
211+
InlineAsmOperand::Out {
212+
reg: InlineAsmRegOrRegClass::Reg(reg), late: true, ..
213+
} => {
214+
regs[i] = Some(reg);
215+
allocated.entry(reg).or_default().1 = true;
216+
}
217+
// Remaining explicit-register cases: non-late `out` (late ones matched above) and `inout`.
InlineAsmOperand::Out { reg: InlineAsmRegOrRegClass::Reg(reg), .. }
218+
| InlineAsmOperand::InOut { reg: InlineAsmRegOrRegClass::Reg(reg), .. } => {
219+
regs[i] = Some(reg);
220+
allocated.insert(reg, (true, true));
221+
}
222+
_ => (),
223+
}
224+
}
212225

213-
if options.contains(InlineAsmOptions::ATT_SYNTAX) {
214-
generated_asm.push_str(".att_syntax\n");
215-
}
226+
// Allocate out/inout/inlateout registers first because they are more constrained.
227+
for (i, operand) in self.operands.iter().enumerate() {
228+
match *operand {
229+
InlineAsmOperand::Out {
230+
reg: InlineAsmRegOrRegClass::RegClass(class),
231+
late: false,
232+
..
233+
}
234+
| InlineAsmOperand::InOut {
235+
reg: InlineAsmRegOrRegClass::RegClass(class), ..
236+
} => {
237+
let mut alloc_reg = None;
238+
for &reg in &map[&class] {
239+
let mut used = false;
240+
// Any earlier use of an overlapping register, whichever flag it set, rules this
// candidate out.
reg.overlapping_regs(|r| {
241+
if allocated.contains_key(&r) {
242+
used = true;
243+
}
244+
});
245+
246+
if !used {
247+
alloc_reg = Some(reg);
248+
break;
249+
}
250+
}
251+
252+
let reg = alloc_reg.expect("cannot allocate registers");
253+
regs[i] = Some(reg);
254+
allocated.insert(reg, (true, true));
255+
}
256+
_ => (),
257+
}
258+
}
216259

217-
// The actual inline asm
218-
for piece in template {
219-
match piece {
220-
InlineAsmTemplatePiece::String(s) => {
221-
generated_asm.push_str(s);
260+
// Allocate in/lateout.
// Each of these only conflicts with registers occupied in its own phase, so an `in` and a
// `lateout` can end up sharing one register.
261+
for (i, operand) in self.operands.iter().enumerate() {
262+
match *operand {
263+
InlineAsmOperand::In { reg: InlineAsmRegOrRegClass::RegClass(class), .. } => {
264+
let mut alloc_reg = None;
265+
for &reg in &map[&class] {
266+
let mut used = false;
267+
// Only the input flag (`.0`) of overlapping registers blocks an `in` operand.
reg.overlapping_regs(|r| {
268+
if allocated.get(&r).copied().unwrap_or_default().0 {
269+
used = true;
270+
}
271+
});
272+
273+
if !used {
274+
alloc_reg = Some(reg);
275+
break;
276+
}
277+
}
278+
279+
let reg = alloc_reg.expect("cannot allocate registers");
280+
regs[i] = Some(reg);
281+
allocated.entry(reg).or_default().0 = true;
282+
}
283+
InlineAsmOperand::Out {
284+
reg: InlineAsmRegOrRegClass::RegClass(class),
285+
late: true,
286+
..
287+
} => {
288+
let mut alloc_reg = None;
289+
for &reg in &map[&class] {
290+
let mut used = false;
291+
// Only the late-output flag (`.1`) of overlapping registers blocks a `lateout` operand.
reg.overlapping_regs(|r| {
292+
if allocated.get(&r).copied().unwrap_or_default().1 {
293+
used = true;
294+
}
295+
});
296+
297+
if !used {
298+
alloc_reg = Some(reg);
299+
break;
300+
}
301+
}
302+
303+
let reg = alloc_reg.expect("cannot allocate registers");
304+
regs[i] = Some(reg);
305+
allocated.entry(reg).or_default().1 = true;
306+
}
307+
_ => (),
222308
}
223-
InlineAsmTemplatePiece::Placeholder { operand_idx: _, modifier: _, span: _ } => todo!(),
224309
}
310+
311+
// Publish the result for the code that reads `self.registers`.
self.registers = regs;
225312
}
226-
generated_asm.push('\n');
227313

228-
if options.contains(InlineAsmOptions::ATT_SYNTAX) {
314+
/// Builds the assembly text of a global function named `asm_name` that wraps the inline asm
/// template, and returns it as a `String`.
///
/// The emitted text contains, in order: the symbol/section header, a prologue, saves of
/// `clobbered_regs` (skipped for `NORETURN`), writes of the `inputs` registers, the template
/// with each placeholder replaced by its allocated register, and then either the reads of the
/// `outputs` registers, the restores of `clobbered_regs` in reverse order and `pop rbp`/`ret`, or
/// `ud2` for `NORETURN`. A `.size` directive and a switch back to `.text` close the text.
///
/// # Panics
///
/// Panics if a template placeholder refers to an operand whose entry in `self.registers` is
/// `None`, or if emitting a register name fails.
fn generate_asm_wrapper(
315+
&self,
316+
asm_name: &str,
317+
clobbered_regs: Vec<(InlineAsmReg, Size)>,
318+
inputs: &[(InlineAsmReg, Size, Value)],
319+
outputs: &[(InlineAsmReg, Size, CPlace<'_>)],
320+
) -> String {
321+
let mut generated_asm = String::new();
322+
writeln!(generated_asm, ".globl {}", asm_name).unwrap();
323+
writeln!(generated_asm, ".type {},@function", asm_name).unwrap();
324+
writeln!(generated_asm, ".section .text.{},\"ax\",@progbits", asm_name).unwrap();
325+
writeln!(generated_asm, "{}:", asm_name).unwrap();
326+
229327
// The wrapper's own instructions are written in Intel syntax.
generated_asm.push_str(".intel_syntax noprefix\n");
230-
}
328+
generated_asm.push_str(" push rbp\n");
329+
// NOTE(review): rdi presumably carries the address of the slot area that the `offset`
// values are relative to; confirm against `call_inline_asm`.
generated_asm.push_str(" mov rbp,rdi\n");
330+
331+
// Save clobbered registers
332+
if !self.options.contains(InlineAsmOptions::NORETURN) {
333+
// FIXME skip registers saved by the calling convention
334+
for &(reg, offset) in &clobbered_regs {
335+
save_register(&mut generated_asm, self.arch, reg, offset);
336+
}
337+
}
231338

232-
if !options.contains(InlineAsmOptions::NORETURN) {
233-
// Read output registers
234-
for &(reg, offset, _place) in outputs {
235-
save_register(&mut generated_asm, arch, reg, offset);
339+
// Write input registers
340+
for &(reg, offset, _value) in inputs {
341+
restore_register(&mut generated_asm, self.arch, reg, offset);
236342
}
237343

238-
// Restore clobbered registers
239-
for &(reg, offset) in clobbered_regs.iter().rev() {
240-
restore_register(&mut generated_asm, arch, reg, offset);
344+
// Switch to AT&T syntax only around the user's template when it was written that way.
if self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
345+
generated_asm.push_str(".att_syntax\n");
241346
}
242347

243-
generated_asm.push_str(" pop rbp\n");
244-
generated_asm.push_str(" ret\n");
245-
} else {
246-
generated_asm.push_str(" ud2\n");
247-
}
348+
// The actual inline asm
349+
for piece in self.template {
350+
match piece {
351+
InlineAsmTemplatePiece::String(s) => {
352+
generated_asm.push_str(s);
353+
}
354+
InlineAsmTemplatePiece::Placeholder { operand_idx, modifier, span: _ } => {
355+
// Substitute the register allocated for this operand; the first `unwrap` panics when the
// operand has no register assigned.
self.registers[*operand_idx]
356+
.unwrap()
357+
.emit(&mut generated_asm, self.arch, *modifier)
358+
.unwrap();
359+
}
360+
}
361+
}
362+
generated_asm.push('\n');
363+
364+
// Back to Intel syntax for the wrapper's epilogue.
if self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
365+
generated_asm.push_str(".intel_syntax noprefix\n");
366+
}
367+
368+
if !self.options.contains(InlineAsmOptions::NORETURN) {
369+
// Read output registers
370+
for &(reg, offset, _place) in outputs {
371+
save_register(&mut generated_asm, self.arch, reg, offset);
372+
}
248373

249-
generated_asm.push_str(".att_syntax\n");
250-
writeln!(generated_asm, ".size {name}, .-{name}", name = asm_name).unwrap();
251-
generated_asm.push_str(".text\n");
252-
generated_asm.push_str("\n\n");
374+
// Restore clobbered registers
375+
for &(reg, offset) in clobbered_regs.iter().rev() {
376+
restore_register(&mut generated_asm, self.arch, reg, offset);
377+
}
378+
379+
generated_asm.push_str(" pop rbp\n");
380+
generated_asm.push_str(" ret\n");
381+
} else {
382+
// A `NORETURN` block gets no epilogue; `ud2` is emitted after the template instead.
generated_asm.push_str(" ud2\n");
383+
}
253384

254-
generated_asm
385+
// Switch back to AT&T syntax before closing the symbol (presumably the mode that later
// global asm expects; TODO confirm).
generated_asm.push_str(".att_syntax\n");
386+
writeln!(generated_asm, ".size {name}, .-{name}", name = asm_name).unwrap();
387+
generated_asm.push_str(".text\n");
388+
generated_asm.push_str("\n\n");
389+
390+
generated_asm
391+
}
255392
}
256393

257394
fn call_inline_asm<'tcx>(

0 commit comments

Comments
 (0)