Skip to content

Commit dc02d76

Browse files
plietarMabezDev
authored andcommitted
Add a custom lowering of vaarg for xtensa.
LLVM does not include an implementation of the va_arg instruction for Xtensa. From what I understand, this is a conscious decision, and language frontends are instead encouraged to implement it themselves. The rationale seems to be that loading values correctly requires language- and ABI-specific knowledge that LLVM lacks. This is true of most architectures, and rustc already provides implementations for a number of them. This commit extends the support to include Xtensa. See https://lists.llvm.org/pipermail/llvm-dev/2017-August/116337.html for some discussion on the topic. Unfortunately there does not seem to be a reference document for the semantics of va_list and va_arg on Xtensa. The most reliable source is the GCC implementation, which this commit tries to follow. Clang also provides its own compatible implementation. This was tested for all the types that rustc allows in variadics.
1 parent 8ca7d9d commit dc02d76

File tree

1 file changed

+92
-2
lines changed

1 file changed

+92
-2
lines changed

compiler/rustc_codegen_llvm/src/va_arg.rs

+92-2
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,23 @@ use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
1111
use rustc_middle::ty::Ty;
1212
use rustc_target::abi::{Align, Endian, HasDataLayout, Size};
1313

14+
fn round_up_to_alignment<'ll>(
15+
bx: &mut Builder<'_, 'll, '_>,
16+
mut value: &'ll Value,
17+
align: Align,
18+
) -> &'ll Value {
19+
value = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
20+
return bx.and(value, bx.cx().const_i32(-(align.bytes() as i32)));
21+
}
22+
1423
fn round_pointer_up_to_alignment<'ll>(
1524
bx: &mut Builder<'_, 'll, '_>,
1625
addr: &'ll Value,
1726
align: Align,
1827
ptr_ty: &'ll Type,
1928
) -> &'ll Value {
2029
let mut ptr_as_int = bx.ptrtoint(addr, bx.cx().type_isize());
21-
ptr_as_int = bx.add(ptr_as_int, bx.cx().const_i32(align.bytes() as i32 - 1));
22-
ptr_as_int = bx.and(ptr_as_int, bx.cx().const_i32(-(align.bytes() as i32)));
30+
ptr_as_int = round_up_to_alignment(bx, ptr_as_int, align);
2331
bx.inttoptr(ptr_as_int, ptr_ty)
2432
}
2533

@@ -271,6 +279,87 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
271279
bx.load(val_type, val_addr, layout.align.abi)
272280
}
273281

282+
fn emit_xtensa_va_arg<'ll, 'tcx>(
283+
bx: &mut Builder<'_, 'll, 'tcx>,
284+
list: OperandRef<'tcx, &'ll Value>,
285+
target_ty: Ty<'tcx>,
286+
) -> &'ll Value {
287+
// Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
288+
// this, other than "what GCC does".
289+
//
290+
// The va_list type has three fields:
291+
// struct __va_list_tag {
292+
// int32_t *va_stk; // Arguments passed on the stack
293+
// int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
294+
// int32_t va_ndx; // Offset into the arguments, in bytes
295+
// };
296+
//
297+
// Whether an argument is loaded from va_stk or va_reg depends on the value of va_ndx.
298+
// The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
299+
//
300+
// Arguments are never split between registers and the stack. For example, if loading an 8 byte
301+
// value and va_ndx = 20, we instead bump the offset and read everything from va_stk.
302+
let va_list_addr = list.immediate();
303+
let layout = bx.cx.layout_of(target_ty);
304+
let from_stack = bx.append_sibling_block("va_arg.from_stack");
305+
let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
306+
let end = bx.append_sibling_block("va_arg.end");
307+
308+
let i32_offset = 4;
309+
let offset_ptr =
310+
bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(i32_offset * 2)]);
311+
let mut offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
312+
313+
let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;
314+
if layout.align.abi.bytes() > 4 {
315+
offset = round_up_to_alignment(bx, offset, layout.align.abi);
316+
}
317+
318+
// Update the offset in va_list, by adding the slot's size.
319+
let offset_next = bx.add(offset, bx.const_i32(slot_size));
320+
bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
321+
322+
// Figure out where to look for our value. We do that by checking the end of our slot (offset_next).
323+
// If that is within the regsave area, then load from there. Otherwise load from the stack area.
324+
let regsave_size = bx.const_i32(24);
325+
let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
326+
bx.cond_br(use_regsave, from_regsave, from_stack);
327+
328+
bx.switch_to_block(from_regsave);
329+
let regsave_area_ptr =
330+
bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(i32_offset)]);
331+
let regsave_area =
332+
bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
333+
let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
334+
bx.br(end);
335+
336+
bx.switch_to_block(from_stack);
337+
338+
// The first time we switch from regsave to stack we needs to adjust our offsets a bit.
339+
// va_stk is set up such that the first stack argument is always at va_stk + 32.
340+
// The corrected offset is written back into the va_list struct.
341+
let needs_correction = bx.icmp(IntPredicate::IntULE, offset, regsave_size);
342+
let offset_corrected = bx.select(needs_correction, bx.const_i32(32), offset);
343+
let offset_next_corrected =
344+
bx.select(needs_correction, bx.const_i32(32 + slot_size), offset_next);
345+
bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
346+
347+
let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
348+
let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
349+
let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
350+
bx.br(end);
351+
352+
bx.switch_to_block(end);
353+
354+
// On big-endian, for values smaller than the slot size we'd have to align the read to the end
355+
// of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
356+
// targets supported by rustc are litte-endian so don't worry about it.
357+
assert!(bx.tcx().sess.target.endian == Endian::Little);
358+
let value_ptr =
359+
bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
360+
return bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi);
361+
}
362+
274363
pub(super) fn emit_va_arg<'ll, 'tcx>(
275364
bx: &mut Builder<'_, 'll, 'tcx>,
276365
addr: OperandRef<'tcx, &'ll Value>,
@@ -303,6 +392,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
303392
let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
304393
emit_ptr_va_arg(bx, addr, target_ty, indirect, Align::from_bytes(8).unwrap(), false)
305394
}
395+
"xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
306396
// For all other architecture/OS combinations fall back to using
307397
// the LLVM va_arg instruction.
308398
// https://llvm.org/docs/LangRef.html#va-arg-instruction

0 commit comments

Comments
 (0)