
Commit ab617f8

Rollup merge of #83698 - erikdesjardins:undefconst, r=oli-obk
Use undef for uninitialized bytes in constants

Fixes #83657

This generates good code when the const is fully uninit, e.g.

```rust
#[no_mangle]
pub const fn fully_uninit() -> MaybeUninit<[u8; 10]> {
    const M: MaybeUninit<[u8; 10]> = MaybeUninit::uninit();
    M
}
```

generates

```asm
fully_uninit:
        ret
```

as you would expect.

There is no improvement, however, when it's partially uninit, e.g.

```rust
pub struct PartiallyUninit {
    x: u64,
    y: MaybeUninit<[u8; 10]>,
}

#[no_mangle]
pub const fn partially_uninit() -> PartiallyUninit {
    const X: PartiallyUninit = PartiallyUninit { x: 0xdeadbeefcafe, y: MaybeUninit::uninit() };
    X
}
```

generates

```asm
partially_uninit:
        mov     rax, rdi
        mov     rcx, qword ptr [rip + .L__unnamed_1+16]
        mov     qword ptr [rdi + 16], rcx
        movups  xmm0, xmmword ptr [rip + .L__unnamed_1]
        movups  xmmword ptr [rdi], xmm0
        ret

.L__unnamed_1:
        .asciz  "\376\312\357\276\255\336\000"
        .zero   16
        .size   .L__unnamed_1, 24
```

which copies a bunch of zeros in place of the undef bytes, the same as before this change. The constant is undef where it should be, though (see the added test), so this seems to be a missed optimization in LLVM.
2 parents 3a7479d + be66c2e commit ab617f8

7 files changed (+160 -21 lines)

compiler/rustc_codegen_llvm/src/consts.rs

+30 -13

```diff
@@ -8,22 +8,47 @@ use crate::value::Value;
 use cstr::cstr;
 use libc::c_uint;
 use rustc_codegen_ssa::traits::*;
+use rustc_data_structures::captures::Captures;
 use rustc_hir::def_id::DefId;
 use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
 use rustc_middle::mir::interpret::{
-    read_target_uint, Allocation, ErrorHandled, GlobalAlloc, Pointer,
+    read_target_uint, Allocation, ErrorHandled, GlobalAlloc, InitChunk, Pointer,
 };
 use rustc_middle::mir::mono::MonoItem;
 use rustc_middle::ty::{self, Instance, Ty};
 use rustc_middle::{bug, span_bug};
 use rustc_target::abi::{AddressSpace, Align, HasDataLayout, LayoutOf, Primitive, Scalar, Size};
+use std::ops::Range;
 use tracing::debug;
 
 pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll Value {
     let mut llvals = Vec::with_capacity(alloc.relocations().len() + 1);
     let dl = cx.data_layout();
     let pointer_size = dl.pointer_size.bytes() as usize;
 
+    // Note: this function may call `inspect_with_uninit_and_ptr_outside_interpreter`,
+    // so `range` must be within the bounds of `alloc` and not within a relocation.
+    fn chunks_of_init_and_uninit_bytes<'ll, 'a, 'b>(
+        cx: &'a CodegenCx<'ll, 'b>,
+        alloc: &'a Allocation,
+        range: Range<usize>,
+    ) -> impl Iterator<Item = &'ll Value> + Captures<'a> + Captures<'b> {
+        alloc
+            .init_mask()
+            .range_as_init_chunks(Size::from_bytes(range.start), Size::from_bytes(range.end))
+            .map(move |chunk| match chunk {
+                InitChunk::Init(range) => {
+                    let range = (range.start.bytes() as usize)..(range.end.bytes() as usize);
+                    let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(range);
+                    cx.const_bytes(bytes)
+                }
+                InitChunk::Uninit(range) => {
+                    let len = range.end.bytes() - range.start.bytes();
+                    cx.const_undef(cx.type_array(cx.type_i8(), len))
+                }
+            })
+    }
+
     let mut next_offset = 0;
     for &(offset, ((), alloc_id)) in alloc.relocations().iter() {
         let offset = offset.bytes();
@@ -32,12 +57,8 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll
         if offset > next_offset {
             // This `inspect` is okay since we have checked that it is not within a relocation, it
             // is within the bounds of the allocation, and it doesn't affect interpreter execution
-            // (we inspect the result after interpreter execution). Any undef byte is replaced with
-            // some arbitrary byte value.
-            //
-            // FIXME: relay undef bytes to codegen as undef const bytes
-            let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(next_offset..offset);
-            llvals.push(cx.const_bytes(bytes));
+            // (we inspect the result after interpreter execution).
+            llvals.extend(chunks_of_init_and_uninit_bytes(cx, alloc, next_offset..offset));
         }
         let ptr_offset = read_target_uint(
             dl.endian,
@@ -65,12 +86,8 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll
         let range = next_offset..alloc.len();
         // This `inspect` is okay since we have check that it is after all relocations, it is
         // within the bounds of the allocation, and it doesn't affect interpreter execution (we
-        // inspect the result after interpreter execution). Any undef byte is replaced with some
-        // arbitrary byte value.
-        //
-        // FIXME: relay undef bytes to codegen as undef const bytes
-        let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(range);
-        llvals.push(cx.const_bytes(bytes));
+        // inspect the result after interpreter execution).
+        llvals.extend(chunks_of_init_and_uninit_bytes(cx, alloc, range));
    }
 
    cx.const_struct(&llvals, true)
```

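The heart of the change is the new `chunks_of_init_and_uninit_bytes` helper: instead of pushing one `const_bytes` blob per relocation-free range (with uninitialized bytes replaced by arbitrary values), codegen now walks the allocation's init mask, emitting initialized runs as byte constants and uninitialized runs as `undef` arrays. A minimal standalone sketch of that splitting, using a plain `&[Option<u8>]` buffer as a stand-in for rustc's `Allocation` plus `InitMask` (the names and types here are illustrative, not rustc's API):

```rust
/// Illustrative stand-in for the codegen chunking: `None` models an
/// uninitialized byte, `Some(b)` an initialized one.
#[derive(Debug, PartialEq)]
enum Chunk {
    /// A run of initialized bytes, emitted as a normal byte constant.
    Init(Vec<u8>),
    /// A run of uninitialized bytes, emitted as `undef` of this length.
    Uninit(usize),
}

fn chunks_of_init_and_uninit_bytes(bytes: &[Option<u8>]) -> Vec<Chunk> {
    let mut chunks = Vec::new();
    let mut start = 0;
    while start < bytes.len() {
        let is_init = bytes[start].is_some();
        // Find where the current run of (un)initialized bytes ends.
        let end = (start..bytes.len())
            .find(|&i| bytes[i].is_some() != is_init)
            .unwrap_or(bytes.len());
        chunks.push(if is_init {
            Chunk::Init(bytes[start..end].iter().map(|&b| b.unwrap()).collect())
        } else {
            Chunk::Uninit(end - start)
        });
        start = end;
    }
    chunks
}

fn main() {
    // 4 initialized bytes followed by 4 uninitialized ones, echoing the
    // 0xdeadbeef prefix of the constant in the added codegen test.
    let buf = [Some(0xEF), Some(0xBE), Some(0xAD), Some(0xDE), None, None, None, None];
    assert_eq!(
        chunks_of_init_and_uninit_bytes(&buf),
        vec![Chunk::Init(vec![0xEF, 0xBE, 0xAD, 0xDE]), Chunk::Uninit(4)]
    );
}
```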
compiler/rustc_middle/src/mir/interpret/allocation.rs

+52 -5

```diff
@@ -761,20 +761,24 @@ impl InitMask {
         }
 
         // FIXME(oli-obk): optimize this for allocations larger than a block.
-        let idx = (start.bytes()..end.bytes()).map(Size::from_bytes).find(|&i| !self.get(i));
+        let idx = (start..end).find(|&i| !self.get(i));
 
         match idx {
             Some(idx) => {
-                let uninit_end = (idx.bytes()..end.bytes())
-                    .map(Size::from_bytes)
-                    .find(|&i| self.get(i))
-                    .unwrap_or(end);
+                let uninit_end = (idx..end).find(|&i| self.get(i)).unwrap_or(end);
                 Err(idx..uninit_end)
             }
             None => Ok(()),
         }
     }
 
+    /// Returns an iterator, yielding a range of byte indexes for each contiguous region
+    /// of initialized or uninitialized bytes inside the range `start..end` (end-exclusive).
+    #[inline]
+    pub fn range_as_init_chunks(&self, start: Size, end: Size) -> InitChunkIter<'_> {
+        InitChunkIter::new(self, start, end)
+    }
+
     pub fn set_range(&mut self, start: Size, end: Size, new_state: bool) {
         let len = self.len;
         if end > len {
@@ -867,6 +871,49 @@ impl InitMask {
     }
 }
 
+/// Yields [`InitChunk`]s. See [`InitMask::range_as_init_chunks`].
+pub struct InitChunkIter<'a> {
+    init_mask: &'a InitMask,
+    /// The current byte index into `init_mask`.
+    start: Size,
+    /// The end byte index into `init_mask`.
+    end: Size,
+}
+
+/// A contiguous chunk of initialized or uninitialized memory.
+pub enum InitChunk {
+    Init(Range<Size>),
+    Uninit(Range<Size>),
+}
+
+impl<'a> InitChunkIter<'a> {
+    fn new(init_mask: &'a InitMask, start: Size, end: Size) -> Self {
+        assert!(start <= end);
+        assert!(end <= init_mask.len);
+        Self { init_mask, start, end }
+    }
+}
+
+impl<'a> Iterator for InitChunkIter<'a> {
+    type Item = InitChunk;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.start >= self.end {
+            return None;
+        }
+
+        let is_init = self.init_mask.get(self.start);
+        // FIXME(oli-obk): optimize this for allocations larger than a block.
+        let end_of_chunk =
+            (self.start..self.end).find(|&i| self.init_mask.get(i) != is_init).unwrap_or(self.end);
+        let range = self.start..end_of_chunk;
+
+        self.start = end_of_chunk;
+
+        Some(if is_init { InitChunk::Init(range) } else { InitChunk::Uninit(range) })
+    }
+}
+
 #[inline]
 fn bit_index(bits: Size) -> (usize, usize) {
     let bits = bits.bytes();
```

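`InitChunkIter` is a small cursor-style iterator: it reads the init state of the byte at `start`, scans forward to the first byte whose state differs, yields that run as an `InitChunk`, and moves the cursor there. A self-contained sketch of the same pattern over a plain `&[bool]` mask (illustrative only; the real iterator indexes an `InitMask` with `Size` offsets):

```rust
use std::ops::Range;

/// A contiguous run of equally-initialized bytes, mirroring `InitChunk`.
#[derive(Debug, PartialEq)]
enum Chunk {
    Init(Range<usize>),
    Uninit(Range<usize>),
}

/// Cursor-style iterator over a boolean "init mask", mirroring `InitChunkIter`.
struct ChunkIter<'a> {
    mask: &'a [bool],
    start: usize,
    end: usize,
}

impl<'a> Iterator for ChunkIter<'a> {
    type Item = Chunk;

    fn next(&mut self) -> Option<Chunk> {
        if self.start >= self.end {
            return None;
        }
        let is_init = self.mask[self.start];
        // The run ends at the first index whose init state differs.
        let end_of_chunk =
            (self.start..self.end).find(|&i| self.mask[i] != is_init).unwrap_or(self.end);
        let range = self.start..end_of_chunk;
        self.start = end_of_chunk;
        Some(if is_init { Chunk::Init(range) } else { Chunk::Uninit(range) })
    }
}

fn main() {
    // 2 initialized bytes, 3 uninitialized, then 1 initialized.
    let mask = [true, true, false, false, false, true];
    let chunks: Vec<_> = ChunkIter { mask: &mask, start: 0, end: mask.len() }.collect();
    assert_eq!(chunks, vec![Chunk::Init(0..2), Chunk::Uninit(2..5), Chunk::Init(5..6)]);
}
```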
compiler/rustc_middle/src/mir/interpret/mod.rs

+3 -1

```diff
@@ -125,7 +125,9 @@ pub use self::error::{
 
 pub use self::value::{get_slice_bytes, ConstAlloc, ConstValue, Scalar, ScalarMaybeUninit};
 
-pub use self::allocation::{Allocation, AllocationExtra, InitMask, Relocations};
+pub use self::allocation::{
+    Allocation, AllocationExtra, InitChunk, InitChunkIter, InitMask, Relocations,
+};
 
 pub use self::pointer::{Pointer, PointerArithmetic};
```

compiler/rustc_target/src/abi/mod.rs

+38

```diff
@@ -5,6 +5,7 @@ use crate::spec::Target;
 
 use std::convert::{TryFrom, TryInto};
 use std::fmt;
+use std::iter::Step;
 use std::num::NonZeroUsize;
 use std::ops::{Add, AddAssign, Deref, Mul, Range, RangeInclusive, Sub};
 use std::str::FromStr;
@@ -433,6 +434,43 @@ impl AddAssign for Size {
     }
 }
 
+unsafe impl Step for Size {
+    #[inline]
+    fn steps_between(start: &Self, end: &Self) -> Option<usize> {
+        u64::steps_between(&start.bytes(), &end.bytes())
+    }
+
+    #[inline]
+    fn forward_checked(start: Self, count: usize) -> Option<Self> {
+        u64::forward_checked(start.bytes(), count).map(Self::from_bytes)
+    }
+
+    #[inline]
+    fn forward(start: Self, count: usize) -> Self {
+        Self::from_bytes(u64::forward(start.bytes(), count))
+    }
+
+    #[inline]
+    unsafe fn forward_unchecked(start: Self, count: usize) -> Self {
+        Self::from_bytes(u64::forward_unchecked(start.bytes(), count))
+    }
+
+    #[inline]
+    fn backward_checked(start: Self, count: usize) -> Option<Self> {
+        u64::backward_checked(start.bytes(), count).map(Self::from_bytes)
+    }
+
+    #[inline]
+    fn backward(start: Self, count: usize) -> Self {
+        Self::from_bytes(u64::backward(start.bytes(), count))
+    }
+
+    #[inline]
+    unsafe fn backward_unchecked(start: Self, count: usize) -> Self {
+        Self::from_bytes(u64::backward_unchecked(start.bytes(), count))
+    }
+}
+
 /// Alignment of a type in bytes (always a power of two).
 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Encodable, Decodable)]
 #[derive(HashStable_Generic)]
```

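`Range<T>` is only an `Iterator` when `T` implements the unstable `Step` trait, so giving `Size` a `Step` impl that delegates to `u64` is what allows the `(start..end).find(...)` simplification in `allocation.rs` above. The same delegation pattern works for any thin integer newtype; a rough sketch follows (nightly-only, following the trait surface shown in this diff, which has shifted across toolchains; `ByteOffset` is a made-up stand-in for `Size`):

```rust
// Feature gates mirrored from the diff above; the exact set depends on the nightly in use.
#![feature(step_trait)]
#![feature(step_trait_ext)]

use std::iter::Step;

/// Hypothetical newtype standing in for `Size`: a byte offset stored as `u64`.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct ByteOffset(u64);

// Delegate every required method to the underlying integer, as the `Size` impl does.
unsafe impl Step for ByteOffset {
    fn steps_between(start: &Self, end: &Self) -> Option<usize> {
        u64::steps_between(&start.0, &end.0)
    }
    fn forward_checked(start: Self, count: usize) -> Option<Self> {
        u64::forward_checked(start.0, count).map(ByteOffset)
    }
    fn backward_checked(start: Self, count: usize) -> Option<Self> {
        u64::backward_checked(start.0, count).map(ByteOffset)
    }
}

fn main() {
    // With `Step` implemented, a plain range of the newtype is itself an iterator.
    let offsets: Vec<ByteOffset> = (ByteOffset(0)..ByteOffset(4)).collect();
    assert_eq!(offsets, vec![ByteOffset(0), ByteOffset(1), ByteOffset(2), ByteOffset(3)]);
}
```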
compiler/rustc_target/src/lib.rs

+3

```diff
@@ -15,6 +15,9 @@
 #![feature(never_type)]
 #![feature(associated_type_bounds)]
 #![feature(exhaustive_patterns)]
+#![feature(step_trait)]
+#![feature(step_trait_ext)]
+#![feature(unchecked_math)]
 
 #[macro_use]
 extern crate rustc_macros;
```

src/test/codegen/consts.rs

+2 -2

```diff
@@ -43,14 +43,14 @@ pub fn inline_enum_const() -> E<i8, i16> {
 #[no_mangle]
 pub fn low_align_const() -> E<i16, [i16; 3]> {
     // Check that low_align_const and high_align_const use the same constant
-    // CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 2 %1, i8* align 2 getelementptr inbounds (<{ [8 x i8] }>, <{ [8 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false)
+    // CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 2 %1, i8* align 2 getelementptr inbounds (<{ [4 x i8], [4 x i8] }>, <{ [4 x i8], [4 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false)
     *&E::A(0)
 }
 
 // CHECK-LABEL: @high_align_const
 #[no_mangle]
 pub fn high_align_const() -> E<i16, i32> {
     // Check that low_align_const and high_align_const use the same constant
-    // CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [8 x i8] }>, <{ [8 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false)
+    // CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [4 x i8], [4 x i8] }>, <{ [4 x i8], [4 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false)
     *&E::A(0)
 }
```

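The updated CHECK lines reflect the same mechanism: the 8-byte constant shared by `low_align_const` and `high_align_const` is no longer one fully-initialized `[8 x i8]` blob but an initialized 4-byte prefix followed by a 4-byte `undef` chunk, because the value's trailing padding bytes are uninitialized. As a rough illustration of the sizes involved (assuming `E` is the two-variant generic enum defined earlier in that test file, roughly as sketched here):

```rust
// Hypothetical stand-in for the `E` used in src/test/codegen/consts.rs;
// the real definition lives earlier in that test file and is not part of this diff.
#[allow(dead_code)]
enum E<A, B> {
    A(A),
    B(B),
}

fn main() {
    // Both instantiations are 8 bytes, matching the 8-byte memcpy in the CHECK
    // lines; only the leading bytes of `E::A(0)` carry the tag and payload, and
    // the trailing padding is now emitted as a separate undef chunk.
    assert_eq!(std::mem::size_of::<E<i16, [i16; 3]>>(), 8);
    assert_eq!(std::mem::size_of::<E<i16, i32>>(), 8);
}
```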
src/test/codegen/uninit-consts.rs

+32 (new file)

```diff
@@ -0,0 +1,32 @@
+// compile-flags: -C no-prepopulate-passes
+// ignore-tidy-linelength
+
+// Check that we use undef (and not zero) for uninitialized bytes in constants.
+
+#![crate_type = "lib"]
+
+use std::mem::MaybeUninit;
+
+pub struct PartiallyUninit {
+    x: u32,
+    y: MaybeUninit<[u8; 10]>
+}
+
+// CHECK: [[FULLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [10 x i8] }> undef
+// CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"\EF\BE\AD\DE", [12 x i8] undef }>, align 4
+
+// CHECK-LABEL: @fully_uninit
+#[no_mangle]
+pub const fn fully_uninit() -> MaybeUninit<[u8; 10]> {
+    const M: MaybeUninit<[u8; 10]> = MaybeUninit::uninit();
+    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 1 %1, i8* align 1 getelementptr inbounds (<{ [10 x i8] }>, <{ [10 x i8] }>* [[FULLY_UNINIT]], i32 0, i32 0, i32 0), i{{(32|64)}} 10, i1 false)
+    M
+}
+
+// CHECK-LABEL: @partially_uninit
+#[no_mangle]
+pub const fn partially_uninit() -> PartiallyUninit {
+    const X: PartiallyUninit = PartiallyUninit { x: 0xdeadbeef, y: MaybeUninit::uninit() };
+    // CHECK: call void @llvm.memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [4 x i8], [12 x i8] }>, <{ [4 x i8], [12 x i8] }>* [[PARTIALLY_UNINIT]], i32 0, i32 0, i32 0), i{{(32|64)}} 16, i1 false)
+    X
+}
```

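The byte string expected for `[[PARTIALLY_UNINIT]]` is just `0xdeadbeef` in little-endian order; the remaining 12 of the constant's 16 bytes (the 10 `MaybeUninit` bytes plus trailing padding) are now `undef`. A quick sanity check of those two facts on stable Rust:

```rust
use std::mem::{size_of, MaybeUninit};

// Mirrors the struct from the test: a 4-byte initialized `x`, then 10
// `MaybeUninit` bytes and trailing padding, 16 bytes in total.
#[allow(dead_code)]
pub struct PartiallyUninit {
    x: u32,
    y: MaybeUninit<[u8; 10]>,
}

fn main() {
    // 0xdeadbeef stored little-endian is exactly the escaped byte string
    // `c"\EF\BE\AD\DE"` in the CHECK line above (on a little-endian target).
    assert_eq!(0xdead_beef_u32.to_le_bytes(), [0xEF, 0xBE, 0xAD, 0xDE]);
    // The remaining 12 of the 16 bytes are undef in the emitted constant.
    assert_eq!(size_of::<PartiallyUninit>(), 16);
}
```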