Skip to content

cg_llvm: use index-based loop in write_operand_repeatedly #112516

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 7 additions & 15 deletions compiler/rustc_codegen_llvm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -572,8 +572,6 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
) {
let zero = self.const_usize(0);
let count = self.const_usize(count);
let start = dest.project_index(self, zero).llval;
let end = dest.project_index(self, count).llval;

let header_bb = self.append_sibling_block("repeat_loop_header");
let body_bb = self.append_sibling_block("repeat_loop_body");
Expand All @@ -582,24 +580,18 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
self.br(header_bb);

let mut header_bx = Self::build(self.cx, header_bb);
let current = header_bx.phi(self.val_ty(start), &[start], &[self.llbb()]);
let i = header_bx.phi(self.val_ty(zero), &[zero], &[self.llbb()]);

let keep_going = header_bx.icmp(IntPredicate::IntNE, current, end);
let keep_going = header_bx.icmp(IntPredicate::IntULT, i, count);
header_bx.cond_br(keep_going, body_bb, next_bb);

let mut body_bx = Self::build(self.cx, body_bb);
let align = dest.align.restrict_for_offset(dest.layout.field(self.cx(), 0).size);
cg_elem
.val
.store(&mut body_bx, PlaceRef::new_sized_aligned(current, cg_elem.layout, align));

let next = body_bx.inbounds_gep(
self.backend_type(cg_elem.layout),
current,
&[self.const_usize(1)],
);
let dest_elem = dest.project_index(&mut body_bx, i);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does project_index emit GEP? Will it emit the inbounds and align info that was previously there?

Copy link
Contributor Author

@erikdesjardins erikdesjardins Jun 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes:

pub fn project_index<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
&self,
bx: &mut Bx,
llindex: V,
) -> Self {
// Statically compute the offset if we can, otherwise just use the element size,
// as this will yield the lowest alignment.
let layout = self.layout.field(bx, 0);
let offset = if let Some(llindex) = bx.const_to_opt_uint(llindex) {
layout.size.checked_mul(llindex, bx).unwrap_or(layout.size)
} else {
layout.size
};
PlaceRef {
llval: bx.inbounds_gep(
bx.cx().backend_type(self.layout),
self.llval,
&[bx.cx().const_usize(0), llindex],
),
llextra: None,
layout,
align: self.align.restrict_for_offset(offset),
}
}

What was there before was basically a reimplementation of project_index (which was sort of necessary because the GEP was threaded through the phi)

cg_elem.val.store(&mut body_bx, dest_elem);

let next = body_bx.unchecked_uadd(i, self.const_usize(1));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add the no-unsigned-wrap (`nuw`) flag here, or do we let LLVM infer it instead?

Copy link
Contributor Author

@erikdesjardins erikdesjardins Jun 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unchecked_uadd (vs. add) adds nuw

body_bx.br(header_bb);
header_bx.add_incoming_to_phi(current, next, body_bb);
header_bx.add_incoming_to_phi(i, next, body_bb);

*self = Self::build(self.cx, next_bb);
}
Expand Down
12 changes: 12 additions & 0 deletions tests/codegen/issues/issue-111603.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@

use std::sync::Arc;

// CHECK-LABEL: @new_from_array

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we emit the entire IR here? I think it's helpful for reviewers, test coverage, and people trying to understand the codebase better.

Copy link
Contributor Author

@erikdesjardins erikdesjardins Jun 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we don't have something like LLVM's update_test_checks script, it is painful to update codegen tests. So we tend to use the minimal amount of CHECK lines so we don't have to update tests due to unrelated changes. (Also, most tests don't specify a target, so they generate slightly different IR per platform due to differences in ABI/vectorization/etc., which would have to be dealt with if we did this in general.)

In this case, this is the full IR of new_from_array:

IR
define { ptr, i64 } @new_from_array(i64 noundef %x) unnamed_addr #0 personality ptr @rust_eh_personality {
start:
  %array = alloca [1000 x i64], align 8
  %broadcast.splatinsert = insertelement <2 x i64> poison, i64 %x, i64 0
  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
  %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 %x, i64 0
  %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %start
  %index = phi i64 [ 0, %start ], [ %index.next.4, %vector.body ]
  %0 = getelementptr inbounds [1000 x i64], ptr %array, i64 0, i64 %index
  store <2 x i64> %broadcast.splat, ptr %0, align 8
  %1 = getelementptr inbounds i64, ptr %0, i64 2
  store <2 x i64> %broadcast.splat2, ptr %1, align 8
  %index.next = add nuw nsw i64 %index, 4
  %2 = getelementptr inbounds [1000 x i64], ptr %array, i64 0, i64 %index.next
  store <2 x i64> %broadcast.splat, ptr %2, align 8
  %3 = getelementptr inbounds i64, ptr %2, i64 2
  store <2 x i64> %broadcast.splat2, ptr %3, align 8
  %index.next.1 = add nuw nsw i64 %index, 8
  %4 = getelementptr inbounds [1000 x i64], ptr %array, i64 0, i64 %index.next.1
  store <2 x i64> %broadcast.splat, ptr %4, align 8
  %5 = getelementptr inbounds i64, ptr %4, i64 2
  store <2 x i64> %broadcast.splat2, ptr %5, align 8
  %index.next.2 = add nuw nsw i64 %index, 12
  %6 = getelementptr inbounds [1000 x i64], ptr %array, i64 0, i64 %index.next.2
  store <2 x i64> %broadcast.splat, ptr %6, align 8
  %7 = getelementptr inbounds i64, ptr %6, i64 2
  store <2 x i64> %broadcast.splat2, ptr %7, align 8
  %index.next.3 = add nuw nsw i64 %index, 16
  %8 = getelementptr inbounds [1000 x i64], ptr %array, i64 0, i64 %index.next.3
  store <2 x i64> %broadcast.splat, ptr %8, align 8
  %9 = getelementptr inbounds i64, ptr %8, i64 2
  store <2 x i64> %broadcast.splat2, ptr %9, align 8
  %index.next.4 = add nuw nsw i64 %index, 20
  %10 = icmp eq i64 %index.next.4, 1000
  br i1 %10, label %repeat_loop_next, label %vector.body, !llvm.loop !2

repeat_loop_next:                                 ; preds = %vector.body
  %11 = load volatile i8, ptr @__rust_no_alloc_shim_is_unstable, align 1, !noalias !5
  %12 = tail call noundef align 8 dereferenceable_or_null(8016) ptr @__rust_alloc(i64 noundef 8016, i64 noundef 8) #6, !noalias !5
  %13 = icmp eq ptr %12, null
  br i1 %13, label %bb1.i.i, label %"_ZN5alloc4sync12Arc$LT$T$GT$3new17hc22c917a7edefd8bE.exit"

bb1.i.i:                                          ; preds = %repeat_loop_next
; call alloc::alloc::handle_alloc_error
  tail call void @_ZN5alloc5alloc18handle_alloc_error17h5a822ff2e844764dE(i64 noundef 8, i64 noundef 8016) #7, !noalias !5
  unreachable

"_ZN5alloc4sync12Arc$LT$T$GT$3new17hc22c917a7edefd8bE.exit": ; preds = %repeat_loop_next
  store i64 1, ptr %12, align 8, !noalias !5
  %x.sroa.4.0._14.sroa_idx.i = getelementptr inbounds i8, ptr %12, i64 8
  store i64 1, ptr %x.sroa.4.0._14.sroa_idx.i, align 8, !noalias !5
  %x.sroa.5.0._14.sroa_idx.i = getelementptr inbounds i8, ptr %12, i64 16
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(8000) %x.sroa.5.0._14.sroa_idx.i, ptr noundef nonnull align 8 dereferenceable(8000) %array, i64 8000, i1 false)
  %14 = insertvalue { ptr, i64 } poison, ptr %12, 0
  %15 = insertvalue { ptr, i64 } %14, i64 1000, 1
  ret { ptr, i64 } %15
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for sharing IR. Looks like the redundant alloca was removed. Nice!

#[no_mangle]
pub fn new_from_array(x: u64) -> Arc<[u64]> {
// Ensure that we only generate one alloca for the array.

// CHECK: alloca
// CHECK-SAME: [1000 x i64]
// CHECK-NOT: alloca
let array = [x; 1000];
Arc::new(array)
}

// CHECK-LABEL: @new_uninit
#[no_mangle]
pub fn new_uninit(x: u64) -> Arc<[u64; 1000]> {
Expand Down