-
Notifications
You must be signed in to change notification settings - Fork 13.3k
cg_llvm: use index-based loop in write_operand_repeatedly #112516
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -572,8 +572,6 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { | |
) { | ||
let zero = self.const_usize(0); | ||
let count = self.const_usize(count); | ||
let start = dest.project_index(self, zero).llval; | ||
let end = dest.project_index(self, count).llval; | ||
|
||
let header_bb = self.append_sibling_block("repeat_loop_header"); | ||
let body_bb = self.append_sibling_block("repeat_loop_body"); | ||
|
@@ -582,24 +580,18 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { | |
self.br(header_bb); | ||
|
||
let mut header_bx = Self::build(self.cx, header_bb); | ||
let current = header_bx.phi(self.val_ty(start), &[start], &[self.llbb()]); | ||
let i = header_bx.phi(self.val_ty(zero), &[zero], &[self.llbb()]); | ||
|
||
let keep_going = header_bx.icmp(IntPredicate::IntNE, current, end); | ||
let keep_going = header_bx.icmp(IntPredicate::IntULT, i, count); | ||
header_bx.cond_br(keep_going, body_bb, next_bb); | ||
|
||
let mut body_bx = Self::build(self.cx, body_bb); | ||
let align = dest.align.restrict_for_offset(dest.layout.field(self.cx(), 0).size); | ||
cg_elem | ||
.val | ||
.store(&mut body_bx, PlaceRef::new_sized_aligned(current, cg_elem.layout, align)); | ||
|
||
let next = body_bx.inbounds_gep( | ||
self.backend_type(cg_elem.layout), | ||
current, | ||
&[self.const_usize(1)], | ||
); | ||
let dest_elem = dest.project_index(&mut body_bx, i); | ||
cg_elem.val.store(&mut body_bx, dest_elem); | ||
|
||
let next = body_bx.unchecked_uadd(i, self.const_usize(1)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we add no-unsigned-wrap flag here? Do we let LLVM analyze it instead? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
body_bx.br(header_bb); | ||
header_bx.add_incoming_to_phi(current, next, body_bb); | ||
header_bx.add_incoming_to_phi(i, next, body_bb); | ||
|
||
*self = Self::build(self.cx, next_bb); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,18 @@ | |
|
||
use std::sync::Arc; | ||
|
||
// CHECK-LABEL: @new_from_array | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we emit the entire IR here? I think its helpful for reviewers, test coverage, and people trying to understand the codebase better. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because we don't have something like LLVM's update_test_checks script, it is painful to update codegen tests. So we tend to use the minimal amount of CHECK lines so we don't have to update tests due to unrelated changes. (Also, most tests don't specify a target, so they generate slightly different IR per platform due to differences in ABI/ vectorization/etc., which would have to be dealt with, if we did this in general.) In this case, this is the full IR of IRdefine { ptr, i64 } @new_from_array(i64 noundef %x) unnamed_addr #0 personality ptr @rust_eh_personality {
start:
%array = alloca [1000 x i64], align 8
%broadcast.splatinsert = insertelement <2 x i64> poison, i64 %x, i64 0
%broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
%broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 %x, i64 0
%broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
br label %vector.body
vector.body: ; preds = %vector.body, %start
%index = phi i64 [ 0, %start ], [ %index.next.4, %vector.body ]
%0 = getelementptr inbounds [1000 x i64], ptr %array, i64 0, i64 %index
store <2 x i64> %broadcast.splat, ptr %0, align 8
%1 = getelementptr inbounds i64, ptr %0, i64 2
store <2 x i64> %broadcast.splat2, ptr %1, align 8
%index.next = add nuw nsw i64 %index, 4
%2 = getelementptr inbounds [1000 x i64], ptr %array, i64 0, i64 %index.next
store <2 x i64> %broadcast.splat, ptr %2, align 8
%3 = getelementptr inbounds i64, ptr %2, i64 2
store <2 x i64> %broadcast.splat2, ptr %3, align 8
%index.next.1 = add nuw nsw i64 %index, 8
%4 = getelementptr inbounds [1000 x i64], ptr %array, i64 0, i64 %index.next.1
store <2 x i64> %broadcast.splat, ptr %4, align 8
%5 = getelementptr inbounds i64, ptr %4, i64 2
store <2 x i64> %broadcast.splat2, ptr %5, align 8
%index.next.2 = add nuw nsw i64 %index, 12
%6 = getelementptr inbounds [1000 x i64], ptr %array, i64 0, i64 %index.next.2
store <2 x i64> %broadcast.splat, ptr %6, align 8
%7 = getelementptr inbounds i64, ptr %6, i64 2
store <2 x i64> %broadcast.splat2, ptr %7, align 8
%index.next.3 = add nuw nsw i64 %index, 16
%8 = getelementptr inbounds [1000 x i64], ptr %array, i64 0, i64 %index.next.3
store <2 x i64> %broadcast.splat, ptr %8, align 8
%9 = getelementptr inbounds i64, ptr %8, i64 2
store <2 x i64> %broadcast.splat2, ptr %9, align 8
%index.next.4 = add nuw nsw i64 %index, 20
%10 = icmp eq i64 %index.next.4, 1000
br i1 %10, label %repeat_loop_next, label %vector.body, !llvm.loop !2
repeat_loop_next: ; preds = %vector.body
%11 = load volatile i8, ptr @__rust_no_alloc_shim_is_unstable, align 1, !noalias !5
%12 = tail call noundef align 8 dereferenceable_or_null(8016) ptr @__rust_alloc(i64 noundef 8016, i64 noundef 8) #6, !noalias !5
%13 = icmp eq ptr %12, null
br i1 %13, label %bb1.i.i, label %"_ZN5alloc4sync12Arc$LT$T$GT$3new17hc22c917a7edefd8bE.exit"
bb1.i.i: ; preds = %repeat_loop_next
; call alloc::alloc::handle_alloc_error
tail call void @_ZN5alloc5alloc18handle_alloc_error17h5a822ff2e844764dE(i64 noundef 8, i64 noundef 8016) #7, !noalias !5
unreachable
"_ZN5alloc4sync12Arc$LT$T$GT$3new17hc22c917a7edefd8bE.exit": ; preds = %repeat_loop_next
store i64 1, ptr %12, align 8, !noalias !5
%x.sroa.4.0._14.sroa_idx.i = getelementptr inbounds i8, ptr %12, i64 8
store i64 1, ptr %x.sroa.4.0._14.sroa_idx.i, align 8, !noalias !5
%x.sroa.5.0._14.sroa_idx.i = getelementptr inbounds i8, ptr %12, i64 16
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(8000) %x.sroa.5.0._14.sroa_idx.i, ptr noundef nonnull align 8 dereferenceable(8000) %array, i64 8000, i1 false)
%14 = insertvalue { ptr, i64 } poison, ptr %12, 0
%15 = insertvalue { ptr, i64 } %14, i64 1000, 1
ret { ptr, i64 } %15
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for sharing IR. Looks like the redundant alloca was removed. Nice! |
||
#[no_mangle] | ||
pub fn new_from_array(x: u64) -> Arc<[u64]> { | ||
// Ensure that we only generate one alloca for the array. | ||
|
||
// CHECK: alloca | ||
// CHECK-SAME: [1000 x i64] | ||
// CHECK-NOT: alloca | ||
let array = [x; 1000]; | ||
Arc::new(array) | ||
} | ||
|
||
// CHECK-LABEL: @new_uninit | ||
#[no_mangle] | ||
pub fn new_uninit(x: u64) -> Arc<[u64; 1000]> { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does
project_index
emit GEP? Will it emit the inbounds and align info that was previously there?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes:
rust/compiler/rustc_codegen_ssa/src/mir/place.rs
Lines 378 to 402 in 970058e
What was there before was basically a reimplementation of
project_index
(which was sort of necessary because the GEP was threaded through the phi)