Avoid loads/stores of first class aggregates #20755

dotdash · 2015-01-08T15:36:55Z

Currently, small aggregates are passed to functions as immediate values
as is. This has two consequences.

One is that aggregates are passed component-wise by LLVM, so e.g. a
struct containing four u8 values (e.g. an RGBA struct) will be passed as
four individual values.

The other is that LLVM isn't very good at optimizing loads/stores of
first class attributes. What clang does is converting the aggregate to
an appropriately sized integer type (e.g. i32 for the four u8 values),
and using that for the function argument. This allows LLVM to create
code that is a lot better.

Fixes #20450 #20149 #16506 #13927

rust-highfive · 2015-01-08T15:36:59Z

r? @nikomatsakis

(rust_highfive has picked a reviewer for you, use r? to override)

huonw · 2015-01-09T00:25:28Z

Could you post an example of before/after IR?

huonw · 2015-01-09T00:28:13Z

src/librustc_trans/trans/type_.rs

@@ -82,6 +82,10 @@ impl Type {
        ty!(llvm::LLVMInt64TypeInContext(ccx.llcx()))
    }

+    pub fn ix(ccx: &CrateContext, num_bits: u64) -> Type {


Could you add a comment describing this, since the name ix isn't immediately obvious.

dotdash · 2015-01-09T00:31:56Z

For #20149

Before:

; Function Attrs: uwtable
define dereferenceable(1) i8* @_ZN8get_bool20h3a2efbe887530bffeaaE(%"enum.core::option::Option<[\22bool\22]>[#2]"* noalias dereferenceable(2)) unnamed_addr #0 {
entry-block:
  %1 = getelementptr inbounds %"enum.core::option::Option<[\22bool\22]>[#2]"* %0, i64 0, i32 0
  %2 = load i8* %1, align 1, !range !0, !alias.scope !1
  %3 = icmp eq i8 %2, 1
  br i1 %3, label %next-block, label %then-block-21-

then-block-21-:                                   ; preds = %entry-block
  store %"enum.core::option::Option<[\22bool\22]>[#2]" { i8 1, [0 x i8] undef, [1 x i8] c"\01" }, %"enum.core::option::Option<[\22bool\22]>[#2]"* %0, align 1
  %.pre = load i8* %1, align 1, !alias.scope !6
  %phitmp = icmp eq i8 %.pre, 1
  br label %next-block

next-block:                                       ; preds = %entry-block, %then-block-21-
  %4 = phi i1 [ true, %entry-block ], [ %phitmp, %then-block-21- ]
  %5 = getelementptr inbounds %"enum.core::option::Option<[\22bool\22]>[#2]"* %0, i64 0, i32 2, i64 0
  %sret_slot.0.i = select i1 %4, i8* %5, i8* null
  %6 = icmp eq i8* %sret_slot.0.i, null
  br i1 %6, label %match_else.i, label %"_ZN6option15Option$LT$T$GT$6unwrap20h3709860458348752072E.exit"

match_else.i:                                     ; preds = %next-block
  tail call void @_ZN9panicking5panic20h53242798fe6f4f1asdlE({ %str_slice, %str_slice, i64 }* noalias readonly dereferenceable(40) @"_ZN6option15Option$LT$T$GT$6unwrap14_MSG_FILE_LINE20hb2632d1dbc781dacwpoE")
  unreachable

"_ZN6option15Option$LT$T$GT$6unwrap20h3709860458348752072E.exit": ; preds = %next-block
  ret i8* %sret_slot.0.i
}

After:

define dereferenceable(1) i8* @_ZN8get_bool20h3a2efbe887530bffeaaE(%"enum.core::option::Option<[\22bool\22]>[#2]"* noalias dereferenceable(2)) unnamed_addr #0 {
entry-block:
  %1 = getelementptr inbounds %"enum.core::option::Option<[\22bool\22]>[#2]"* %0, i64 0, i32 0
  %2 = load i8* %1, align 1, !range !0, !alias.scope !1
  %3 = icmp eq i8 %2, 1
  br i1 %3, label %"_ZN6option15Option$LT$T$GT$6unwrap21h10756339029509221854E.exit", label %then-block-21-

then-block-21-:                                   ; preds = %entry-block
  %4 = bitcast %"enum.core::option::Option<[\22bool\22]>[#2]"* %0 to i16*
  store i16 257, i16* %4, align 2
  br label %"_ZN6option15Option$LT$T$GT$6unwrap21h10756339029509221854E.exit"

"_ZN6option15Option$LT$T$GT$6unwrap21h10756339029509221854E.exit": ; preds = %then-block-21-, %entry-block
  %5 = getelementptr inbounds %"enum.core::option::Option<[\22bool\22]>[#2]"* %0, i64 0, i32 2, i64 0
  ret i8* %5
}

For #16506:

Before:

define internal void @_ZN4main20h231ed58dfe11d80bPcaE() unnamed_addr #0 {
entry-block:
  tail call fastcc void @_ZN8gradient20h2530772adbfd5d63ZbaE(%struct.RGBA zeroinitializer, %struct.RGBA { i8 -1, i8 -1, i8 -1, i8 -1 })
  tail call fastcc void @_ZN8gradient20h2530772adbfd5d63ZbaE(%struct.RGBA { i8 -1, i8 -1, i8 -1, i8 -1 }, %struct.RGBA zeroinitializer)
  ret void
}

After:

define internal void @_ZN4main20h231ed58dfe11d80bPcaE() unnamed_addr #0 {
entry-block:
  tail call fastcc void @_ZN8gradient20h2530772adbfd5d63ZbaE(i32 0, i32 -1)
  tail call fastcc void @_ZN8gradient20h2530772adbfd5d63ZbaE(i32 -1, i32 0)
  ret void
}

For #13927

Before:

define internal fastcc zeroext i1 @_ZN3foo20he087cfc23527c69deaaE(%"enum.core::option::Option<[\22u16\22]>[#2]") unnamed_addr #0 {
match_case5.i:
  %.fca.0.extract4 = extractvalue %"enum.core::option::Option<[\22u16\22]>[#2]" %0, 0
  %.fca.2.0.extract6 = extractvalue %"enum.core::option::Option<[\22u16\22]>[#2]" %0, 2, 0
  %cond.i = icmp eq i16 %.fca.0.extract4, 1
  %1 = icmp eq i16 %.fca.2.0.extract6, 2
  %sret_slot.0.i = and i1 %cond.i, %1
  ret i1 %sret_slot.0.i
}

After:

define internal fastcc zeroext i1 @_ZN3foo20he087cfc23527c69deaaE(i32) unnamed_addr #0 {
match_case5.i:
  %1 = icmp eq i32 %0, 131073
  ret i1 %1
}

dotdash · 2015-01-09T00:39:56Z

Added the requested comments (also at the other locations that look similar to the one on which you put your comment)

huonw · 2015-01-09T00:57:17Z

Wow, that looks nice? What about with non-constant data? e.g.

#[inline(never)]
fn get_bool(slot: &mut Option<bool>, val: bool) -> &mut bool {
    if slot.is_none() {
        *slot = Some(x);
    }
    slot.as_mut().unwrap()
}

dotdash · 2015-01-09T08:23:40Z

define dereferenceable(1) i8* @_ZN8get_bool20hd181be98fe457b77eaaE(%"enum.core::option::Option<[\22bool\22]>[#2]"* noalias dereferenceable(2), i1 zeroext) unnamed_addr #0 {
entry-block:
  %2 = getelementptr inbounds %"enum.core::option::Option<[\22bool\22]>[#2]"* %0, i64 0, i32 0
  %3 = load i8* %2, align 1, !range !0, !alias.scope !1
  %4 = icmp eq i8 %3, 1
  br i1 %4, label %"_ZN6option15Option$LT$T$GT$6unwrap21h12513988046761130743E.exit", label %then-block-25-

then-block-25-:                                   ; preds = %entry-block
  %.sroa.3.0.insert.ext = zext i1 %1 to i16
  %.sroa.3.0.insert.shift = shl nuw nsw i16 %.sroa.3.0.insert.ext, 8
  %.sroa.0.0.insert.insert = or i16 %.sroa.3.0.insert.shift, 1
  %5 = bitcast %"enum.core::option::Option<[\22bool\22]>[#2]"* %0 to i16*
  store i16 %.sroa.0.0.insert.insert, i16* %5, align 2
  br label %"_ZN6option15Option$LT$T$GT$6unwrap21h12513988046761130743E.exit"

"_ZN6option15Option$LT$T$GT$6unwrap21h12513988046761130743E.exit": ; preds = %then-block-25-, %entry-block
  %6 = getelementptr inbounds %"enum.core::option::Option<[\22bool\22]>[#2]"* %0, i64 0, i32 2, i64 0
  ret i8* %6
}

For x86_64 that becomes:

    .type   _ZN8get_bool20hd181be98fe457b77eaaE,@function
_ZN8get_bool20hd181be98fe457b77eaaE:
    .cfi_startproc
    movzbl  (%rdi), %eax
    cmpl    $1, %eax
    je  .LBB0_2
    movzbl  %sil, %eax
    shll    $8, %eax
    orl $1, %eax
    movw    %ax, (%rdi)
.LBB0_2:
    incq    %rdi
    movq    %rdi, %rax
    retq
.Ltmp0:
    .size   _ZN8get_bool20hd181be98fe457b77eaaE, .Ltmp0-_ZN8get_bool20hd181be98fe457b77eaaE

… args Currently we pass a `struct S(u64)` as an immediate value on i686, but a `struct S { x: u64 }` is passed indirectly. This seems pretty wrong, as they both have the same underlying LLVM type `{ i64 }`, no sense in treating them differently.

dotdash · 2015-01-09T19:17:19Z

The new commit (the first one) fixes the build failure from the previous iteration, which happened exactly because TypeId is a struct with a single named u64 field and was passed indirectly on i686.

Currently, small aggregates are passed to functions as immediate values as is. This has two consequences. One is that aggregates are passed component-wise by LLVM, so e.g. a struct containing four u8 values (e.g. an RGBA struct) will be passed as four individual values. The other is that LLVM isn't very good at optimizing loads/stores of first class attributes. What clang does is converting the aggregate to an appropriately sized integer type (e.g. i32 for the four u8 values), and using that for the function argument. This allows LLVM to create code that is a lot better. Fixes rust-lang#20450 rust-lang#20149 rust-lang#16506 rust-lang#13927

Currently even small fixed-size arrays are passed as indirect parameters, which seems to be just an oversight. Let's handle them the same as structs of the same size, passing them as immediate values.

Currently, small aggregates are passed to functions as immediate values as is. This has two consequences. One is that aggregates are passed component-wise by LLVM, so e.g. a struct containing four u8 values (e.g. an RGBA struct) will be passed as four individual values. The other is that LLVM isn't very good at optimizing loads/stores of first class attributes. What clang does is converting the aggregate to an appropriately sized integer type (e.g. i32 for the four u8 values), and using that for the function argument. This allows LLVM to create code that is a lot better. Fixes #20450 #20149 #16506 #13927

Aatch · 2015-01-11T06:51:43Z

I'm going to say those failures were spurious, since it didn't fail on that bot last time.

Currently, small aggregates are passed to functions as immediate values as is. This has two consequences. One is that aggregates are passed component-wise by LLVM, so e.g. a struct containing four u8 values (e.g. an RGBA struct) will be passed as four individual values. The other is that LLVM isn't very good at optimizing loads/stores of first class attributes. What clang does is converting the aggregate to an appropriately sized integer type (e.g. i32 for the four u8 values), and using that for the function argument. This allows LLVM to create code that is a lot better. Fixes #20450 #20149 #16506 #13927

rust-highfive assigned nikomatsakis Jan 8, 2015

dotdash force-pushed the fca branch from 34d0d5e to 535e6fc Compare January 8, 2015 23:12

huonw reviewed Jan 9, 2015
View reviewed changes

dotdash force-pushed the fca branch from 535e6fc to f4e620f Compare January 9, 2015 00:38

dotdash force-pushed the fca branch from f4e620f to 6fba2a9 Compare January 9, 2015 17:50

dotdash added 2 commits January 11, 2015 01:44

Pass small fixed-size arrays as immediates

6f1b06e

Currently even small fixed-size arrays are passed as indirect parameters, which seems to be just an oversight. Let's handle them the same as structs of the same size, passing them as immediate values.

dotdash force-pushed the fca branch from 6fba2a9 to 6f1b06e Compare January 11, 2015 00:44

bors merged commit 6f1b06e into rust-lang:master Jan 11, 2015

This was referenced Jan 11, 2015

Unfortunate optimization with Option<bool> vs Option<uint> #20149

Closed

Small structs with many elements as function arguments seem inefficient #16506

Closed

== with Option<primitive> has two branches #13927

Closed

vhbit mentioned this pull request Feb 2, 2015

ICE: assertion failure in LLVM when returning a struct from "extern" function. #21454

Closed

dotdash deleted the fca branch February 4, 2015 12:41

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Avoid loads/stores of first class aggregates #20755

Avoid loads/stores of first class aggregates #20755

Uh oh!

dotdash commented Jan 8, 2015

Uh oh!

rust-highfive commented Jan 8, 2015

Uh oh!

huonw commented Jan 9, 2015

Uh oh!

huonw Jan 9, 2015

Uh oh!

dotdash commented Jan 9, 2015

Uh oh!

dotdash commented Jan 9, 2015

Uh oh!

huonw commented Jan 9, 2015

Uh oh!

dotdash commented Jan 9, 2015

Uh oh!

dotdash commented Jan 9, 2015

Uh oh!

Aatch commented Jan 11, 2015

Uh oh!

Uh oh!

Avoid loads/stores of first class aggregates #20755

Avoid loads/stores of first class aggregates #20755

Uh oh!

Conversation

dotdash commented Jan 8, 2015

Uh oh!

rust-highfive commented Jan 8, 2015

Uh oh!

huonw commented Jan 9, 2015

Uh oh!

huonw Jan 9, 2015

Choose a reason for hiding this comment

Uh oh!

dotdash commented Jan 9, 2015

Uh oh!

dotdash commented Jan 9, 2015

Uh oh!

huonw commented Jan 9, 2015

Uh oh!

dotdash commented Jan 9, 2015

Uh oh!

dotdash commented Jan 9, 2015

Uh oh!

Aatch commented Jan 11, 2015

Uh oh!

Uh oh!