Description
#[deriving(Eq)]
generates nested matches for enums, like this:
#[deriving(Eq)]
pub enum Foo { A1, A2, A3, }
#[automatically_derived]
impl ::std::cmp::Eq for Foo {
#[inline]
fn eq(&self, __arg_0: &Foo) -> ::bool {
match *self {
A1 => match *__arg_0 { A1 => true, _ => false },
A2 => match *__arg_0 { A2 => true, _ => false },
A3 => match *__arg_0 { A3 => true, _ => false }
}
}
#[inline]
fn ne(&self, __arg_0: &Foo) -> ::bool {
match *self {
A1 => match *__arg_0 { A1 => false, _ => true },
A2 => match *__arg_0 { A2 => false, _ => true },
A3 => match *__arg_0 { A3 => false, _ => true }
}
}
}
Although we emit range constraints for the loads, this doesn't get optimized to
a simple comparison but this:
define i8 @_ZN18Foo...std..cmp..Eq2eq20h9a2945fb10f6320blaa4v0.0E(i8* nocapture readonly, i8* nocapture readonly) unnamed_addr #0 {
entry-block:
%2 = load i8* %0, align 1, !range !0
switch i8 %2, label %match_else [
i8 0, label %match_case
i8 1, label %match_case3
]
match_else: ; preds = %entry-block
%3 = load i8* %1, align 1, !range !0
%cond20 = icmp eq i8 %3, 2
%. = zext i1 %cond20 to i8
br label %join18
match_case: ; preds = %entry-block
%4 = load i8* %1, align 1, !range !0
%cond19 = icmp eq i8 %4, 0
%.23 = zext i1 %cond19 to i8
br label %join18
match_case3: ; preds = %entry-block
%5 = load i8* %1, align 1, !range !0
%cond = icmp eq i8 %5, 1
%.24 = zext i1 %cond to i8
br label %join18
join18: ; preds = %match_case3, %match_case, %match_else
%__make_return_pointer.0 = phi i8 [ %., %match_else ], [ %.23, %match_case ], [ %.24, %match_case3 ]
ret i8 %__make_return_pointer.0
}
Which gets a pretty straight-forward translation to assembly:
.globl _ZN18Foo...std..cmp..Eq2eq20h9a2945fb10f6320blaa4v0.0E
.align 16, 0x90
.type _ZN18Foo...std..cmp..Eq2eq20h9a2945fb10f6320blaa4v0.0E,@function
_ZN18Foo...std..cmp..Eq2eq20h9a2945fb10f6320blaa4v0.0E:
.cfi_startproc
cmpq %fs:112, %rsp
ja .LBB0_2
movabsq $0, %r10
movabsq $0, %r11
callq __morestack
retq
.LBB0_2:
movb (%rdi), %al
testb %al, %al
jne .LBB0_3
cmpb $0, (%rsi)
sete %al
retq
.LBB0_3:
movzbl %al, %eax
cmpl $1, %eax
jne .LBB0_4
movzbl (%rsi), %eax
cmpl $1, %eax
sete %al
retq
.LBB0_4:
movzbl (%rsi), %eax
cmpl $2, %eax
sete %al
retq
.Ltmp0:
.size _ZN18Foo...std..cmp..Eq2eq20h9a2945fb10f6320blaa4v0.0E, .Ltmp0-_ZN18Foo...std..cmp..Eq2eq20h9a2945fb10f6320blaa4v0.0E
.cfi_endproc
For C-style enums, we could maybe special case the deriving code generate something like this instead:
fn eq(&self, rhs: &Foo) -> bool {
*self as u64 == *rhs as u64
}
But since the discriminant isn't exposed in the language (and is only virtual in case of e.g. Option<&int>), I don't see how to solve this for ADTs in general.
Exposing the discriminant through an intrinsic and doing an early equality check helps LLVM to generate better code, but it's still not optimal.
pub fn test1(a: &Foo, b: &Foo) -> bool {
let d1 = unsafe { std::intrinsics::get_disr(a) };
let d2 = unsafe { std::intrinsics::get_disr(b) };
if d1 != d2 {
return false;
}
match a {
&A1 => match b { &A1 => true, _ => false },
&A2 => match b { &A2 => true, _ => false },
&A3 => match b { &A3 => true, _ => false },
}
}
Gives:
define zeroext i1 @_ZN5test120h51356ea9d1622fa9haa4v0.0E(i8* nocapture readonly, i8* nocapture readonly) unnamed_addr #0 {
entry-block:
%2 = load i8* %0, align 1, !range !0
%3 = load i8* %1, align 1, !range !0
%4 = icmp eq i8 %2, %3
br i1 %4, label %next-block, label %return
next-block: ; preds = %entry-block
%5 = icmp ne i8 %2, 3
ret i1 %5
return: ; preds = %entry-block
ret i1 false
}
And:
.globl _ZN5test120h51356ea9d1622fa9haa4v0.0E
.align 16, 0x90
.type _ZN5test120h51356ea9d1622fa9haa4v0.0E,@function
_ZN5test120h51356ea9d1622fa9haa4v0.0E:
.cfi_startproc
cmpq %fs:112, %rsp
ja .LBB0_2
movabsq $0, %r10
movabsq $0, %r11
callq __morestack
retq
.LBB0_2:
movzbl (%rdi), %eax
movzbl (%rsi), %ecx
cmpl %ecx, %eax
jne .LBB0_4
movzbl %al, %eax
cmpl $3, %eax
setne %al
retq
.LBB0_4:
xorl %eax, %eax
retq
.Ltmp0:
.size _ZN5test120h51356ea9d1622fa9haa4v0.0E, .Ltmp0-_ZN5test120h51356ea9d1622fa9haa4v0.0E
.cfi_endproc
But I don't think exposing the discriminant is such a great idea, and having to do the extra check is weird and not quite natural. So I wonder if there's a better way to handle this.
Fun fact: Adding another variant to the enum (so we have pub enum Foo { A1, A2, A3, A4 }
) makes LLVM generate better code for some reason:
define zeroext i1 @_ZN5test120ha73e87eb2055d3a4iaa4v0.0E(i8* nocapture readonly, i8* nocapture readonly) unnamed_addr #0 {
entry-block:
%2 = load i8* %0, align 1, !range !0
%3 = load i8* %1, align 1, !range !0
%4 = icmp eq i8 %2, %3
br i1 %4, label %select.end, label %select.mid
select.mid: ; preds = %entry-block
br label %select.end
select.end: ; preds = %entry-block, %select.mid
%. = phi i1 [ true, %entry-block ], [ false, %select.mid ]
ret i1 %.
}
And adding another round of opt -O2
(LLVM seems to stop optimizing too early here) gives:
define zeroext i1 @_ZN5test120ha73e87eb2055d3a4iaa4v0.0E(i8* nocapture readonly, i8* nocapture readonly) unnamed_addr #0 {
entry-block:
%2 = load i8* %0, align 1, !range !0
%3 = load i8* %1, align 1, !range !0
%4 = icmp eq i8 %2, %3
ret i1 %4
}
EDIT: Forgot to mention that the code with the intrinsic is based upon my branch that uses i1
for bools. Just to avoid any confusion...