Closed
Description
The code:
#include<stdint.h>
/*
 * Minimal reproducer: masks two 64-bit values with the same 48-bit constant
 * (0xffffffffffff) and stores the results into use[0] and use[1].
 *
 * use - output array; use[0] is always written, use[1] only when y != 0
 * x   - value masked into use[0]
 * y   - value masked into use[1]; when it is 0 the function returns early
 *
 * The identical mask appears on both sides of the early return, so the same
 * constant is needed in two different basic blocks.
 */
void foo(uint64_t* use, uint64_t x, uint64_t y) {
/* First use of the mask: unconditional. */
use[0] = x & 0xffffffffffff;
/* Early exit splits the two mask uses into separate blocks. */
if (y==0) return;
/* Second use of the same mask: only reached when y != 0. */
use[1] = y & 0xffffffffffff;
}
emits with -O3 -march=haswell:
# Compiler output for foo(use, x, y), Intel syntax.
# Register roles as used below: rdi = use, rsi = x, rdx = y
# (matches SysV AMD64 integer argument order; rax is scratch).
# The same 48-bit mask is applied twice, once via BZHI and once via MOVABS+AND.
foo:
mov al, 48                      # index 48 in the low byte; BZHI reads only bits 7:0 of its index operand
bzhi rax, rsi, rax              # rax = x with bits 48 and above cleared, i.e. x & 0xffffffffffff
mov qword ptr [rdi], rax        # use[0] = masked x
test rdx, rdx
je .LBB0_2                      # y == 0: skip the second store
movabs rax, 281474976710655     # 281474976710655 = 2^48 - 1 = 0xffffffffffff, the same mask materialized as an immediate
and rdx, rax                    # rdx = y & 0xffffffffffff
mov qword ptr [rdi + 8], rdx    # use[1] = masked y
.LBB0_2:
ret
ending up using both `bzhi` and `movabs`+`and`, which ends up worse than consistently picking either approach.
https://godbolt.org/z/c3jfc3Eoe
final LLVM IR being:
; Final IR for foo: the 48-bit mask is named once as %const in the entry block
; and shared by both `and` instructions, one in %entry and one in %if.end.
define dso_local void @foo(ptr nocapture noundef writeonly %use, i64 noundef %x, i64 noundef %y) local_unnamed_addr {
entry:
; No-op i64 -> i64 bitcast that gives the constant 281474976710655 (0xffffffffffff)
; an SSA name; the report attributes this form to constant hoisting.
%const = bitcast i64 281474976710655 to i64
; use[0] = x & mask
%and = and i64 %x, %const
store i64 %and, ptr %use, align 8
; Return early when y == 0.
%cmp = icmp eq i64 %y, 0
br i1 %cmp, label %return, label %if.end
if.end: ; preds = %entry
; use[1] = y & mask, reusing %const from the entry block; stored at byte offset 8.
%and1 = and i64 %y, %const
%arrayidx2 = getelementptr inbounds i8, ptr %use, i64 8
store i64 %and1, ptr %arrayidx2, align 8
br label %return
return: ; preds = %entry, %if.end
ret void
}
(another issue where constant hoisting similarly messes with constant-dependent backend optimizations)