Open
Description
Godbolt
Compiled with the flags `-O3 -Wall -Wextra -march=icelake-client`.
#include <stdint.h>
/*
 * Returns the number of set bits in the wrapped 64-bit product arg0 * arg1.
 *
 * The two __builtin_unreachable() calls act as assumptions for the optimizer:
 * arg1 is odd, and the 64-bit multiplication overflows. Calling f0 with
 * arguments that violate either assumption is undefined behavior.
 */
int f0(uint64_t arg0, uint64_t arg1){
uint64_t tmp0;
int tmp1;
/* Assumption 1: arg1 is odd. */
if(arg1 % 2 == 0)
__builtin_unreachable();
/* Assumption 2: the product overflows; tmp0 receives the wrapped result. */
if(!__builtin_mul_overflow(arg0, arg1, &tmp0))
__builtin_unreachable();
tmp1 = __builtin_popcountg(tmp0);
return tmp1;
}
/*
 * Same assumptions and same final computation as f0 (arg1 is odd, the 64-bit
 * multiplication overflows, result is the popcount of the wrapped product),
 * with one additional early return for arg1 == 1.
 *
 * That early return can never be taken once the overflow assumption holds:
 * arg0 * 1 equals arg0 and cannot overflow. The function exists to check
 * whether the compiler recognizes the branch as dead.
 */
int f0_slow(uint64_t arg0, uint64_t arg1){
uint64_t tmp0;
int tmp1;
/* Assumption 1: arg1 is odd. */
if(arg1 % 2 == 0)
__builtin_unreachable();
/* Assumption 2: the product overflows; tmp0 receives the wrapped result. */
if(!__builtin_mul_overflow(arg0, arg1, &tmp0))
__builtin_unreachable();
/* Contradicts assumption 2 (multiplying by 1 cannot overflow), so this
 * branch is dead code. */
if(arg1 == 1)
return __builtin_popcountg(arg0);
tmp1 = __builtin_popcountg(tmp0);
return tmp1;
}
; LLVM IR for f0: popcount of the 64-bit product %arg0 * %arg1, under the
; assumptions that %arg1 is odd and that the multiplication overflows.
define dso_local range(i32 0, 65) i32 @f0(i64 noundef %arg0, i64 noundef %arg1) local_unnamed_addr {
entry:
; Assume the low bit of %arg1 is set, i.e. %arg1 is odd.
%rem = and i64 %arg1, 1
%cmp = icmp ne i64 %rem, 0
tail call void @llvm.assume(i1 %cmp)
; Unsigned multiply with overflow: field 0 is the product, field 1 is the
; overflow flag.
%0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %arg0, i64 %arg1)
%1 = extractvalue { i64, i1 } %0, 1
%2 = extractvalue { i64, i1 } %0, 0
; Assume the multiplication overflowed.
tail call void @llvm.assume(i1 %1)
; A single ctpop on the product, truncated to the i32 return value.
%3 = tail call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 %2)
%cast = trunc nuw nsw i64 %3 to i32
ret i32 %cast
}
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1
declare i64 @llvm.ctpop.i64(i64) #1
; LLVM IR for f0_slow: same assumptions as @f0, but the `arg1 == 1` branch
; survives as a select between two ctpop results.
define dso_local range(i32 0, 65) i32 @f0_slow(i64 noundef %arg0, i64 noundef %arg1) local_unnamed_addr {
entry:
; Assume the low bit of %arg1 is set, i.e. %arg1 is odd.
%rem = and i64 %arg1, 1
%cmp = icmp ne i64 %rem, 0
tail call void @llvm.assume(i1 %cmp)
; Unsigned multiply with overflow; field 1 (the overflow flag) is assumed true.
%0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %arg0, i64 %arg1)
%1 = extractvalue { i64, i1 } %0, 1
tail call void @llvm.assume(i1 %1)
; The comparison against 1 is still present even though overflow is assumed.
%cmp3 = icmp eq i64 %arg1, 1
; First ctpop: on %arg0 (the `arg1 == 1` path).
%2 = tail call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 %arg0)
; Second ctpop: on the product (field 0 of the multiply result).
%3 = extractvalue { i64, i1 } %0, 0
%4 = tail call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 %3)
; Both popcounts are computed; the select picks one based on %cmp3.
%retval.0.in = select i1 %cmp3, i64 %2, i64 %4
%retval.0 = trunc nuw nsw i64 %retval.0.in to i32
ret i32 %retval.0
}
declare void @llvm.assume(i1 noundef) #2
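GCC uses only one popcnt for `f0_slow`, but LLVM does not optimize away the `if(arg1 == 1)` check. Because the multiplication is assumed to overflow, `arg1` cannot be 1 (multiplying by 1 never overflows), so that branch is dead; nevertheless, the IR for `f0_slow` still contains two `llvm.ctpop.i64` calls and a `select` between them.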