Skip to content

Missed DCE for popcount of a product of two integers when arg1 is 1 (-march=icelake-client) #140917

Open
@BreadTom

Description

@BreadTom

Godbolt
Flags were -O3 -Wall -Wextra -march=icelake-client

#include <stdint.h>

// Baseline version: returns the population count of the wrapped 64-bit
// product arg0 * arg1.
//
// Two facts are asserted to the optimizer via __builtin_unreachable():
//   1. arg1 is odd (the even case is marked unreachable);
//   2. the multiplication overflows (the non-overflow case is marked
//      unreachable).
// Calling this with inputs that violate either assumption is undefined
// behavior.
int f0(uint64_t arg0, uint64_t arg1){
    uint64_t tmp0;
    int tmp1;
    // Assumption 1: arg1 is odd.
    if(arg1 % 2 == 0)
        __builtin_unreachable();
    // tmp0 receives the product; the builtin's return value is the overflow
    // flag. Assumption 2: that flag is set.
    if(!__builtin_mul_overflow(arg0, arg1, &tmp0))
        __builtin_unreachable();
    tmp1 = __builtin_popcountg(tmp0);
    return tmp1;
}

// Variant of f0 with an extra, redundant special case for arg1 == 1.
//
// It asserts the same two assumptions as f0 (arg1 is odd; the multiplication
// overflows) and then returns popcount(arg0) directly when arg1 == 1.
// With arg1 == 1 the product equals arg0, so both return paths yield the same
// value; in addition, multiplying by 1 cannot overflow, so that branch
// contradicts the overflow assumption. Either way the branch is dead code,
// which is the missed optimization this reproducer demonstrates.
int f0_slow(uint64_t arg0, uint64_t arg1){
    uint64_t tmp0;
    int tmp1;
    // Assumption 1: arg1 is odd.
    if(arg1 % 2 == 0)
        __builtin_unreachable();
    // Assumption 2: the multiplication overflows; tmp0 holds the product.
    if(!__builtin_mul_overflow(arg0, arg1, &tmp0))
        __builtin_unreachable();
    // Redundant special case: expected to be removed by the optimizer.
    if(arg1 == 1)
        return __builtin_popcountg(arg0);
    tmp1 = __builtin_popcountg(tmp0);
    return tmp1;
}
; LLVM IR for f0: a single ctpop of the multiplication result.
; The return value carries range(i32 0, 65), i.e. it lies in [0, 64].
define dso_local range(i32 0, 65) i32 @f0(i64 noundef %arg0, i64 noundef %arg1) local_unnamed_addr {
entry:
  ; %cmp is true when the low bit of %arg1 is set (arg1 is odd); assumed true.
  %rem = and i64 %arg1, 1
  %cmp = icmp ne i64 %rem, 0
  tail call void @llvm.assume(i1 %cmp)
  ; Unsigned multiply yielding { product, overflow flag }.
  %0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %arg0, i64 %arg1)
  ; %1 = overflow flag, %2 = product.
  %1 = extractvalue { i64, i1 } %0, 1
  %2 = extractvalue { i64, i1 } %0, 0
  ; The overflow flag is assumed to be set.
  tail call void @llvm.assume(i1 %1)
  ; Population count of the product, then narrowed to the i32 return type.
  %3 = tail call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 %2)
  %cast = trunc nuw nsw i64 %3 to i32
  ret i32 %cast
}

declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1

declare i64 @llvm.ctpop.i64(i64) #1

; LLVM IR for f0_slow: two ctpop calls (one on %arg0, one on the product)
; joined by a select on %arg1 == 1. This is the redundancy the issue reports.
define dso_local range(i32 0, 65) i32 @f0_slow(i64 noundef %arg0, i64 noundef %arg1) local_unnamed_addr {
entry:
  ; %cmp is true when the low bit of %arg1 is set (arg1 is odd); assumed true.
  %rem = and i64 %arg1, 1
  %cmp = icmp ne i64 %rem, 0
  tail call void @llvm.assume(i1 %cmp)
  ; Unsigned multiply yielding { product, overflow flag }.
  %0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %arg0, i64 %arg1)
  ; %1 = overflow flag; it is assumed to be set.
  %1 = extractvalue { i64, i1 } %0, 1
  tail call void @llvm.assume(i1 %1)
  ; The explicit arg1 == 1 check from the source survives as %cmp3.
  %cmp3 = icmp eq i64 %arg1, 1
  ; First popcount: on %arg0 (the arg1 == 1 path).
  %2 = tail call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 %arg0)
  ; Second popcount: on the product (the general path).
  %3 = extractvalue { i64, i1 } %0, 0
  %4 = tail call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 %3)
  ; Both results are computed and one is selected by %cmp3.
  %retval.0.in = select i1 %cmp3, i64 %2, i64 %4
  %retval.0 = trunc nuw nsw i64 %retval.0.in to i32
  ret i32 %retval.0
}

declare void @llvm.assume(i1 noundef) #2

GCC emits only one popcnt for f0_slow, but LLVM does not optimize away the if(arg1 == 1) check: it computes both popcounts and selects between them.

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions