[InstCombine] Use known bits to determine exact int->fp cast

vfdff · vfdff · commit 404479b4b042 · 2022-06-30T09:45:11.000+08:00
Reviewed By: spatel, nikic Differential Revision: https://reviews.llvm.org/D127854
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1720,7 +1720,7 @@ static Type *getMinimumFPType(Value *V) {
 
 /// Return true if the cast from integer to FP can be proven to be exact for all
 /// possible inputs (the conversion does not lose any precision).
-static bool isKnownExactCastIntToFP(CastInst &I) {
+static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
   CastInst::CastOps Opcode = I.getOpcode();
   assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
          "Unexpected cast");
@@ -1757,6 +1757,12 @@ static bool isKnownExactCastIntToFP(CastInst &I) {
   // TODO:
   // Try harder to find if the source integer type has less significant bits.
   // For example, compute number of sign bits or compute low bit mask.
+  KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
+  int LowBits =
+      (int)SrcTy->getScalarSizeInBits() - SrcKnown.countMinLeadingZeros();
+  if (LowBits <= DestNumSigBits)
+    return true;
+
   return false;
 }
 
@@ -1937,7 +1943,7 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
   Value *Src = FPT.getOperand(0);
   if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
     auto *FPCast = cast<CastInst>(Src);
-    if (isKnownExactCastIntToFP(*FPCast))
+    if (isKnownExactCastIntToFP(*FPCast, *this))
       return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
@@ -1951,7 +1957,7 @@ Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
   Value *Src = FPExt.getOperand(0);
   if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
     auto *FPCast = cast<CastInst>(Src);
-    if (isKnownExactCastIntToFP(*FPCast))
+    if (isKnownExactCastIntToFP(*FPCast, *this))
       return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
@@ -1978,7 +1984,7 @@ Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
 
   // This means this is also safe for a signed input and unsigned output, since
   // a negative input would lead to undefined behavior.
-  if (!isKnownExactCastIntToFP(*OpI)) {
+  if (!isKnownExactCastIntToFP(*OpI, *this)) {
     // The first cast may not round exactly based on the source integer width
     // and FP width, but the overflow UB rules can still allow this to fold.
     // If the destination type is narrow, that means the intermediate FP value
diff --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll
@@ -170,8 +170,7 @@ define half @sint_to_fptrunc(i32 %x) {
 define half @masked_sint_to_fptrunc1(i32 %x) {
 ; CHECK-LABEL: @masked_sint_to_fptrunc1(
 ; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 16777215
-; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fptrunc float [[F]] to half
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[M]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %m = and i32 %x, 16777215
@@ -183,8 +182,7 @@ define half @masked_sint_to_fptrunc1(i32 %x) {
 define half @masked_sint_to_fptrunc2(i32 %x) {
 ; CHECK-LABEL: @masked_sint_to_fptrunc2(
 ; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fptrunc float [[F]] to half
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[M]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %m = lshr i32 %x, 8
@@ -220,8 +218,7 @@ define double @sint_to_fpext(i32 %x) {
 define double @masked_sint_to_fpext1(i32 %x) {
 ; CHECK-LABEL: @masked_sint_to_fpext1(
 ; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 16777215
-; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[F]] to double
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[M]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %m = and i32 %x, 16777215
@@ -233,8 +230,7 @@ define double @masked_sint_to_fpext1(i32 %x) {
 define double @masked_sint_to_fpext2(i32 %x) {
 ; CHECK-LABEL: @masked_sint_to_fpext2(
 ; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT:    [[F:%.*]] = sitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[F]] to double
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[M]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %m = lshr i32 %x, 8
@@ -270,8 +266,7 @@ define half @uint_to_fptrunc(i32 %x) {
 define half @masked_uint_to_fptrunc1(i32 %x) {
 ; CHECK-LABEL: @masked_uint_to_fptrunc1(
 ; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 16777215
-; CHECK-NEXT:    [[F:%.*]] = uitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fptrunc float [[F]] to half
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[M]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %m = and i32 %x, 16777215
@@ -283,8 +278,7 @@ define half @masked_uint_to_fptrunc1(i32 %x) {
 define half @masked_uint_to_fptrunc2(i32 %x) {
 ; CHECK-LABEL: @masked_uint_to_fptrunc2(
 ; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT:    [[F:%.*]] = uitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fptrunc float [[F]] to half
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[M]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %m = lshr i32 %x, 8
@@ -320,8 +314,7 @@ define double @uint_to_fpext(i32 %x) {
 define double @masked_uint_to_fpext1(i32 %x) {
 ; CHECK-LABEL: @masked_uint_to_fpext1(
 ; CHECK-NEXT:    [[M:%.*]] = and i32 [[X:%.*]], 16777215
-; CHECK-NEXT:    [[F:%.*]] = uitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[F]] to double
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[M]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %m = and i32 %x, 16777215
@@ -333,8 +326,7 @@ define double @masked_uint_to_fpext1(i32 %x) {
 define double @masked_uint_to_fpext2(i32 %x) {
 ; CHECK-LABEL: @masked_uint_to_fpext2(
 ; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT:    [[F:%.*]] = uitofp i32 [[M]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[F]] to double
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[M]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %m = lshr i32 %x, 8
diff --git a/llvm/test/Transforms/InstCombine/sitofp.ll b/llvm/test/Transforms/InstCombine/sitofp.ll
@@ -218,16 +218,38 @@ define i55 @test19(i64 %A) {
   ret i55 %C
 }
 
-; TODO: The mask guarantees that the input is small enough to eliminate the FP casts.
+; The mask guarantees that the input is small enough to eliminate the FP casts.
 
 define i25 @masked_input(i25 %A) {
 ; CHECK-LABEL: @masked_input(
 ; CHECK-NEXT:    [[M:%.*]] = and i25 [[A:%.*]], 65535
+; CHECK-NEXT:    ret i25 [[M]]
+;
+  %m = and i25 %A, 65535
+  %B = uitofp i25 %m to float
+  %C = fptoui float %B to i25
+  ret i25 %C
+}
+
+define i25 @max_masked_input(i25 %A) {
+; CHECK-LABEL: @max_masked_input(
+; CHECK-NEXT:    [[M:%.*]] = and i25 [[A:%.*]], 16777215
+; CHECK-NEXT:    ret i25 [[M]]
+;
+  %m = and i25 %A, 16777215    ; max intermediate 16777215 (= 1 << 24)-1
+  %B = uitofp i25 %m to float
+  %C = fptoui float %B to i25
+  ret i25 %C
+}
+
+define i25 @overflow_masked_input(i25 %A) {
+; CHECK-LABEL: @overflow_masked_input(
+; CHECK-NEXT:    [[M:%.*]] = and i25 [[A:%.*]], -16777216
 ; CHECK-NEXT:    [[B:%.*]] = uitofp i25 [[M]] to float
 ; CHECK-NEXT:    [[C:%.*]] = fptoui float [[B]] to i25
 ; CHECK-NEXT:    ret i25 [[C]]
 ;
-  %m = and i25 %A, 65535
+  %m = and i25 %A, 16777216  ; Negative test - intermediate 16777216 (= 1 << 24)
   %B = uitofp i25 %m to float
   %C = fptoui float %B to i25
   ret i25 %C