-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[InstCombine] Fix for folding select
into floating point binary operators.
#83200
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Fix for folding select
into floating point binary operators.
#83200
Conversation
@llvm/pr-subscribers-llvm-transforms Author: None (michele-scandale) ChangesFolding a Patch is 53.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83200.diff 10 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 71fa9b9ba41ebb..298cd157ced141 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -527,8 +527,11 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
// instructions have different flags and add tests to ensure the
// behaviour is correct.
FastMathFlags FMF;
- if (isa<FPMathOperator>(&SI))
+ if (isa<FPMathOperator>(&SI)) {
FMF = SI.getFastMathFlags();
+ if (!computeKnownFPClass(FalseVal, FMF, fcNan, &SI).isKnownNeverNaN())
+ return nullptr;
+ }
Constant *C = ConstantExpr::getBinOpIdentity(
TVI->getOpcode(), TVI->getType(), true, FMF.noSignedZeros());
Value *OOp = TVI->getOperand(2 - OpToFold);
diff --git a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
index a7adcd1ac61e7f..dedd12f8cc7a3d 100644
--- a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
+++ b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
@@ -427,13 +427,13 @@ define float @fmul_by_snan_if_0_oeq_zero_f32(float %x) {
define float @fmul_by_var_if_0_oeq_zero_f32(float %x, float %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul float %x, %y
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
@@ -441,14 +441,14 @@ define float @fmul_by_fabs_var_if_0_oeq_zero_f32(float %x, float %y) {
; CHECK-LABEL: @fmul_by_fabs_var_if_0_oeq_zero_f32(
; CHECK-NEXT: [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y_FABS]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y_FABS]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%y.fabs = call float @llvm.fabs.f32(float %y)
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul float %x, %y.fabs
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
@@ -467,13 +467,13 @@ define float @fmul_by_fabs_nnan_ninf_var_if_0_oeq_zero_f32(float %x, float %y) {
define float @fmul_by_var_if_0_oeq_zero_f32_nsz_fmul(float %x, float %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nsz_fmul(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nsz float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul nsz float %x, %y
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
@@ -481,13 +481,13 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_fmul(float %x, float %y) {
define float @fmul_by_var_if_0_oeq_zero_f32_nsz_ninf_fmul(float %x, float %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nsz_ninf_fmul(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul ninf nsz float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul nsz ninf float %x, %y
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
@@ -495,13 +495,13 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_ninf_fmul(float %x, float %y) {
define float @fmul_by_var_if_0_oeq_zero_f32_nsz_nnan_fmul(float %x, float %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nsz_nnan_fmul(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan nsz float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul nsz nnan float %x, %y
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
@@ -509,13 +509,13 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_nnan_fmul(float %x, float %y) {
define float @fmul_by_var_if_0_oeq_zero_f32_nnan_ninf_fmul(float %x, float %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nnan_ninf_fmul(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul nnan ninf float %x, %y
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
@@ -558,26 +558,26 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_nsz_inverted(f
define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(float %x, float %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf nsz float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul nnan ninf nsz float %x, %y
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted(float %x, float %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf nsz float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul nnan ninf nsz float %y, %x
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
@@ -585,26 +585,26 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted(float %x
define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero(float %x, float nofpclass(nzero) %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul nnan ninf float %x, %y
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero_negsub(float %x, float nofpclass(nzero nsub) %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero_negsub(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul nnan ninf float %x, %y
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
@@ -622,26 +622,26 @@ define float @fmul_by_var_if_0_oeq_zero_f32_known_never_nan_inf_select_nsz(float
define float @fmul_by_var_if_0_oeq_zero_f32_fmul_known_never_nan_inf_negzero(float %x, float nofpclass(nan inf nzero) %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_known_never_nan_inf_negzero(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul float %x, %y
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
define float @fmul_by_var_if_0_oeq_zero_f32_fmul_known_never_nan_inf_negzero_nsub(float %x, float nofpclass(nan inf nzero nsub) %y) {
; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_known_never_nan_inf_negzero_nsub(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul float %x, %y
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
@@ -692,26 +692,26 @@ define float @fmul_by_var_if_not_one_0_zero_f32_assume_finite_fmul_nsz(float %x,
define float @fmul_by_self_if_0_oeq_zero_f32(float %x) {
; CHECK-LABEL: @fmul_by_self_if_0_oeq_zero_f32(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul float %x, %x
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
define float @fmul_by_self_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(float %x) {
; CHECK-LABEL: @fmul_by_self_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(
; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00
; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf nsz float [[SCALED_X]], [[X]]
; CHECK-NEXT: ret float [[SCALED_IF_DENORMAL]]
;
%x.is.zero = fcmp oeq float %x, 0.0
%scaled.x = fmul nnan ninf nsz float %x, %x
- %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+ %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
ret float %scaled.if.denormal
}
diff --git a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
index 496854c7d731ab..77ff16a8b2e3d8 100644
--- a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
+++ b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
@@ -1,8 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-define float @select_fadd(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fadd(
+define float @select_maybe_nan_fadd(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_maybe_nan_fadd(
+; CHECK-NEXT: [[C:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT: ret float [[D]]
+;
+ %C = fadd float %A, %B
+ %D = select i1 %cond, float %C, float %A
+ ret float %D
+}
+
+define float @select_fpclass_fadd(i1 %cond, float nofpclass(nan) %A, float %B) {
+; CHECK-LABEL: @select_fpclass_fadd(
; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
; CHECK-NEXT: [[D:%.*]] = fadd float [[C]], [[A:%.*]]
; CHECK-NEXT: ret float [[D]]
@@ -12,41 +23,52 @@ define float @select_fadd(i1 %cond, float %A, float %B) {
ret float %D
}
-define float @select_fadd_swapped(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fadd_swapped(
-; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
+define float @select_nnan_fadd(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fadd(
+; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
; CHECK-NEXT: [[D:%.*]] = fadd float [[C]], [[A:%.*]]
; CHECK-NEXT: ret float [[D]]
;
%C = fadd float %A, %B
- %D = select i1 %cond, float %A, float %C
+ %D = select nnan i1 %cond, float %C, float %A
ret float %D
}
-define float @select_fadd_fast_math(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fadd_fast_math(
-; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
+define float @select_nnan_fadd_swapped(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fadd_swapped(
+; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = fadd float [[C]], [[A:%.*]]
+; CHECK-NEXT: ret float [[D]]
+;
+ %C = fadd float %A, %B
+ %D = select nnan i1 %cond, float %A, float %C
+ ret float %D
+}
+
+define float @select_nnan_fadd_fast_math(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fadd_fast_math(
+; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
; CHECK-NEXT: [[D:%.*]] = fadd fast float [[C]], [[A:%.*]]
; CHECK-NEXT: ret float [[D]]
;
%C = fadd fast float %A, %B
- %D = select i1 %cond, float %C, float %A
+ %D = select nnan i1 %cond, float %C, float %A
ret float %D
}
-define float @select_fadd_swapped_fast_math(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fadd_swapped_fast_math(
-; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
+define float @select_nnan_fadd_swapped_fast_math(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fadd_swapped_fast_math(
+; CHECK-NEXT: [[C:%.*]] = select nnan i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
; CHECK-NEXT: [[D:%.*]] = fadd fast float [[C]], [[A:%.*]]
; CHECK-NEXT: ret float [[D]]
;
%C = fadd fast float %A, %B
- %D = select i1 %cond, float %A, float %C
+ %D = select nnan i1 %cond, float %A, float %C
ret float %D
}
-define <4 x float> @select_nsz_fadd_v4f32(<4 x i1> %cond, <4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: @select_nsz_fadd_v4f32(
+define <4 x float> @select_nnan_nsz_fadd_v4f32(<4 x i1> %cond, <4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @select_nnan_nsz_fadd_v4f32(
; CHECK-NEXT: [[C:%.*]] = select nnan nsz <4 x i1> [[COND:%.*]], <4 x float> [[B:%.*]], <4 x float> zeroinitializer
; CHECK-NEXT: [[D:%.*]] = fadd nnan nsz <4 x float> [[C]], [[A:%.*]]
; CHECK-NEXT: ret <4 x float> [[D]]
@@ -56,202 +78,202 @@ define <4 x float> @select_nsz_fadd_v4f32(<4 x i1> %cond, <4 x float> %A, <4 x f
ret <4 x float> %D
}
-define <vscale x 4 x float> @select_nsz_fadd_nxv4f32(<vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %B) {
-; CHECK-LABEL: @select_nsz_fadd_nxv4f32(
+define <vscale x 4 x float> @select_nnan_nsz_fadd_nxv4f32(<vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %B) {
+; CHECK-LABEL: @select_nnan_nsz_fadd_nxv4f32(
; CHECK-NEXT: [[C:%.*]] = select nnan nsz <vscale x 4 x i1> [[COND:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> zeroinitializer
; CHECK-NEXT: [[D:%.*]] = fadd nnan nsz <vscale x 4 x float> [[C]], [[A:%.*]]
; CHECK-NEXT: ret <vscale x 4 x float> [[D]]
;
- %C = fadd nsz nnan <vscale x 4 x float> %A, %B
- %D = select nsz nnan <vscale x 4 x i1> %cond, <vscale x 4 x float> %C, <vscale x 4 x float> %A
+ %C = fadd nnan nsz <vscale x 4 x float> %A, %B
+ %D = select nnan nsz <vscale x 4 x i1> %cond, <vscale x 4 x float> %C, <vscale x 4 x float> %A
ret <vscale x 4 x float> %D
}
-define <vscale x 4 x float> @select_nsz_fadd_nxv4f32_swapops(<vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %B) {
-; CHECK-LABEL: @select_nsz_fadd_nxv4f32_swapops(
+define <vscale x 4 x float> @select_nnan_nsz_fadd_nxv4f32_swapops(<vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %B) {
+; CHECK-LABEL: @select_nnan_nsz_fadd_nxv4f32_swapops(
; CHECK-NEXT: [[C:%.*]] = select fast <vscale x 4 x i1> [[COND:%.*]], <vscale x 4 x float> zeroinitializer, <vscale x 4 x float> [[B:%.*]]
; CHECK-NEXT: [[D:%.*]] = fadd fast <vscale x 4 x float> [[C]], [[A:%.*]]
; CHECK-NEXT: ret <vscale x 4 x float> [[D]]
;
%C = fadd fast <vscale x 4 x float> %A, %B
- %D = select fast <vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %C
+ %D = select nnan fast <vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %C
ret <vscale x 4 x float> %D
}
-define float @select_fmul(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fm...
[truncated]
|
FMF = SI.getFastMathFlags(); | ||
if (!computeKnownFPClass(FalseVal, FMF, fcNan, &SI).isKnownNeverNaN()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Think it be better to place this below after line 543 to avoid relatively expensive analysis until after all the basic checks have been performed.
llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
Show resolved
Hide resolved
llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
Show resolved
Hide resolved
Ping |
(!OOpIsAPInt || !isSelect01(C->getUniqueInteger(), *OOpC))) | ||
return nullptr; | ||
|
||
// Cannot fold if the false value might be a NaN. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe explain why
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated the comment. I'll wait few days before landing the change in case there are further comments. Thanks.
…rators. Folding a `select` into a floating point binary operators can only be done if the result is preserved for both case. In particular, if the other operand of the `select` can be a NaN, then the transformation won't preserve the result value.
a45bbd2
to
6cabf37
Compare
Ping. Is it ready to land? |
…rators. (llvm#83200) Folding a `select` into a floating point binary operators can only be done if the result is preserved for both case. In particular, if the other operand of the `select` can be a NaN, then the transformation won't preserve the result value.
We've noticed a 10-20% regression of the https://github.com/llvm/llvm-test-suite/tree/main/MultiSource/Benchmarks/Ptrdist/ks benchmark on AArch64 after this commit. We don't consider this an important microbenchmark, but I wanted to let you know just in case. |
I've found that the change introduced here has come with a huge performance penalty at |
|
Yes, I've tried |
Seems all we need is this change:
I had been lost in that logic at first, then I've found that it's the |
@dtcxzyw could you tell me if there is any followup planned for this? |
Testcase for this? |
@arsenm I'm using the following code (compiled with With the commit introduced by this PR, the reduction loop cannot be vectorized:
Normally, only a slight (yet consistent) change in the measured performance could be observed from that, unless veclib is used, in which case lack of the vectorization results in a dramatic performance degradation. To make things easier, one of the possible ways of extracting the loop itself into a function:
|
Can you turn this into a reduced IR test, in a PR with your proposed patch above |
I'd need to know if such change makes sense before working on a PR with a proper test case. Namely, is obtaining information from |
You need a proper test case to make sense of the situation. It would be easier to reason about and discuss with a concrete 3 line IR testcase than this
computeKnownFPClass knows a few tricks operating directly on the select based on the compared values. It would help to see the reduced example |
Ok, so here it is:
without this commit, it is transformed into:
With this commit, no change is being done by the InstCombine pass. giving it some more context:
|
If I add the
the transformation succeed even with your patch. It's because of this:
|
Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from llvm#83200.
Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from llvm#83200.
// have preserved the exact NaN bit-pattern. | ||
// Avoid the folding if the false value might be a NaN. | ||
if (isa<FPMathOperator>(&SI) && | ||
!computeKnownFPClass(FalseVal, FMF, fcNan, &SI).isKnownNeverNaN()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This ought to try to make use of the nnan flag on the select before trying the computeKnownFPClass query. There's a wrapper around computeKnownFPClass which takes an additional FastMathFlags
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm sorry, I'm sometimes lost here.
There's a wrapper around computeKnownFPClass which takes an additional FastMathFlags
Did you mean
The wrapper around computeKnownFPClass called here takes an additional FastMathFlags
? That's how I see here, InstCombineSelect.cpp defines two wrappers around computeKnownFPClass
:
KnownFPClass computeKnownFPClass(Value *Val, FastMathFlags FMF,
FPClassTest Interested = fcAllFlags,
const Instruction *CtxI = nullptr,
unsigned Depth = 0) const
and
KnownFPClass computeKnownFPClass(Value *Val,
FPClassTest Interested = fcAllFlags,
const Instruction *CtxI = nullptr,
unsigned Depth = 0) const
The one called here is the one which takes FastMathFlags FMF
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This ought to try to make use of the nnan flag on the select before trying the computeKnownFPClass query
Could you say what use would that be?
NewSel->takeName(TVI); | ||
BinaryOperator *BO = | ||
BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel); | ||
BO->copyIRFlags(TVI); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this can or in the value flags from the select
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
alive2 is really slow here without known-no-infs on the inputs though, so I'm not sure the exact flag rewrite rules yet
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe (nnan_bo | nnan_select) | (others_flags_bo & other_flags_select)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this can or in the value flags from the select
I don't even know what does it mean :(
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this can or in the value flags from the select
I don't even know what does it mean :(
Ok, I guess I've deciphered it. I guess what you propose is to 'or' the flags copied from TVI with value flags from the select. Trouble is, SI don't have any FMF flags, they were not generated by simplifyCFG pass since they were lost in SROA pass.
Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from llvm#83200.
Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from llvm#83200.
Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from llvm#83200.
Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from llvm#83200.
Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from llvm#83200.
…3686) Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from #83200.
…m#113686) Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from llvm#83200.
…m#113686) Given a recursive phi with select: %p = phi [ 0, entry ], [ %sel, loop] %sel = select %c, %other, %p The fp state can be calculated using the knowledge that the select/phi pair can only be the initial state (0 here) or from %other. This adds a short-cut into computeKnownFPClass for PHI to detect that the select is recursive back to the phi, and if so use the state from the other operand. This helps to address a regression from llvm#83200.
Folding a
select
into a floating point binary operators can only be done if the result is preserved for both case. In particular, if the other operand of theselect
can be a NaN, then the transformation won't preserve the result value.