Skip to content

Commit d13947b

Browse files
authored
[InstCombine] Enable more fabs fold when the user ignores sign bit of zero/NaN (#139861)
When the only user of select is a fcmp or a fp operation with nnan/nsz, the sign bit of zero/NaN can be ignored. Alive2: https://alive2.llvm.org/ce/z/ZcxeIv Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=7add1bcd02b1f72d580bb2e64a1fe4a8bdc085d9&to=cb419c7cbddce778673f3d4b414ed9b8064b8d6e&stat=instructions:u Closes #133367.
1 parent a0b6cfd commit d13947b

File tree

3 files changed

+225
-5
lines changed

3 files changed

+225
-5
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2773,6 +2773,47 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op,
27732773
return nullptr;
27742774
}
27752775

2776+
/// Return true if the sign bit of result can be ignored when the result is
2777+
/// zero.
2778+
static bool ignoreSignBitOfZero(Instruction &I) {
2779+
if (I.hasNoSignedZeros())
2780+
return true;
2781+
2782+
// Check if the sign bit is ignored by the only user.
2783+
if (!I.hasOneUse())
2784+
return false;
2785+
Instruction *User = I.user_back();
2786+
2787+
// fcmp treats both positive and negative zero as equal.
2788+
if (User->getOpcode() == Instruction::FCmp)
2789+
return true;
2790+
2791+
if (auto *FPOp = dyn_cast<FPMathOperator>(User))
2792+
return FPOp->hasNoSignedZeros();
2793+
2794+
return false;
2795+
}
2796+
2797+
/// Return true if the sign bit of result can be ignored when the result is NaN.
2798+
static bool ignoreSignBitOfNaN(Instruction &I) {
2799+
if (I.hasNoNaNs())
2800+
return true;
2801+
2802+
// Check if the sign bit is ignored by the only user.
2803+
if (!I.hasOneUse())
2804+
return false;
2805+
Instruction *User = I.user_back();
2806+
2807+
// fcmp ignores the sign bit of NaN.
2808+
if (User->getOpcode() == Instruction::FCmp)
2809+
return true;
2810+
2811+
if (auto *FPOp = dyn_cast<FPMathOperator>(User))
2812+
return FPOp->hasNoNaNs();
2813+
2814+
return false;
2815+
}
2816+
27762817
// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
27772818
// fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
27782819
static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
@@ -2797,7 +2838,7 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
27972838
// of NAN, but IEEE-754 specifies the signbit of NAN values with
27982839
// fneg/fabs operations.
27992840
if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X))) &&
2800-
(cast<FPMathOperator>(CondVal)->hasNoNaNs() || SI.hasNoNaNs() ||
2841+
(cast<FPMathOperator>(CondVal)->hasNoNaNs() || ignoreSignBitOfNaN(SI) ||
28012842
isKnownNeverNaN(X, /*Depth=*/0,
28022843
IC.getSimplifyQuery().getWithInstruction(
28032844
cast<Instruction>(CondVal))))) {
@@ -2844,7 +2885,7 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
28442885
// Note: We require "nnan" for this fold because fcmp ignores the signbit
28452886
// of NAN, but IEEE-754 specifies the signbit of NAN values with
28462887
// fneg/fabs operations.
2847-
if (!SI.hasNoSignedZeros() || !SI.hasNoNaNs())
2888+
if (!ignoreSignBitOfZero(SI) || !ignoreSignBitOfNaN(SI))
28482889
return nullptr;
28492890

28502891
if (Swap)

llvm/test/Transforms/InstCombine/fabs.ll

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,3 +1276,182 @@ define <2 x float> @test_select_neg_negx_x_wrong_type(<2 x float> %value) {
12761276
%value.addr.0.i = select i1 %a1, <2 x float> %fneg.i, <2 x float> %value
12771277
ret <2 x float> %value.addr.0.i
12781278
}
1279+
1280+
; select(x >= 0.0, x, fneg x) without fast-math flags whose only user is an
; fcmp. The expected output replaces the select with a call to llvm.fabs.
define i1 @test_fabs_used_by_fcmp(float %x, float %y) {
1281+
; CHECK-LABEL: @test_fabs_used_by_fcmp(
1282+
; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
1283+
; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[SEL]], [[Y:%.*]]
1284+
; CHECK-NEXT: ret i1 [[CMP2]]
1285+
;
1286+
%cmp = fcmp oge float %x, 0.000000e+00
1287+
%neg = fneg float %x
1288+
%sel = select i1 %cmp, float %x, float %neg
1289+
%cmp2 = fcmp olt float %sel, %y
1290+
ret i1 %cmp2
1291+
}
1292+
1293+
; select(x >= 0.0, x, fneg x) without fast-math flags whose only user is an
; fadd carrying both nnan and nsz. The expected output replaces the select
; with a call to llvm.fabs.
define float @test_fabs_used_by_fpop_nnan_nsz(float %x, float %y) {
1294+
; CHECK-LABEL: @test_fabs_used_by_fpop_nnan_nsz(
1295+
; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
1296+
; CHECK-NEXT: [[ADD:%.*]] = fadd nnan nsz float [[SEL]], [[Y:%.*]]
1297+
; CHECK-NEXT: ret float [[ADD]]
1298+
;
1299+
%cmp = fcmp oge float %x, 0.000000e+00
1300+
%neg = fneg float %x
1301+
%sel = select i1 %cmp, float %x, float %neg
1302+
%add = fadd nnan nsz float %sel, %y
1303+
ret float %add
1304+
}
1305+
1306+
; Variant where the negation is written as "fsub 0.0, x" and the compare is
; ogt. The select's only user is an fcmp; the expected output replaces the
; select with a call to llvm.fabs.
define i1 @test_fabs_fsub_used_by_fcmp(float %x, float %y) {
1307+
; CHECK-LABEL: @test_fabs_fsub_used_by_fcmp(
1308+
; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
1309+
; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[SEL]], [[Y:%.*]]
1310+
; CHECK-NEXT: ret i1 [[CMP2]]
1311+
;
1312+
%cmp = fcmp ogt float %x, 0.000000e+00
1313+
%neg = fsub float 0.000000e+00, %x
1314+
%sel = select i1 %cmp, float %x, float %neg
1315+
%cmp2 = fcmp olt float %sel, %y
1316+
ret i1 %cmp2
1317+
}
1318+
1319+
; Variant where the negation is written as "fsub 0.0, x" and the compare is
; ogt. The select's only user is an fadd carrying nnan (no nsz); the expected
; output still replaces the select with a call to llvm.fabs.
define float @test_fabs_fsub_used_by_fpop_nnan(float %x, float %y) {
1320+
; CHECK-LABEL: @test_fabs_fsub_used_by_fpop_nnan(
1321+
; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
1322+
; CHECK-NEXT: [[ADD:%.*]] = fadd nnan float [[SEL]], [[Y:%.*]]
1323+
; CHECK-NEXT: ret float [[ADD]]
1324+
;
1325+
%cmp = fcmp ogt float %x, 0.000000e+00
1326+
%neg = fsub float 0.000000e+00, %x
1327+
%sel = select i1 %cmp, float %x, float %neg
1328+
%add = fadd nnan float %sel, %y
1329+
ret float %add
1330+
}
1331+
1332+
; TODO: fadd ignores the sign bit of NaN.
; The select's only user is an fadd carrying nsz but not nnan. The expected
; output currently keeps the fcmp/fneg/select sequence unchanged.
1333+
define float @test_fabs_used_by_fpop_nsz(float %x, float %y) {
1334+
; CHECK-LABEL: @test_fabs_used_by_fpop_nsz(
1335+
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
1336+
; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
1337+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
1338+
; CHECK-NEXT: [[ADD:%.*]] = fadd nsz float [[SEL]], [[Y:%.*]]
1339+
; CHECK-NEXT: ret float [[ADD]]
1340+
;
1341+
%cmp = fcmp oge float %x, 0.000000e+00
1342+
%neg = fneg float %x
1343+
%sel = select i1 %cmp, float %x, float %neg
1344+
%add = fadd nsz float %sel, %y
1345+
ret float %add
1346+
}
1347+
1348+
; TODO: copysign ignores the sign bit of NaN magnitude.
; The select's only user is llvm.copysign, with the select passed as the first
; argument. The expected output currently keeps the fcmp/fneg/select sequence
; unchanged.
1348+
define float @test_fabs_used_by_fcopysign_mag(float %x, float %y) {
1349+
; CHECK-LABEL: @test_fabs_used_by_fcopysign_mag(
1350+
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X1:%.*]], 0.000000e+00
1351+
; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X1]]
1352+
; CHECK-NEXT: [[X:%.*]] = select i1 [[CMP]], float [[X1]], float [[NEG]]
1353+
; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X]], float [[Y:%.*]])
1354+
; CHECK-NEXT: ret float [[COPYSIGN]]
1355+
;
1356+
%cmp = fcmp oge float %x, 0.000000e+00
1357+
%neg = fneg float %x
1358+
%sel = select i1 %cmp, float %x, float %neg
1359+
%copysign = call float @llvm.copysign.f32(float %sel, float %y)
1360+
ret float %copysign
1361+
}
1363+
1364+
1365+
; Negative tests
1366+
1367+
; Negative test: select(x >= 0.0, x, fneg x) whose only user is an fadd
; carrying nnan but not nsz. The expected output keeps the select unchanged.
define float @test_fabs_used_by_fpop_nnan(float %x, float %y) {
1368+
; CHECK-LABEL: @test_fabs_used_by_fpop_nnan(
1369+
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
1370+
; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
1371+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
1372+
; CHECK-NEXT: [[ADD:%.*]] = fadd nnan float [[SEL]], [[Y:%.*]]
1373+
; CHECK-NEXT: ret float [[ADD]]
1374+
;
1375+
%cmp = fcmp oge float %x, 0.000000e+00
1376+
%neg = fneg float %x
1377+
%sel = select i1 %cmp, float %x, float %neg
1378+
%add = fadd nnan float %sel, %y
1379+
ret float %add
1380+
}
1381+
1382+
; Negative test: the select has two users (an fcmp and a call to @use), so it
; is not a single-use value. The expected output keeps the select unchanged.
define i1 @test_fabs_used_by_fcmp_multiuse(float %x, float %y) {
1383+
; CHECK-LABEL: @test_fabs_used_by_fcmp_multiuse(
1384+
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
1385+
; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
1386+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
1387+
; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[SEL]], [[Y:%.*]]
1388+
; CHECK-NEXT: call void @use(float [[SEL]])
1389+
; CHECK-NEXT: ret i1 [[CMP2]]
1390+
;
1391+
%cmp = fcmp oge float %x, 0.000000e+00
1392+
%neg = fneg float %x
1393+
%sel = select i1 %cmp, float %x, float %neg
1394+
%cmp2 = fcmp olt float %sel, %y
1395+
call void @use(float %sel)
1396+
ret i1 %cmp2
1397+
}
1398+
1399+
; Negative test: the select's only user is llvm.copysign, with the select
; passed as the second argument. The expected output keeps the select
; unchanged.
define float @test_fabs_used_by_fcopysign_sign(float %x, float %y) {
1400+
; CHECK-LABEL: @test_fabs_used_by_fcopysign_sign(
1401+
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
1402+
; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
1403+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
1404+
; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[Y:%.*]], float [[SEL]])
1405+
; CHECK-NEXT: ret float [[COPYSIGN]]
1406+
;
1407+
%cmp = fcmp oge float %x, 0.000000e+00
1408+
%neg = fneg float %x
1409+
%sel = select i1 %cmp, float %x, float %neg
1410+
%copysign = call float @llvm.copysign.f32(float %y, float %sel)
1411+
ret float %copysign
1412+
}
1413+
1414+
; Negative test: the select's only user is a call to llvm.maxnum without
; fast-math flags. The expected output keeps the select unchanged.
define float @test_fabs_used_by_maxnum(float %x, float %y) {
1415+
; CHECK-LABEL: @test_fabs_used_by_maxnum(
1416+
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
1417+
; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
1418+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
1419+
; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[Y:%.*]], float [[SEL]])
1420+
; CHECK-NEXT: ret float [[MAX]]
1421+
;
1422+
%cmp = fcmp oge float %x, 0.000000e+00
1423+
%neg = fneg float %x
1424+
%sel = select i1 %cmp, float %x, float %neg
1425+
%max = call float @llvm.maxnum.f32(float %y, float %sel)
1426+
ret float %max
1427+
}
1428+
1429+
; Negative test: the select's only user is a call to llvm.canonicalize without
; fast-math flags. The expected output keeps the select unchanged.
define float @test_fabs_used_by_canonicalize(float %x) {
1430+
; CHECK-LABEL: @test_fabs_used_by_canonicalize(
1431+
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
1432+
; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
1433+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
1434+
; CHECK-NEXT: [[CANON:%.*]] = call float @llvm.canonicalize.f32(float [[SEL]])
1435+
; CHECK-NEXT: ret float [[CANON]]
1436+
;
1437+
%cmp = fcmp oge float %x, 0.000000e+00
1438+
%neg = fneg float %x
1439+
%sel = select i1 %cmp, float %x, float %neg
1440+
%canon = call float @llvm.canonicalize.f32(float %sel)
1441+
ret float %canon
1442+
}
1443+
1444+
; Negative test: the select's only user is another select that carries no
; fast-math flags. The expected output keeps the inner select unchanged.
define float @test_fabs_used_by_select(float %x, i1 %cond) {
1445+
; CHECK-LABEL: @test_fabs_used_by_select(
1446+
; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
1447+
; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
1448+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
1449+
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[COND:%.*]], float [[SEL]], float 0.000000e+00
1450+
; CHECK-NEXT: ret float [[SEL2]]
1451+
;
1452+
%cmp = fcmp oge float %x, 0.000000e+00
1453+
%neg = fneg float %x
1454+
%sel = select i1 %cmp, float %x, float %neg
1455+
%sel2 = select i1 %cond, float %sel, float 0.000000e+00
1456+
ret float %sel2
1457+
}

llvm/test/Transforms/InstCombine/fneg.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ define float @select_common_op_fneg_false(float %x, i1 %b) {
709709

710710
define float @fabs(float %a) {
711711
; CHECK-LABEL: @fabs(
712-
; CHECK-NEXT: [[FNEG1:%.*]] = call nnan ninf nsz float @llvm.fabs.f32(float [[A:%.*]])
712+
; CHECK-NEXT: [[FNEG1:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]])
713713
; CHECK-NEXT: ret float [[FNEG1]]
714714
;
715715
%fneg = fneg float %a
@@ -721,7 +721,7 @@ define float @fabs(float %a) {
721721

722722
define float @fnabs(float %a) {
723723
; CHECK-LABEL: @fnabs(
724-
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.fabs.f32(float [[A:%.*]])
724+
; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]])
725725
; CHECK-NEXT: [[FNEG1:%.*]] = fneg fast float [[TMP1]]
726726
; CHECK-NEXT: ret float [[FNEG1]]
727727
;
@@ -734,7 +734,7 @@ define float @fnabs(float %a) {
734734

735735
define float @fnabs_1(float %a) {
736736
; CHECK-LABEL: @fnabs_1(
737-
; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.fabs.f32(float [[A:%.*]])
737+
; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]])
738738
; CHECK-NEXT: [[FNEG1:%.*]] = fneg fast float [[TMP1]]
739739
; CHECK-NEXT: ret float [[FNEG1]]
740740
;

0 commit comments

Comments
 (0)