Skip to content

Commit 46a929f

Browse files
authored
[SelectionDAG] Fix isKnownNeverZeroFloat for vectors (#78308)
Return true iff all of the vector's elements are constant AND non-zero. Fixes #77805. Previously, it would return `true` (i.e., the value is known to never be zero) for any build_vector/splat_vector with non-constant elements.
1 parent 9e9907f commit 46a929f

File tree

2 files changed

+27
-26
lines changed

2 files changed

+27
-26
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 2 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -5233,22 +5233,8 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
52335233
"Floating point type expected");
52345234

52355235
// If the value is a constant, we can obviously see if it is a zero or not.
5236-
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
5237-
return !C->isZero();
5238-
5239-
// Return false if we find any zero in a vector.
5240-
if (Op->getOpcode() == ISD::BUILD_VECTOR ||
5241-
Op->getOpcode() == ISD::SPLAT_VECTOR) {
5242-
for (const SDValue &OpVal : Op->op_values()) {
5243-
if (OpVal.isUndef())
5244-
return false;
5245-
if (auto *C = dyn_cast<ConstantFPSDNode>(OpVal))
5246-
if (C->isZero())
5247-
return false;
5248-
}
5249-
return true;
5250-
}
5251-
return false;
5236+
return ISD::matchUnaryFpPredicate(
5237+
Op, [](ConstantFPSDNode *C) { return !C->isZero(); });
52525238
}
52535239

52545240
bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {

llvm/test/CodeGen/X86/fminimum-fmaximum.ll

Lines changed: 25 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1328,36 +1328,51 @@ define <4 x float> @test_fmaximum_v4f32_splat(<4 x float> %x, float %y) {
13281328
; SSE2-LABEL: test_fmaximum_v4f32_splat:
13291329
; SSE2: # %bb.0:
13301330
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
1331-
; SSE2-NEXT: movaps %xmm0, %xmm2
1331+
; SSE2-NEXT: pxor %xmm2, %xmm2
1332+
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1333+
; SSE2-NEXT: movdqa %xmm2, %xmm3
1334+
; SSE2-NEXT: pandn %xmm0, %xmm3
1335+
; SSE2-NEXT: movaps %xmm1, %xmm4
1336+
; SSE2-NEXT: andps %xmm2, %xmm4
1337+
; SSE2-NEXT: orps %xmm3, %xmm4
1338+
; SSE2-NEXT: pand %xmm2, %xmm0
1339+
; SSE2-NEXT: andnps %xmm1, %xmm2
1340+
; SSE2-NEXT: por %xmm2, %xmm0
1341+
; SSE2-NEXT: movdqa %xmm0, %xmm1
1342+
; SSE2-NEXT: maxps %xmm4, %xmm1
1343+
; SSE2-NEXT: movdqa %xmm0, %xmm2
13321344
; SSE2-NEXT: cmpunordps %xmm0, %xmm2
1333-
; SSE2-NEXT: movaps %xmm0, %xmm3
1334-
; SSE2-NEXT: andps %xmm2, %xmm3
1335-
; SSE2-NEXT: maxps %xmm1, %xmm0
1336-
; SSE2-NEXT: andnps %xmm0, %xmm2
1337-
; SSE2-NEXT: orps %xmm3, %xmm2
1338-
; SSE2-NEXT: movaps %xmm2, %xmm0
1345+
; SSE2-NEXT: andps %xmm2, %xmm0
1346+
; SSE2-NEXT: andnps %xmm1, %xmm2
1347+
; SSE2-NEXT: orps %xmm2, %xmm0
13391348
; SSE2-NEXT: retq
13401349
;
13411350
; AVX1-LABEL: test_fmaximum_v4f32_splat:
13421351
; AVX1: # %bb.0:
13431352
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
1344-
; AVX1-NEXT: vmaxps %xmm1, %xmm0, %xmm1
1353+
; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
1354+
; AVX1-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
1355+
; AVX1-NEXT: vmaxps %xmm2, %xmm0, %xmm1
13451356
; AVX1-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
13461357
; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
13471358
; AVX1-NEXT: retq
13481359
;
13491360
; AVX512-LABEL: test_fmaximum_v4f32_splat:
13501361
; AVX512: # %bb.0:
13511362
; AVX512-NEXT: vbroadcastss %xmm1, %xmm1
1352-
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm1
1363+
; AVX512-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
1364+
; AVX512-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
1365+
; AVX512-NEXT: vmaxps %xmm2, %xmm0, %xmm1
13531366
; AVX512-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
13541367
; AVX512-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
13551368
; AVX512-NEXT: retq
13561369
;
13571370
; X86-LABEL: test_fmaximum_v4f32_splat:
13581371
; X86: # %bb.0:
13591372
; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
1360-
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm1
1373+
; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
1374+
; X86-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
1375+
; X86-NEXT: vmaxps %xmm2, %xmm0, %xmm1
13611376
; X86-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
13621377
; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
13631378
; X86-NEXT: retl

0 commit comments

Comments
 (0)