Skip to content

Commit 2401b61

Browse files
authored
AMDGPU: Fix creating minimum3/maximum3 nodes pre-gfx12 (#93027)
These would fail to select.
1 parent 7a1022a commit 2401b61

File tree

4 files changed

+3647
-4
lines changed

4 files changed

+3647
-4
lines changed

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,6 +1312,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13121312
// \returns true if the target has IEEE fminimum/fmaximum instructions
13131313
bool hasIEEEMinMax() const { return getGeneration() >= GFX12; }
13141314

1315+
// \returns true if the target has IEEE fminimum3/fmaximum3 instructions
1316+
bool hasIEEEMinMax3() const { return hasIEEEMinMax(); }
1317+
13151318
// \returns true if the target has WG_RR_MODE kernel descriptor mode bit
13161319
bool hasRrWGMode() const { return getGeneration() >= GFX12; }
13171320

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13199,6 +13199,33 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
1319913199
return SDValue();
1320013200
}
1320113201

13202+
/// \return true if the subtarget supports minimum3 and maximum3 with the given
13203+
/// base min/max opcode \p Opc for type \p VT.
13204+
static bool supportsMin3Max3(const GCNSubtarget &Subtarget, unsigned Opc,
13205+
EVT VT) {
13206+
switch (Opc) {
13207+
case ISD::FMINNUM:
13208+
case ISD::FMAXNUM:
13209+
case ISD::FMINNUM_IEEE:
13210+
case ISD::FMAXNUM_IEEE:
13211+
case AMDGPUISD::FMIN_LEGACY:
13212+
case AMDGPUISD::FMAX_LEGACY:
13213+
return (VT == MVT::f32) || (VT == MVT::f16 && Subtarget.hasMin3Max3_16());
13214+
case ISD::FMINIMUM:
13215+
case ISD::FMAXIMUM:
13216+
return (VT == MVT::f32 || VT == MVT::f16) && Subtarget.hasIEEEMinMax3();
13217+
case ISD::SMAX:
13218+
case ISD::SMIN:
13219+
case ISD::UMAX:
13220+
case ISD::UMIN:
13221+
return (VT == MVT::i32) || (VT == MVT::i16 && Subtarget.hasMin3Max3_16());
13222+
default:
13223+
return false;
13224+
}
13225+
13226+
llvm_unreachable("not a min/max opcode");
13227+
}
13228+
1320213229
SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
1320313230
DAGCombinerInfo &DCI) const {
1320413231
SelectionDAG &DAG = DCI.DAG;
@@ -13211,10 +13238,7 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
1321113238
// Only do this if the inner op has one use since this will just increase
1321213239
// register pressure for no benefit.
1321313240

13214-
if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
13215-
!VT.isVector() &&
13216-
(VT == MVT::i32 || VT == MVT::f32 ||
13217-
((VT == MVT::f16 || VT == MVT::i16) && Subtarget->hasMin3Max3_16()))) {
13241+
if (supportsMin3Max3(*Subtarget, Opc, VT)) {
1321813242
// max(max(a, b), c) -> max3(a, b, c)
1321913243
// min(min(a, b), c) -> min3(a, b, c)
1322013244
if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {

0 commit comments

Comments
 (0)