Skip to content

Commit e57b327

Browse files
authored
AMDGPU: Legalize fminimum and fmaximum f32 for gfx950 (#117634)
Select fminimum/fmaximum to minimum3/maximum3. Leave f16/v2f16 for later, since that case is complicated by only having the vector version.
1 parent 44ef12b commit e57b327

File tree

6 files changed

+1977
-1622
lines changed

6 files changed

+1977
-1622
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
855855
setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM},
856856
{MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
857857
Custom);
858+
} else {
859+
// FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum
860+
if (Subtarget->hasMinimum3Maximum3F32())
861+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
858862
}
859863

860864
setOperationAction(ISD::INTRINSIC_WO_CHAIN,

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,23 @@ def : IntClampPat<V_MQSAD_PK_U16_U8_e64, int_amdgcn_mqsad_pk_u16_u8>;
12341234
def : IntClampPat<V_QSAD_PK_U16_U8_e64, int_amdgcn_qsad_pk_u16_u8>;
12351235
def : IntClampPat<V_MQSAD_U32_U8_e64, int_amdgcn_mqsad_u32_u8>;
12361236

1237+
//===----------------------------------------------------------------------===//
1238+
// Floating-point operation Patterns
1239+
//===----------------------------------------------------------------------===//
1240+
1241+
// Implement fminimum(x, y) / fmaximum(x, y) by using minimum3(x, y, y) / maximum3(x, y, y)
1242+
class MinimumMaximumByMinimum3Maximum3<SDPatternOperator node, ValueType vt,
1243+
Instruction inst> : GCNPat<
1244+
(vt (node (VOP3Mods vt:$src0, i32:$src0_mods), (VOP3Mods vt:$src1, i32:$src1_mods))),
1245+
(inst $src0_mods, $src0, $src1_mods, $src1, $src1_mods, $src1)
1246+
>;
1247+
1248+
// Prefer the real two-operand form when it is legal
1249+
let SubtargetPredicate = HasMinimum3Maximum3F32, AddedComplexity = -1000 in {
1250+
def : MinimumMaximumByMinimum3Maximum3<fminimum, f32, V_MINIMUM3_F32_e64>;
1251+
def : MinimumMaximumByMinimum3Maximum3<fmaximum, f32, V_MAXIMUM3_F32_e64>;
1252+
}
1253+
12371254
//===----------------------------------------------------------------------===//
12381255
// Target-specific instruction encodings.
12391256
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)