Skip to content

Commit 238794c

Browse files
arsenm authored and pravinjagtap committed
AMDGPU: Legalize fminimum and fmaximum f32 for gfx950 (llvm#117634)
Select to minimum3/maximum3. Leave f16/v2f16 for later since it's complicated by only having the vector version.
1 parent 1dffe4c commit 238794c

File tree

6 files changed

+1977
-1622
lines changed

6 files changed

+1977
-1622
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
855855
setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM},
856856
{MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
857857
Custom);
858+
} else {
859+
// FIXME: For nnan fmaximum, emit maximum3 instead of fmaxnum
860+
if (Subtarget->hasMinimum3Maximum3F32())
861+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
858862
}
859863

860864
setOperationAction(ISD::INTRINSIC_WO_CHAIN,

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,6 +1227,23 @@ def : IntClampPat<V_MQSAD_PK_U16_U8_e64, int_amdgcn_mqsad_pk_u16_u8>;
12271227
def : IntClampPat<V_QSAD_PK_U16_U8_e64, int_amdgcn_qsad_pk_u16_u8>;
12281228
def : IntClampPat<V_MQSAD_U32_U8_e64, int_amdgcn_mqsad_u32_u8>;
12291229

1230+
//===----------------------------------------------------------------------===//
1231+
// Floating-point operation Patterns
1232+
//===----------------------------------------------------------------------===//
1233+
1234+
// Implement fminimum(x, y) / fmaximum(x, y) by using minimum3(x, y, y) / maximum3(x, y, y)
1235+
class MinimumMaximumByMinimum3Maximum3<SDPatternOperator node, ValueType vt,
1236+
Instruction inst> : GCNPat<
1237+
(vt (node (VOP3Mods vt:$src0, i32:$src0_mods), (VOP3Mods vt:$src1, i32:$src1_mods))),
1238+
(inst $src0_mods, $src0, $src1_mods, $src1, $src1_mods, $src1)
1239+
>;
1240+
1241+
// Prefer the real 2-operand form if legal; the negative AddedComplexity makes these 3-operand patterns a low-priority fallback
1242+
let SubtargetPredicate = HasMinimum3Maximum3F32, AddedComplexity = -1000 in {
1243+
def : MinimumMaximumByMinimum3Maximum3<fminimum, f32, V_MINIMUM3_F32_e64>;
1244+
def : MinimumMaximumByMinimum3Maximum3<fmaximum, f32, V_MAXIMUM3_F32_e64>;
1245+
}
1246+
12301247
//===----------------------------------------------------------------------===//
12311248
// Target-specific instruction encodings.
12321249
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)