Skip to content

Commit 26b0bef

Browse files
authored
AMDGPU: Use pattern to select instruction for intrinsic llvm.fptrunc.round (#105761)
Use GCNPat instead of custom lowering to select the instruction for the llvm.fptrunc.round intrinsic. The "SupportedRoundMode : TImmLeaf" predicate restricts selection to the rounding modes the hardware supports, and the "as_hw_round_mode : SDNodeXForm" transform translates each rounding mode into the corresponding encoding that the hardware recognizes.
1 parent 22ba351 commit 26b0bef

16 files changed

+128
-161
lines changed

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ def : GINodeEquiv<G_FFLOOR, ffloor>;
161161
def : GINodeEquiv<G_FRINT, frint>;
162162
def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
163163
def : GINodeEquiv<G_INTRINSIC_TRUNC, ftrunc>;
164+
def : GINodeEquiv<G_INTRINSIC_FPTRUNC_ROUND, fptrunc_round>;
164165
def : GINodeEquiv<G_INTRINSIC_ROUND, fround>;
165166
def : GINodeEquiv<G_INTRINSIC_ROUNDEVEN, froundeven>;
166167
def : GINodeEquiv<G_INTRINSIC_LRINT, lrint>;

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@ def SDTFPUnaryOp : SDTypeProfile<1, 1, [ // fneg, fsqrt, etc
158158
def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fpround
159159
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
160160
]>;
161+
def SDTFPTruncRoundOp : SDTypeProfile<1, 2, [
162+
SDTCisFP<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
163+
]>;
161164
def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fpextend
162165
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
163166
]>;
@@ -552,6 +555,8 @@ def llround : SDNode<"ISD::LLROUND" , SDTFPToIntOp>;
552555
def lrint : SDNode<"ISD::LRINT" , SDTFPToIntOp>;
553556
def llrint : SDNode<"ISD::LLRINT" , SDTFPToIntOp>;
554557

558+
def fptrunc_round : SDNode<"ISD::FPTRUNC_ROUND", SDTFPTruncRoundOp>;
559+
555560
def fpround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>;
556561
def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>;
557562
def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>;

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,6 @@ def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_UBYTE, SIsbuffer_load_ubyte>;
297297
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_SSHORT, SIsbuffer_load_short>;
298298
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_USHORT, SIsbuffer_load_ushort>;
299299

300-
def : GINodeEquiv<G_FPTRUNC_ROUND, SIfptrunc_round>;
301-
302300
class GISelSop2Pat <
303301
SDPatternOperator node,
304302
Instruction inst,
@@ -419,3 +417,6 @@ def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameInde
419417

420418
def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
421419
GISDNodeXFormEquiv<FPPow2ToExponentXForm>;
420+
421+
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
422+
GISDNodeXFormEquiv<as_hw_round_mode>;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5511,7 +5511,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
55115511
NODE_NAME_CASE(CONST_DATA_PTR)
55125512
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
55135513
NODE_NAME_CASE(LDS)
5514-
NODE_NAME_CASE(FPTRUNC_ROUND)
55155514
NODE_NAME_CASE(DUMMY_CHAIN)
55165515
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
55175516
NODE_NAME_CASE(LOAD_D16_HI)

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,6 @@ enum NodeType : unsigned {
553553
CONST_DATA_PTR,
554554
PC_ADD_REL_OFFSET,
555555
LDS,
556-
FPTRUNC_ROUND,
557556

558557
DUMMY_CHAIN,
559558
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5594,6 +5594,16 @@ void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
55945594
MIB.addImm(ExpVal);
55955595
}
55965596

5597+
// Custom operand renderer: reads the rounding-mode immediate from operand
// OpIdx of MI and appends the translated hardware rounding-mode immediate
// to MIB.
void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
5598+
const MachineInstr &MI,
5599+
int OpIdx) const {
5600+
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
5601+
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
5602+
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
5603+
// "round.downward" -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
5604+
// Per the table above, the hardware encoding is the source encoding rotated
// by 3 modulo 4 (0 -> 3, 1 -> 0, 2 -> 1, 3 -> 2).
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
5605+
}
5606+
55975607
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
55985608
return TII.isInlineConstant(Imm);
55995609
}

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
359359
void renderFPPow2ToExponent(MachineInstrBuilder &MIB, const MachineInstr &MI,
360360
int OpIdx) const;
361361

362+
void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
363+
int OpIdx) const;
364+
362365
bool isInlineImmediate(const APInt &Imm) const;
363366
bool isInlineImmediate(const APFloat &Imm) const;
364367

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,7 +1137,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
11371137
.lower();
11381138

11391139
getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND)
1140-
.customFor({S16, S32})
1140+
.legalFor({S16, S32})
11411141
.scalarize(0)
11421142
.lower();
11431143

@@ -2179,8 +2179,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21792179
return legalizeCTLZ_CTTZ(MI, MRI, B);
21802180
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
21812181
return legalizeCTLZ_ZERO_UNDEF(MI, MRI, B);
2182-
case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
2183-
return legalizeFPTruncRound(MI, B);
21842182
case TargetOpcode::G_STACKSAVE:
21852183
return legalizeStackSave(MI, B);
21862184
case TargetOpcode::G_GET_FPENV:
@@ -7093,35 +7091,6 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
70937091
return true;
70947092
}
70957093

7096-
bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
7097-
MachineIRBuilder &B) const {
7098-
MachineRegisterInfo &MRI = *B.getMRI();
7099-
Register Src = MI.getOperand(1).getReg();
7100-
if (MRI.getType(Src) != LLT::scalar(32))
7101-
return false;
7102-
7103-
// Only support towardzero, tonearest, upward and downward.
7104-
int RoundMode = MI.getOperand(2).getImm();
7105-
if (RoundMode != (int)RoundingMode::TowardZero &&
7106-
RoundMode != (int)RoundingMode::NearestTiesToEven &&
7107-
RoundMode != (int)RoundingMode::TowardPositive &&
7108-
RoundMode != (int)RoundingMode::TowardNegative)
7109-
return false;
7110-
7111-
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
7112-
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
7113-
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
7114-
// "round.downward -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
7115-
unsigned HW_Mode = (RoundMode + 3) % 4;
7116-
B.buildInstr(AMDGPU::G_FPTRUNC_ROUND)
7117-
.addDef(MI.getOperand(0).getReg())
7118-
.addUse(Src)
7119-
.addImm(HW_Mode);
7120-
7121-
MI.eraseFromParent();
7122-
return true;
7123-
}
7124-
71257094
bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
71267095
MachineIRBuilder &B) const {
71277096
const SITargetLowering *TLI = ST.getTargetLowering();

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,6 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
212212

213213
bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
214214

215-
bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
216215
bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
217216
bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const;
218217

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5255,7 +5255,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
52555255
OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
52565256
break;
52575257
}
5258-
case AMDGPU::G_FPTRUNC_ROUND:
5258+
case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
52595259
return getDefaultMappingVOP(MI);
52605260
case AMDGPU::G_PREFETCH:
52615261
OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -598,7 +598,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
598598

599599
// F16 - VOP1 Actions.
600600
setOperationAction({ISD::FP_ROUND, ISD::STRICT_FP_ROUND, ISD::FCOS,
601-
ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND},
601+
ISD::FSIN, ISD::FROUND},
602602
MVT::f16, Custom);
603603

604604
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::f16, Promote);
@@ -5797,8 +5797,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
57975797
case ISD::FP_ROUND:
57985798
case ISD::STRICT_FP_ROUND:
57995799
return lowerFP_ROUND(Op, DAG);
5800-
case ISD::FPTRUNC_ROUND:
5801-
return lowerFPTRUNC_ROUND(Op, DAG);
58025800
case ISD::TRAP:
58035801
return lowerTRAP(Op, DAG);
58045802
case ISD::DEBUGTRAP:
@@ -6648,30 +6646,6 @@ SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
66486646
DAG.getTargetConstant(0, DL, MVT::i32));
66496647
}
66506648

6651-
SDValue SITargetLowering::lowerFPTRUNC_ROUND(SDValue Op,
6652-
SelectionDAG &DAG) const {
6653-
if (Op.getOperand(0)->getValueType(0) != MVT::f32)
6654-
return SDValue();
6655-
6656-
// Only support towardzero, tonearest, upward and downward.
6657-
int RoundMode = Op.getConstantOperandVal(1);
6658-
if (RoundMode != (int)RoundingMode::TowardZero &&
6659-
RoundMode != (int)RoundingMode::NearestTiesToEven &&
6660-
RoundMode != (int)RoundingMode::TowardPositive &&
6661-
RoundMode != (int)RoundingMode::TowardNegative)
6662-
return SDValue();
6663-
6664-
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
6665-
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
6666-
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
6667-
// "round.downward -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
6668-
unsigned HW_Mode = (RoundMode + 3) % 4;
6669-
SDLoc DL(Op);
6670-
SDValue RoundFlag = DAG.getTargetConstant(HW_Mode, DL, MVT::i32);
6671-
return DAG.getNode(AMDGPUISD::FPTRUNC_ROUND, DL, Op.getNode()->getVTList(),
6672-
Op->getOperand(0), RoundFlag);
6673-
}
6674-
66756649
SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
66766650
assert(Op.getValueType() == MVT::f16 &&
66776651
"Do not know how to custom lower FP_ROUND for non-f16 type");

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
145145

146146
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
147147
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
148-
SDValue lowerFPTRUNC_ROUND(SDValue Op, SelectionDAG &DAG) const;
149148
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
150149
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
151150
SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -304,12 +304,6 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
304304
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
305305
>;
306306

307-
def SDTFPRoundModeOp : SDTypeProfile<1, 2, [
308-
SDTCisFP<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
309-
]>;
310-
311-
def SIfptrunc_round : SDNode<"AMDGPUISD::FPTRUNC_ROUND", SDTFPRoundModeOp>;
312-
313307
//===----------------------------------------------------------------------===//
314308
// ValueType helpers
315309
//===----------------------------------------------------------------------===//
@@ -796,6 +790,22 @@ return CurDAG->getTargetConstant(
796790
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
797791
}]>;
798792

793+
// Transforms a target-immediate rounding mode into the hardware rounding-mode
// encoding, returned as an i32 target constant.
def as_hw_round_mode : SDNodeXForm<timm, [{
794+
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
795+
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
796+
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
797+
// "round.downward" -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
798+
// Per the table above, the mapping is a rotation by 3 modulo 4.
return CurDAG->getTargetConstant((N->getSExtValue() + 3) % 4, SDLoc(N),
799+
MVT::i32);
800+
}]>;
801+
802+
// Predicate on an i32 target immediate: true only when the value is one of
// TowardZero, NearestTiesToEven, TowardPositive or TowardNegative.
def SupportedRoundMode : TImmLeaf<i32, [{
803+
return Imm == (int)RoundingMode::TowardZero ||
804+
Imm == (int)RoundingMode::NearestTiesToEven ||
805+
Imm == (int)RoundingMode::TowardPositive ||
806+
Imm == (int)RoundingMode::TowardNegative;
807+
}]>;
808+
799809
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
800810
uint64_t Imm = N->getZExtValue();
801811
unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -229,10 +229,12 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
229229
// in the ModeRegister pass.
230230
let Uses = [MODE, EXEC] in {
231231
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
232-
(ins VGPR_32:$src0, i32imm:$round),
233-
[(set f16:$vdst, (SIfptrunc_round f32:$src0, i32:$round))]>;
232+
(ins VGPR_32:$src0, i32imm:$round)>;
234233
} // End Uses = [MODE, EXEC]
235234

235+
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
236+
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
237+
236238
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
237239
// restoring it after we're done.
238240
let Defs = [SCC], isConvergent = 1 in {
@@ -4055,11 +4057,6 @@ def G_SI_CALL : AMDGPUGenericInstruction {
40554057
let isConvergent = 1;
40564058
}
40574059

4058-
def G_FPTRUNC_ROUND : AMDGPUGenericInstruction {
4059-
let OutOperandList = (outs type0:$vdst);
4060-
let InOperandList = (ins type1:$src0, untyped_imm_0:$round);
4061-
let hasSideEffects = 0;
4062-
}
40634060

40644061
//============================================================================//
40654062
// Dummy Instructions

llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefixes=SDAG-FAIL
2-
; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=GISEL-FAIL
1+
; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
2+
; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
33

44
define amdgpu_gs void @test_fptrunc_round_f64(double %a, ptr addrspace(1) %out) {
5-
; SDAG-FAIL: LLVM ERROR: Cannot select
6-
; GISEL-FAIL: unable to legalize instruction
5+
; FAIL: LLVM ERROR: Cannot select
76
%res = call half @llvm.fptrunc.round.f16.f64(double %a, metadata !"round.upward")
87
store half %res, ptr addrspace(1) %out, align 4
98
ret void

0 commit comments

Comments
 (0)