Skip to content

Commit c35b358

Browse files
committed
AMDGPU/GlobalISel: Legalize FDIV16
Reviewers: arsenm Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, volkan, Petar.Avramovic, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D69347
1 parent 118ceea commit c35b358

File tree

5 files changed

+388
-136
lines changed

5 files changed

+388
-136
lines changed

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,13 @@ class MachineIRBuilder {
517517
/// \return The newly created instruction.
518518
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op);
519519

520+
/// Build and insert \p Res = G_FPEXT \p Op
521+
MachineInstrBuilder buildFPExt(const DstOp &Res, const SrcOp &Op,
522+
Optional<unsigned> Flags = None) {
523+
return buildInstr(TargetOpcode::G_FPEXT, {Res}, {Op}, Flags);
524+
}
525+
526+
520527
/// Build and insert a G_PTRTOINT instruction.
521528
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src) {
522529
return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src});
@@ -867,7 +874,8 @@ class MachineIRBuilder {
867874
/// \pre \p Res must be smaller than \p Op
868875
///
869876
/// \return The newly created instruction.
870-
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op);
877+
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op,
878+
Optional<unsigned> Flags = None);
871879

872880
/// Build and insert \p Res = G_TRUNC \p Op
873881
///

llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -698,8 +698,9 @@ MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
698698
}
699699

700700
MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res,
701-
const SrcOp &Op) {
702-
return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op);
701+
const SrcOp &Op,
702+
Optional<unsigned> Flags) {
703+
return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags);
703704
}
704705

705706
MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1823,10 +1823,16 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
18231823
MachineRegisterInfo &MRI,
18241824
MachineIRBuilder &B) const {
18251825
B.setInstr(MI);
1826+
Register Dst = MI.getOperand(0).getReg();
1827+
LLT DstTy = MRI.getType(Dst);
1828+
LLT S16 = LLT::scalar(16);
18261829

18271830
if (legalizeFastUnsafeFDIV(MI, MRI, B))
18281831
return true;
18291832

1833+
if (DstTy == S16)
1834+
return legalizeFDIV16(MI, MRI, B);
1835+
18301836
return false;
18311837
}
18321838

@@ -1890,6 +1896,39 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
18901896
return false;
18911897
}
18921898

1899+
bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
1900+
MachineRegisterInfo &MRI,
1901+
MachineIRBuilder &B) const {
1902+
B.setInstr(MI);
1903+
Register Res = MI.getOperand(0).getReg();
1904+
Register LHS = MI.getOperand(1).getReg();
1905+
Register RHS = MI.getOperand(2).getReg();
1906+
1907+
uint16_t Flags = MI.getFlags();
1908+
1909+
LLT S16 = LLT::scalar(16);
1910+
LLT S32 = LLT::scalar(32);
1911+
1912+
auto LHSExt = B.buildFPExt(S32, LHS, Flags);
1913+
auto RHSExt = B.buildFPExt(S32, RHS, Flags);
1914+
1915+
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
1916+
.addUse(RHSExt.getReg(0))
1917+
.setMIFlags(Flags);
1918+
1919+
auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags);
1920+
auto RDst = B.buildFPTrunc(S16, QUOT, Flags);
1921+
1922+
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
1923+
.addUse(RDst.getReg(0))
1924+
.addUse(RHS)
1925+
.addUse(LHS)
1926+
.setMIFlags(Flags);
1927+
1928+
MI.eraseFromParent();
1929+
return true;
1930+
}
1931+
18931932
bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
18941933
MachineRegisterInfo &MRI,
18951934
MachineIRBuilder &B) const {

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
8383

8484
bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
8585
MachineIRBuilder &B) const;
86+
bool legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI,
87+
MachineIRBuilder &B) const;
8688
bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
8789
MachineIRBuilder &B) const;
8890
bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI,

0 commit comments

Comments
 (0)