Skip to content

Commit e9cb440

Browse files
authored
[X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (#100079)
Also add tests for G_SITOFP and G_FPTOSI
1 parent 3477eb7 commit e9cb440

File tree

7 files changed

+979
-10
lines changed

7 files changed

+979
-10
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,8 @@ class LegalizerHelper {
394394
LegalizeResult lowerRotate(MachineInstr &MI);
395395

396396
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
397+
LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI);
398+
LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI);
397399
LegalizeResult lowerUITOFP(MachineInstr &MI);
398400
LegalizeResult lowerSITOFP(MachineInstr &MI);
399401
LegalizeResult lowerFPTOUI(MachineInstr &MI);

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7169,6 +7169,78 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
71697169
return Legalized;
71707170
}
71717171

7172+
// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
7173+
// operations and G_SITOFP
7174+
LegalizerHelper::LegalizeResult
7175+
LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
7176+
auto [Dst, Src] = MI.getFirst2Regs();
7177+
const LLT S64 = LLT::scalar(64);
7178+
const LLT S32 = LLT::scalar(32);
7179+
const LLT S1 = LLT::scalar(1);
7180+
7181+
assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
7182+
7183+
// For Src <= INT64_MAX (sign bit clear) we simply reuse SITOFP.
7184+
// Otherwise, divide i64 by 2, round result by ORing with the lowest bit
7185+
// saved before division, convert to float by SITOFP, multiply the result
7186+
// by 2.
7187+
auto One = MIRBuilder.buildConstant(S64, 1);
7188+
auto Zero = MIRBuilder.buildConstant(S64, 0);
7189+
// Result if Src <= INT64_MAX
7190+
auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
7191+
// Result if Src > INT64_MAX
7192+
auto Halved = MIRBuilder.buildLShr(S64, Src, One);
7193+
auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
7194+
auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
7195+
auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
7196+
auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
7197+
// Check if the original value is larger than INT64_MAX by signed comparison with
7198+
// zero to pick one of the two conversions.
7199+
auto IsLarge =
7200+
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
7201+
MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
7202+
7203+
MI.eraseFromParent();
7204+
return Legalized;
7205+
}
7206+
7207+
// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
7208+
// IEEE double representation.
7209+
LegalizerHelper::LegalizeResult
7210+
LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
7211+
auto [Dst, Src] = MI.getFirst2Regs();
7212+
const LLT S64 = LLT::scalar(64);
7213+
const LLT S32 = LLT::scalar(32);
7214+
7215+
assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
7216+
7217+
// We create a double value from two 32-bit parts whose exponents differ by 32.
7218+
// Note that + and - are float operations that adjust the implicit leading
7219+
// one, the bases 2^52 and 2^84 are for illustrative purposes.
7220+
//
7221+
// X = 2^52 * 1.0...LowBits
7222+
// Y = 2^84 * 1.0...HighBits
7223+
// Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
7224+
// = - 2^52 * 1.0...HighBits
7225+
// Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
7226+
auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
7227+
auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
7228+
auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
7229+
auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
7230+
auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
7231+
7232+
auto LowBits = MIRBuilder.buildTrunc(S32, Src);
7233+
LowBits = MIRBuilder.buildZExt(S64, LowBits);
7234+
auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
7235+
auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
7236+
auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
7237+
auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
7238+
MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
7239+
7240+
MI.eraseFromParent();
7241+
return Legalized;
7242+
}
7243+
71727244
LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
71737245
auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
71747246

@@ -7183,13 +7255,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
71837255
if (SrcTy != LLT::scalar(64))
71847256
return UnableToLegalize;
71857257

7186-
if (DstTy == LLT::scalar(32)) {
7258+
if (DstTy == LLT::scalar(32))
71877259
// TODO: SelectionDAG has several alternative expansions to port which may
7188-
// be more reasonble depending on the available instructions. If a target
7189-
// has sitofp, does not have CTLZ, or can efficiently use f64 as an
7190-
// intermediate type, this is probably worse.
7191-
return lowerU64ToF32BitOps(MI);
7192-
}
7260+
// be more reasonable depending on the available instructions. We also need
7261+
// a more advanced mechanism to choose an optimal version depending on
7262+
// target features such as sitofp or CTLZ availability.
7263+
return lowerU64ToF32WithSITOFP(MI);
7264+
7265+
if (DstTy == LLT::scalar(64))
7266+
return lowerU64ToF64BitFloatOps(MI);
71937267

71947268
return UnableToLegalize;
71957269
}

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,62 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
498498
.clampScalar(0, s32, sMaxScalar)
499499
.widenScalarToNextPow2(1);
500500

501+
// For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types
502+
// <= s32 manually. Otherwise, in the custom handler there is no way to tell
503+
// whether s32 is the original type (and must be promoted to s64) or was
504+
// obtained by widening a narrower type (and must not be widened to s64).
505+
//
506+
// For AVX512 we simply widen types as there is direct mapping from opcodes
507+
// to asm instructions.
508+
getActionDefinitionsBuilder(G_UITOFP)
509+
.legalIf([=](const LegalityQuery &Query) {
510+
return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
511+
typeInSet(1, {s32, s64})(Query);
512+
})
513+
.customIf([=](const LegalityQuery &Query) {
514+
return !HasAVX512 &&
515+
((HasSSE1 && typeIs(0, s32)(Query)) ||
516+
(HasSSE2 && typeIs(0, s64)(Query))) &&
517+
scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query);
518+
})
519+
.lowerIf([=](const LegalityQuery &Query) {
520+
// Lower conversions from s64
521+
return !HasAVX512 &&
522+
((HasSSE1 && typeIs(0, s32)(Query)) ||
523+
(HasSSE2 && typeIs(0, s64)(Query))) &&
524+
(Is64Bit && typeIs(1, s64)(Query));
525+
})
526+
.clampScalar(0, s32, HasSSE2 ? s64 : s32)
527+
.widenScalarToNextPow2(0)
528+
.clampScalar(1, s32, sMaxScalar)
529+
.widenScalarToNextPow2(1);
530+
531+
getActionDefinitionsBuilder(G_FPTOUI)
532+
.legalIf([=](const LegalityQuery &Query) {
533+
return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
534+
typeInSet(1, {s32, s64})(Query);
535+
})
536+
.customIf([=](const LegalityQuery &Query) {
537+
return !HasAVX512 &&
538+
((HasSSE1 && typeIs(1, s32)(Query)) ||
539+
(HasSSE2 && typeIs(1, s64)(Query))) &&
540+
scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query);
541+
})
542+
// TODO: replace with customized legalization using
543+
// specifics of cvttsd2si. The selection of this node requires
544+
// a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
545+
// support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
546+
.lowerIf([=](const LegalityQuery &Query) {
547+
return !HasAVX512 &&
548+
((HasSSE1 && typeIs(1, s32)(Query)) ||
549+
(HasSSE2 && typeIs(1, s64)(Query))) &&
550+
(Is64Bit && typeIs(0, s64)(Query));
551+
})
552+
.clampScalar(0, s32, sMaxScalar)
553+
.widenScalarToNextPow2(0)
554+
.clampScalar(1, s32, HasSSE2 ? s64 : s32)
555+
.widenScalarToNextPow2(1);
556+
501557
// vector ops
502558
getActionDefinitionsBuilder(G_BUILD_VECTOR)
503559
.customIf([=](const LegalityQuery &Query) {
@@ -590,6 +646,10 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
590646
return false;
591647
case TargetOpcode::G_BUILD_VECTOR:
592648
return legalizeBuildVector(MI, MRI, Helper);
649+
case TargetOpcode::G_FPTOUI:
650+
return legalizeFPTOUI(MI, MRI, Helper);
651+
case TargetOpcode::G_UITOFP:
652+
return legalizeUITOFP(MI, MRI, Helper);
593653
}
594654
llvm_unreachable("expected switch to return");
595655
}
@@ -645,6 +705,45 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
645705
return true;
646706
}
647707

708+
bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
709+
MachineRegisterInfo &MRI,
710+
LegalizerHelper &Helper) const {
711+
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
712+
auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
713+
unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
714+
const LLT s32 = LLT::scalar(32);
715+
const LLT s64 = LLT::scalar(64);
716+
717+
// Simply reuse FPTOSI when it is possible to widen the type
718+
if (DstSizeInBits <= 32) {
719+
auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src);
720+
MIRBuilder.buildTrunc(Dst, Casted);
721+
MI.eraseFromParent();
722+
return true;
723+
}
724+
725+
return false;
726+
}
727+
728+
bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
729+
MachineRegisterInfo &MRI,
730+
LegalizerHelper &Helper) const {
731+
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
732+
auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
733+
const LLT s32 = LLT::scalar(32);
734+
const LLT s64 = LLT::scalar(64);
735+
736+
// Simply reuse SITOFP when it is possible to widen the type
737+
if (SrcTy.getSizeInBits() <= 32) {
738+
auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src);
739+
MIRBuilder.buildSITOFP(Dst, Ext);
740+
MI.eraseFromParent();
741+
return true;
742+
}
743+
744+
return false;
745+
}
746+
648747
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
649748
MachineInstr &MI) const {
650749
return true;

llvm/lib/Target/X86/GISel/X86LegalizerInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@ class X86LegalizerInfo : public LegalizerInfo {
3939
private:
4040
bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
4141
LegalizerHelper &Helper) const;
42+
43+
bool legalizeFPTOUI(MachineInstr &MI, MachineRegisterInfo &MRI,
44+
LegalizerHelper &Helper) const;
45+
46+
bool legalizeUITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
47+
LegalizerHelper &Helper) const;
4248
};
4349
} // namespace llvm
4450
#endif

llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,18 +296,20 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
296296
getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx);
297297
break;
298298
case TargetOpcode::G_SITOFP:
299-
case TargetOpcode::G_FPTOSI: {
299+
case TargetOpcode::G_FPTOSI:
300+
case TargetOpcode::G_UITOFP:
301+
case TargetOpcode::G_FPTOUI: {
300302
// Some of the floating-point instructions have mixed GPR and FP
301303
// operands: fine-tune the computed mapping.
302304
auto &Op0 = MI.getOperand(0);
303305
auto &Op1 = MI.getOperand(1);
304306
const LLT Ty0 = MRI.getType(Op0.getReg());
305307
const LLT Ty1 = MRI.getType(Op1.getReg());
306308

307-
bool FirstArgIsFP = Opc == TargetOpcode::G_SITOFP;
308-
bool SecondArgIsFP = Opc == TargetOpcode::G_FPTOSI;
309+
bool FirstArgIsFP =
310+
Opc == TargetOpcode::G_SITOFP || Opc == TargetOpcode::G_UITOFP;
309311
OpRegBankIdx[0] = getPartialMappingIdx(MI, Ty0, /* isFP= */ FirstArgIsFP);
310-
OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ SecondArgIsFP);
312+
OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ !FirstArgIsFP);
311313
break;
312314
}
313315
case TargetOpcode::G_FCMP: {

0 commit comments

Comments
 (0)