Skip to content

Commit e9eec14

Browse files
authored
[LoongArch] [CodeGen] Add options for Clang to generate LoongArch-specific frecipe & frsqrte instructions (#109917)
Two options: `-mfrecipe` & `-mno-frecipe`. They enable or disable the frecipe.{s/d} and frsqrte.{s/d} instructions. The default is `-mno-frecipe`.
1 parent 7dbfa7b commit e9eec14

15 files changed

+1411
-0
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5387,6 +5387,10 @@ def mno_lasx : Flag<["-"], "mno-lasx">, Group<m_loongarch_Features_Group>,
53875387
let Flags = [TargetSpecific] in {
53885388
def msimd_EQ : Joined<["-"], "msimd=">, Group<m_loongarch_Features_Group>,
53895389
HelpText<"Select the SIMD extension(s) to be enabled in LoongArch either 'none', 'lsx', 'lasx'.">;
5390+
def mfrecipe : Flag<["-"], "mfrecipe">, Group<m_loongarch_Features_Group>,
5391+
HelpText<"Enable frecipe.{s/d} and frsqrte.{s/d}">;
5392+
def mno_frecipe : Flag<["-"], "mno-frecipe">, Group<m_loongarch_Features_Group>,
5393+
HelpText<"Disable frecipe.{s/d} and frsqrte.{s/d}">;
53905394
def mannotate_tablejump : Flag<["-"], "mannotate-tablejump">, Group<m_loongarch_Features_Group>,
53915395
HelpText<"Enable annotate table jump instruction to correlate it with the jump table.">;
53925396
def mno_annotate_tablejump : Flag<["-"], "mno-annotate-tablejump">, Group<m_loongarch_Features_Group>,

clang/lib/Driver/ToolChains/Arch/LoongArch.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,15 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
251251
} else /*-mno-lasx*/
252252
Features.push_back("-lasx");
253253
}
254+
255+
// Select frecipe feature determined by -m[no-]frecipe.
256+
if (const Arg *A =
257+
Args.getLastArg(options::OPT_mfrecipe, options::OPT_mno_frecipe)) {
258+
if (A->getOption().matches(options::OPT_mfrecipe))
259+
Features.push_back("+frecipe");
260+
else
261+
Features.push_back("-frecipe");
262+
}
254263
}
255264

256265
std::string loongarch::postProcessTargetCPUString(const std::string &CPU,
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/// Test -m[no-]frecipe options.
2+
3+
// RUN: %clang --target=loongarch64 -mfrecipe -fsyntax-only %s -### 2>&1 | \
4+
// RUN: FileCheck %s --check-prefix=CC1-FRECIPE
5+
// RUN: %clang --target=loongarch64 -mno-frecipe -fsyntax-only %s -### 2>&1 | \
6+
// RUN: FileCheck %s --check-prefix=CC1-NO-FRECIPE
7+
// RUN: %clang --target=loongarch64 -mno-frecipe -mfrecipe -fsyntax-only %s -### 2>&1 | \
8+
// RUN: FileCheck %s --check-prefix=CC1-FRECIPE
9+
// RUN: %clang --target=loongarch64 -mfrecipe -mno-frecipe -fsyntax-only %s -### 2>&1 | \
10+
// RUN: FileCheck %s --check-prefix=CC1-NO-FRECIPE
11+
12+
// RUN: %clang --target=loongarch64 -mfrecipe -S -emit-llvm %s -o - | \
13+
// RUN: FileCheck %s --check-prefix=IR-FRECIPE
14+
// RUN: %clang --target=loongarch64 -mno-frecipe -S -emit-llvm %s -o - | \
15+
// RUN: FileCheck %s --check-prefix=IR-NO-FRECIPE
16+
// RUN: %clang --target=loongarch64 -mno-frecipe -mfrecipe -S -emit-llvm %s -o - | \
17+
// RUN: FileCheck %s --check-prefix=IR-FRECIPE
18+
// RUN: %clang --target=loongarch64 -mfrecipe -mno-frecipe -S -emit-llvm %s -o - | \
19+
// RUN: FileCheck %s --check-prefix=IR-NO-FRECIPE
20+
21+
22+
// CC1-FRECIPE: "-target-feature" "+frecipe"
23+
// CC1-NO-FRECIPE: "-target-feature" "-frecipe"
24+
25+
// IR-FRECIPE: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+frecipe{{(,.*)?}}"
26+
// IR-NO-FRECIPE: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-frecipe{{(,.*)?}}"
27+
28+
int foo(void) {
29+
return 42;
30+
}

llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,16 @@ def SDT_LoongArchMOVGR2FR_W_LA64
1919
def SDT_LoongArchMOVFR2GR_S_LA64
2020
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
2121
def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
22+
def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
23+
def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
2224

2325
def loongarch_movgr2fr_w_la64
2426
: SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
2527
def loongarch_movfr2gr_s_la64
2628
: SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>;
2729
def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
30+
def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
31+
def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;
2832

2933
//===----------------------------------------------------------------------===//
3034
// Instructions
@@ -286,6 +290,8 @@ let Predicates = [HasFrecipe] in {
286290
// FP approximate reciprocal operation
287291
def : Pat<(int_loongarch_frecipe_s FPR32:$src), (FRECIPE_S FPR32:$src)>;
288292
def : Pat<(int_loongarch_frsqrte_s FPR32:$src), (FRSQRTE_S FPR32:$src)>;
293+
def : Pat<(loongarch_frecipe FPR32:$src), (FRECIPE_S FPR32:$src)>;
294+
def : Pat<(loongarch_frsqrte FPR32:$src), (FRSQRTE_S FPR32:$src)>;
289295
}
290296

291297
// fmadd.s: fj * fk + fa

llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,8 @@ let Predicates = [HasFrecipe] in {
253253
// FP approximate reciprocal operation
254254
def : Pat<(int_loongarch_frecipe_d FPR64:$src), (FRECIPE_D FPR64:$src)>;
255255
def : Pat<(int_loongarch_frsqrte_d FPR64:$src), (FRSQRTE_D FPR64:$src)>;
256+
def : Pat<(loongarch_frecipe FPR64:$src), (FRECIPE_D FPR64:$src)>;
257+
def : Pat<(loongarch_frsqrte FPR64:$src), (FRSQRTE_D FPR64:$src)>;
256258
}
257259

258260
// fmadd.d: fj * fk + fa

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4697,6 +4697,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
46974697
NODE_NAME_CASE(VANY_ZERO)
46984698
NODE_NAME_CASE(VALL_NONZERO)
46994699
NODE_NAME_CASE(VANY_NONZERO)
4700+
NODE_NAME_CASE(FRECIPE)
4701+
NODE_NAME_CASE(FRSQRTE)
47004702
}
47014703
#undef NODE_NAME_CASE
47024704
return nullptr;
@@ -5900,6 +5902,71 @@ Register LoongArchTargetLowering::getExceptionSelectorRegister(
59005902
return LoongArch::R5;
59015903
}
59025904

5905+
//===----------------------------------------------------------------------===//
5906+
// Target Optimization Hooks
5907+
//===----------------------------------------------------------------------===//
5908+
5909+
static int getEstimateRefinementSteps(EVT VT,
5910+
const LoongArchSubtarget &Subtarget) {
5911+
// The relative accuracy of the FRECIPE feature instructions is 2^-14.
5912+
// IEEE float has 23 mantissa bits and double has 52 mantissa bits.
5913+
int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
5914+
return RefinementSteps;
5915+
}
5916+
5917+
SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
5918+
SelectionDAG &DAG, int Enabled,
5919+
int &RefinementSteps,
5920+
bool &UseOneConstNR,
5921+
bool Reciprocal) const {
5922+
if (Subtarget.hasFrecipe()) {
5923+
SDLoc DL(Operand);
5924+
EVT VT = Operand.getValueType();
5925+
5926+
if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
5927+
(VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
5928+
(VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
5929+
(VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
5930+
(VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
5931+
5932+
if (RefinementSteps == ReciprocalEstimate::Unspecified)
5933+
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
5934+
5935+
SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
5936+
if (Reciprocal)
5937+
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
5938+
5939+
return Estimate;
5940+
}
5941+
}
5942+
5943+
return SDValue();
5944+
}
5945+
5946+
SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
5947+
SelectionDAG &DAG,
5948+
int Enabled,
5949+
int &RefinementSteps) const {
5950+
if (Subtarget.hasFrecipe()) {
5951+
SDLoc DL(Operand);
5952+
EVT VT = Operand.getValueType();
5953+
5954+
if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
5955+
(VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
5956+
(VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
5957+
(VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
5958+
(VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
5959+
5960+
if (RefinementSteps == ReciprocalEstimate::Unspecified)
5961+
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
5962+
5963+
return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
5964+
}
5965+
}
5966+
5967+
return SDValue();
5968+
}
5969+
59035970
//===----------------------------------------------------------------------===//
59045971
// LoongArch Inline Assembly Support
59055972
//===----------------------------------------------------------------------===//

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ enum NodeType : unsigned {
141141
VALL_NONZERO,
142142
VANY_NONZERO,
143143

144+
// Floating point approximate reciprocal operation
145+
FRECIPE,
146+
FRSQRTE
147+
144148
// Intrinsic operations end =============================================
145149
};
146150
} // end namespace LoongArchISD
@@ -216,6 +220,17 @@ class LoongArchTargetLowering : public TargetLowering {
216220
Register
217221
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
218222

223+
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
224+
return true;
225+
}
226+
227+
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
228+
int &RefinementSteps, bool &UseOneConstNR,
229+
bool Reciprocal) const override;
230+
231+
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
232+
int &RefinementSteps) const override;
233+
219234
ISD::NodeType getExtendForAtomicOps() const override {
220235
return ISD::SIGN_EXTEND;
221236
}

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
// Target nodes.
1314
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
1415

1516
def lasxsplati8
@@ -2094,6 +2095,15 @@ foreach Inst = ["XVFRECIPE_S", "XVFRSQRTE_S"] in
20942095
foreach Inst = ["XVFRECIPE_D", "XVFRSQRTE_D"] in
20952096
def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)),
20962097
(!cast<LAInst>(Inst) LASX256:$xj)>;
2098+
2099+
def : Pat<(loongarch_vfrecipe v8f32:$src),
2100+
(XVFRECIPE_S v8f32:$src)>;
2101+
def : Pat<(loongarch_vfrecipe v4f64:$src),
2102+
(XVFRECIPE_D v4f64:$src)>;
2103+
def : Pat<(loongarch_vfrsqrte v8f32:$src),
2104+
(XVFRSQRTE_S v8f32:$src)>;
2105+
def : Pat<(loongarch_vfrsqrte v4f64:$src),
2106+
(XVFRSQRTE_D v4f64:$src)>;
20972107
}
20982108

20992109
def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
2323
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
2424
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
2525
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
26+
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
27+
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
2628

2729
// Target nodes.
2830
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
@@ -50,6 +52,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
5052

5153
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
5254
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
55+
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
56+
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
5357

5458
def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
5559
def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
@@ -2238,6 +2242,15 @@ foreach Inst = ["VFRECIPE_S", "VFRSQRTE_S"] in
22382242
foreach Inst = ["VFRECIPE_D", "VFRSQRTE_D"] in
22392243
def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2f64 LSX128:$vj)),
22402244
(!cast<LAInst>(Inst) LSX128:$vj)>;
2245+
2246+
def : Pat<(loongarch_vfrecipe v4f32:$src),
2247+
(VFRECIPE_S v4f32:$src)>;
2248+
def : Pat<(loongarch_vfrecipe v2f64:$src),
2249+
(VFRECIPE_D v2f64:$src)>;
2250+
def : Pat<(loongarch_vfrsqrte v4f32:$src),
2251+
(VFRSQRTE_S v4f32:$src)>;
2252+
def : Pat<(loongarch_vfrsqrte v2f64:$src),
2253+
(VFRSQRTE_D v2f64:$src)>;
22412254
}
22422255

22432256
// load
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch32 --mattr=+f,-d,-frecipe < %s | FileCheck %s --check-prefix=LA32F
3+
; RUN: llc --mtriple=loongarch32 --mattr=+f,-d,+frecipe < %s | FileCheck %s --check-prefix=LA32F-FRECIPE
4+
; RUN: llc --mtriple=loongarch64 --mattr=+d,-frecipe < %s | FileCheck %s --check-prefix=LA64D
5+
; RUN: llc --mtriple=loongarch64 --mattr=+d,+frecipe < %s | FileCheck %s --check-prefix=LA64D-FRECIPE
6+
7+
;; Exercise the 'fdiv' LLVM IR: https://llvm.org/docs/LangRef.html#fdiv-instruction
8+
9+
define float @fdiv_s(float %x, float %y) {
10+
; LA32F-LABEL: fdiv_s:
11+
; LA32F: # %bb.0:
12+
; LA32F-NEXT: fdiv.s $fa0, $fa0, $fa1
13+
; LA32F-NEXT: ret
14+
;
15+
; LA32F-FRECIPE-LABEL: fdiv_s:
16+
; LA32F-FRECIPE: # %bb.0:
17+
; LA32F-FRECIPE-NEXT: frecipe.s $fa2, $fa1
18+
; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2
19+
; LA32F-FRECIPE-NEXT: fnmsub.s $fa0, $fa1, $fa3, $fa0
20+
; LA32F-FRECIPE-NEXT: fmadd.s $fa0, $fa2, $fa0, $fa3
21+
; LA32F-FRECIPE-NEXT: ret
22+
;
23+
; LA64D-LABEL: fdiv_s:
24+
; LA64D: # %bb.0:
25+
; LA64D-NEXT: fdiv.s $fa0, $fa0, $fa1
26+
; LA64D-NEXT: ret
27+
;
28+
; LA64D-FRECIPE-LABEL: fdiv_s:
29+
; LA64D-FRECIPE: # %bb.0:
30+
; LA64D-FRECIPE-NEXT: frecipe.s $fa2, $fa1
31+
; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2
32+
; LA64D-FRECIPE-NEXT: fnmsub.s $fa0, $fa1, $fa3, $fa0
33+
; LA64D-FRECIPE-NEXT: fmadd.s $fa0, $fa2, $fa0, $fa3
34+
; LA64D-FRECIPE-NEXT: ret
35+
%div = fdiv fast float %x, %y
36+
ret float %div
37+
}
38+
39+
define double @fdiv_d(double %x, double %y) {
40+
; LA32F-LABEL: fdiv_d:
41+
; LA32F: # %bb.0:
42+
; LA32F-NEXT: addi.w $sp, $sp, -16
43+
; LA32F-NEXT: .cfi_def_cfa_offset 16
44+
; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
45+
; LA32F-NEXT: .cfi_offset 1, -4
46+
; LA32F-NEXT: bl %plt(__divdf3)
47+
; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
48+
; LA32F-NEXT: addi.w $sp, $sp, 16
49+
; LA32F-NEXT: ret
50+
;
51+
; LA32F-FRECIPE-LABEL: fdiv_d:
52+
; LA32F-FRECIPE: # %bb.0:
53+
; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -16
54+
; LA32F-FRECIPE-NEXT: .cfi_def_cfa_offset 16
55+
; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
56+
; LA32F-FRECIPE-NEXT: .cfi_offset 1, -4
57+
; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
58+
; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
59+
; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 16
60+
; LA32F-FRECIPE-NEXT: ret
61+
;
62+
; LA64D-LABEL: fdiv_d:
63+
; LA64D: # %bb.0:
64+
; LA64D-NEXT: fdiv.d $fa0, $fa0, $fa1
65+
; LA64D-NEXT: ret
66+
;
67+
; LA64D-FRECIPE-LABEL: fdiv_d:
68+
; LA64D-FRECIPE: # %bb.0:
69+
; LA64D-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
70+
; LA64D-FRECIPE-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI1_0)
71+
; LA64D-FRECIPE-NEXT: frecipe.d $fa3, $fa1
72+
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa1, $fa3, $fa2
73+
; LA64D-FRECIPE-NEXT: fnmsub.d $fa2, $fa2, $fa3, $fa3
74+
; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa0, $fa2
75+
; LA64D-FRECIPE-NEXT: fnmsub.d $fa0, $fa1, $fa3, $fa0
76+
; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa2, $fa0, $fa3
77+
; LA64D-FRECIPE-NEXT: ret
78+
%div = fdiv fast double %x, %y
79+
ret double %div
80+
}

0 commit comments

Comments
 (0)