Skip to content

Commit 3a0e015

Browse files
authored
[NVPTX] Lower -1/x to neg.f64(rcp.rn.f64) instead of fdiv (#98343)
The NVPTX backend lowers 1/x to the rcp.rn.f64 instruction instead of the slower fdiv instruction. However, for -1/x it still uses the slower fdiv instruction. After this change, -1/x is lowered to neg.f64 applied to the result of rcp.rn.f64.
1 parent 331ba43 commit 3a0e015

File tree

2 files changed

+75
-0
lines changed

2 files changed

+75
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,18 @@ def DoubleConst1 : PatLeaf<(fpimm), [{
11501150
return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() &&
11511151
N->getValueAPF().convertToDouble() == 1.0;
11521152
}]>;
1153+
// Constant -1.0 (double): matches an fpimm with IEEE double semantics whose value equals -1.0.
1154+
def DoubleConstNeg1 : PatLeaf<(fpimm), [{
1155+
return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() &&
1156+
N->getValueAPF().convertToDouble() == -1.0;
1157+
}]>;
1158+
1159+
1160+
// Negates an FP immediate (X -> -X) and returns it as an f64 target constant.
1161+
def NegDoubleConst : SDNodeXForm<fpimm, [{
1162+
return CurDAG->getTargetConstantFP(-(N->getValueAPF()),
1163+
SDLoc(N), MVT::f64);
1164+
}]>;
11531165

11541166
// Loads FP16 constant into a register.
11551167
//
@@ -1225,6 +1237,11 @@ def FDIV64ri :
12251237
"div.rn.f64 \t$dst, $a, $b;",
12261238
[(set Float64Regs:$dst, (fdiv Float64Regs:$a, fpimm:$b))]>;
12271239

1240+
// fdiv with a -1.0 numerator is selected as FNEGf64 of FDIV641r; NegDoubleConst flips the -1.0 immediate to 1.0.
1241+
// fdiv -1.0, X => fneg (rcp.rn X)
1242+
def : Pat<(fdiv DoubleConstNeg1:$a, Float64Regs:$b),
1243+
(FNEGf64 (FDIV641r (NegDoubleConst node:$a), Float64Regs:$b))>;
1244+
12281245
//
12291246
// F32 Approximate reciprocal
12301247
//

llvm/test/CodeGen/NVPTX/rcp-opt.ll

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
4+
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
;; Check that fneg (fdiv 1.0, X), with the fneg written as fsub -0.0, lowers to neg.f64 (rcp.rn.f64 X).
8+
9+
define double @test1(double %in) {
10+
; CHECK-LABEL: test1(
11+
; CHECK: {
12+
; CHECK-NEXT: .reg .f64 %fd<4>;
13+
; CHECK-EMPTY:
14+
; CHECK-NEXT: // %bb.0:
15+
; CHECK-NEXT: ld.param.f64 %fd1, [test1_param_0];
16+
; CHECK-NEXT: rcp.rn.f64 %fd2, %fd1;
17+
; CHECK-NEXT: neg.f64 %fd3, %fd2;
18+
; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
19+
; CHECK-NEXT: ret;
20+
%div = fdiv double 1.000000e+00, %in
21+
%neg = fsub double -0.000000e+00, %div
22+
ret double %neg
23+
}
24+
25+
;; Check that fdiv -1.0, X lowers to neg.f64 (rcp.rn.f64 X).
26+
27+
define double @test2(double %in) {
28+
; CHECK-LABEL: test2(
29+
; CHECK: {
30+
; CHECK-NEXT: .reg .f64 %fd<4>;
31+
; CHECK-EMPTY:
32+
; CHECK-NEXT: // %bb.0:
33+
; CHECK-NEXT: ld.param.f64 %fd1, [test2_param_0];
34+
; CHECK-NEXT: rcp.rn.f64 %fd2, %fd1;
35+
; CHECK-NEXT: neg.f64 %fd3, %fd2;
36+
; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
37+
; CHECK-NEXT: ret;
38+
%div = fdiv double -1.000000e+00, %in
39+
ret double %div
40+
}
41+
42+
;; Check that fdiv 1.0, (fneg X), with the fneg written as fsub -0.0, lowers to neg.f64 (rcp.rn.f64 X).
43+
44+
define double @test3(double %in) {
45+
; CHECK-LABEL: test3(
46+
; CHECK: {
47+
; CHECK-NEXT: .reg .f64 %fd<4>;
48+
; CHECK-EMPTY:
49+
; CHECK-NEXT: // %bb.0:
50+
; CHECK-NEXT: ld.param.f64 %fd1, [test3_param_0];
51+
; CHECK-NEXT: rcp.rn.f64 %fd2, %fd1;
52+
; CHECK-NEXT: neg.f64 %fd3, %fd2;
53+
; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
54+
; CHECK-NEXT: ret;
55+
%neg = fsub double -0.000000e+00, %in
56+
%div = fdiv double 1.000000e+00, %neg
57+
ret double %div
58+
}

0 commit comments

Comments
 (0)