Skip to content

Commit ce7afff

Browse files
committed
[NVPTX] Support copysign PTX instruction
1 parent d1b9adb commit ce7afff

File tree

3 files changed

+55
-2
lines changed

3 files changed

+55
-2
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -838,8 +838,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
838838
setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
839839
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
840840
setOperationAction(ISD::FCOPYSIGN, MVT::v2bf16, Expand);
841-
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
842-
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
841+
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
842+
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
843843

844844
// These map to corresponding instructions for f32/f64. f16 must be
845845
// promoted to f32. v2f16 is expanded to f16, which is then promoted

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,20 @@ def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
977977
// f64 counterpart of the fabs intrinsic defs: instantiates F_MATH_1 with the
// "abs.f64" asm string, Float64Regs for both register-class arguments, and the
// int_nvvm_fabs_d intrinsic.
def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
978978
Float64Regs, int_nvvm_fabs_d>;
979979

980+
//
981+
// copysign
982+
//
983+
984+
// f32 copysign. The matched node, fcopysign($src0, $src1), produces the
// magnitude of $src0 with the sign of $src1. PTX `copysign d, a, b` copies the
// sign bit of `a` onto the value of `b`, so the asm string must list the sign
// source ($src1) first and the magnitude source ($src0) second.
// NOTE(review): this pattern only matches when both operands are f32; an
// fcopysign whose sign operand has a different type presumably needs separate
// handling -- confirm.
def COPYSIGN_F :
985+
NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src0, Float32Regs:$src1),
986+
"copysign.f32 \t$dst, $src1, $src0;",
987+
[(set Float32Regs:$dst, (fcopysign Float32Regs:$src0, Float32Regs:$src1))]>;
988+
989+
// f64 copysign. Same operand swap as COPYSIGN_F: PTX takes the sign source
// first, whereas fcopysign takes the magnitude source first.
// NOTE(review): only matches when both operands are f64 -- confirm that
// mixed-type fcopysign is handled elsewhere.
def COPYSIGN_D :
990+
NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src0, Float64Regs:$src1),
991+
"copysign.f64 \t$dst, $src1, $src0;",
992+
[(set Float64Regs:$dst, (fcopysign Float64Regs:$src0, Float64Regs:$src1))]>;
993+
980994
//
981995
// Abs, Neg bf16, bf16x2
982996
//

llvm/test/CodeGen/NVPTX/copysign.ll

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
4+
5+
target triple = "nvptx64-nvidia-cuda"
6+
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
7+
8+
; PTX copysign takes the sign source first, so %b (param_1, %f2) is expected
; before %a (param_0, %f1) in the emitted instruction.
define float @fcopysign_f(float %a, float %b) {
9+
; CHECK-LABEL: fcopysign_f(
10+
; CHECK: {
11+
; CHECK-NEXT: .reg .f32 %f<4>;
12+
; CHECK-EMPTY:
13+
; CHECK-NEXT: // %bb.0:
14+
; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_param_0];
15+
; CHECK-NEXT: ld.param.f32 %f2, [fcopysign_f_param_1];
16+
; CHECK-NEXT: copysign.f32 %f3, %f2, %f1;
17+
; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3;
18+
; CHECK-NEXT: ret;
19+
%val = call float @llvm.copysign.f32(float %a, float %b)
20+
ret float %val
21+
}
22+
23+
; Same operand order as the f32 case: sign source %b (%fd2) first, then the
; magnitude source %a (%fd1).
define double @fcopysign_d(double %a, double %b) {
24+
; CHECK-LABEL: fcopysign_d(
25+
; CHECK: {
26+
; CHECK-NEXT: .reg .f64 %fd<4>;
27+
; CHECK-EMPTY:
28+
; CHECK-NEXT: // %bb.0:
29+
; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_param_0];
30+
; CHECK-NEXT: ld.param.f64 %fd2, [fcopysign_d_param_1];
31+
; CHECK-NEXT: copysign.f64 %fd3, %fd2, %fd1;
32+
; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
33+
; CHECK-NEXT: ret;
34+
%val = call double @llvm.copysign.f64(double %a, double %b)
35+
ret double %val
36+
}
37+
38+
declare float @llvm.copysign.f32(float, float)
39+
declare double @llvm.copysign.f64(double, double)

0 commit comments

Comments
 (0)