Skip to content

Commit ae988cd

Browse files
committed
address pr comments
1 parent 4d4d087 commit ae988cd

File tree

9 files changed

+359
-16
lines changed

9 files changed

+359
-16
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,18 @@ TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", FIXED(4), "_ZGV_LLVM_N4v")
143143
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
144144
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8), "_ZGV_LLVM_N8v")
145145

146+
TLI_DEFINE_VECFUNC("tan", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
147+
TLI_DEFINE_VECFUNC("tan", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v")
148+
149+
TLI_DEFINE_VECFUNC("tanf", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
150+
TLI_DEFINE_VECFUNC("tanf", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v")
151+
152+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
153+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v")
154+
155+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
156+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v")
157+
146158
TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
147159
TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4), "_ZGV_LLVM_N4vv")
148160

@@ -304,6 +316,22 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", FIXED(4), "_ZGV_LLVM_N4v")
304316
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", FIXED(8), "_ZGV_LLVM_N8v")
305317
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", FIXED(16), "_ZGV_LLVM_N16v")
306318

319+
TLI_DEFINE_VECFUNC("tan", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v")
320+
TLI_DEFINE_VECFUNC("tan", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v")
321+
TLI_DEFINE_VECFUNC("tan", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v")
322+
323+
TLI_DEFINE_VECFUNC("tanf", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v")
324+
TLI_DEFINE_VECFUNC("tanf", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v")
325+
TLI_DEFINE_VECFUNC("tanf", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v")
326+
327+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v")
328+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v")
329+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v")
330+
331+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v")
332+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v")
333+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v")
334+
307335
TLI_DEFINE_VECFUNC("pow", "__svml_pow2", FIXED(2), "_ZGV_LLVM_N2vv")
308336
TLI_DEFINE_VECFUNC("pow", "__svml_pow4", FIXED(4), "_ZGV_LLVM_N4vv")
309337
TLI_DEFINE_VECFUNC("pow", "__svml_pow8", FIXED(8), "_ZGV_LLVM_N8vv")
@@ -1238,6 +1266,13 @@ TLI_DEFINE_VECFUNC("tanf", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
12381266
TLI_DEFINE_VECFUNC("tanf", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
12391267
TLI_DEFINE_VECFUNC("tanf", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
12401268

1269+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
1270+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1271+
TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1272+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd8_tan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1273+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd4_tan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1274+
TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd2_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1275+
12411276
TLI_DEFINE_VECFUNC("asin", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
12421277
TLI_DEFINE_VECFUNC("asinf", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
12431278
TLI_DEFINE_VECFUNC("asinf", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,7 +1338,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
13381338
#endif
13391339
report_fatal_error("Do not know how to expand the result of this "
13401340
"operator!");
1341-
1341+
// clang-format off
13421342
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
13431343
case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
13441344
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
@@ -1408,9 +1408,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
14081408
case ISD::STRICT_FSUB:
14091409
case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
14101410
case ISD::STRICT_FTAN:
1411-
case ISD::FTAN:
1412-
ExpandFloatRes_FTAN(N, Lo, Hi);
1413-
break;
1411+
case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break;
14141412
case ISD::STRICT_FTRUNC:
14151413
case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
14161414
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
@@ -1420,6 +1418,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
14201418
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
14211419
case ISD::STRICT_FREM:
14221420
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
1421+
// clang-format on
14231422
}
14241423

14251424
// If Lo/Hi is null, the sub-method took care of registering results etc.

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
617617
SDValue &Lo, SDValue &Hi);
618618
void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
619619
SDValue &Lo, SDValue &Hi);
620+
// clang-format off
620621
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
621622
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
622623
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -649,10 +650,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
649650
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
650651
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
651652
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
652-
void ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo, SDValue &Hi);
653+
void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi);
653654
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
654655
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
655656
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
657+
// clang-format on
656658

657659
// Float Operand Expansion.
658660
bool ExpandFloatOperand(SDNode *N, unsigned OpNo);

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
650650
// non-optsize case.
651651
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
652652

653+
// clang-format off
653654
for (auto VT : { MVT::f32, MVT::f64 }) {
654655
// Use ANDPD to simulate FABS.
655656
setOperationAction(ISD::FABS, VT, Custom);
@@ -668,8 +669,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
668669
setOperationAction(ISD::FSIN , VT, Expand);
669670
setOperationAction(ISD::FCOS , VT, Expand);
670671
setOperationAction(ISD::FSINCOS, VT, Expand);
671-
setOperationAction(ISD::FTAN, VT, Expand);
672+
setOperationAction(ISD::FTAN , VT, Expand);
672673
}
674+
// clang-format on
673675

674676
// Half type will be promoted by default.
675677
setF16Action(MVT::f16, Promote);
@@ -741,10 +743,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
741743
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
742744

743745
// We don't support sin/cos/tan/fmod
746+
// clang-format off
744747
setOperationAction(ISD::FSIN , MVT::f32, Expand);
745748
setOperationAction(ISD::FCOS , MVT::f32, Expand);
746749
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
747-
setOperationAction(ISD::FTAN, MVT::f32, Expand);
750+
setOperationAction(ISD::FTAN , MVT::f32, Expand);
751+
// clang-format on
748752

749753
if (UseX87) {
750754
// Always expand sin/cos functions even though x87 has an instruction.

llvm/test/CodeGen/X86/llvm.tan.ll

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,57 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
33

4-
define half @use_tanf16(half %a) {
4+
define half @use_tanf16(half %a) nounwind {
55
; CHECK-LABEL: use_tanf16:
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: pushq %rax
8-
; CHECK-NEXT: .cfi_def_cfa_offset 16
98
; CHECK-NEXT: callq __extendhfsf2@PLT
109
; CHECK-NEXT: callq tanf@PLT
1110
; CHECK-NEXT: callq __truncsfhf2@PLT
1211
; CHECK-NEXT: popq %rax
13-
; CHECK-NEXT: .cfi_def_cfa_offset 8
1412
; CHECK-NEXT: retq
1513
%x = call half @llvm.tan.f16(half %a)
1614
ret half %x
1715
}
1816

19-
define float @use_tanf32(float %a) {
17+
; f32 case: the assertions below expect the llvm.tan.f32 call to be emitted as
; a tail call (jmp) to the tanf libcall, with no other instructions in the body.
define float @use_tanf32(float %a) nounwind {
2018
; CHECK-LABEL: use_tanf32:
2119
; CHECK: # %bb.0:
2220
; CHECK-NEXT: jmp tanf@PLT # TAILCALL
2321
%x = call float @llvm.tan.f32(float %a)
2422
ret float %x
2523
}
2624

27-
define double @use_tanf64(double %a) {
25+
; f64 case: the assertions below expect the llvm.tan.f64 call to be emitted as
; a tail call (jmp) to the tan libcall, with no other instructions in the body.
define double @use_tanf64(double %a) nounwind {
2826
; CHECK-LABEL: use_tanf64:
2927
; CHECK: # %bb.0:
3028
; CHECK-NEXT: jmp tan@PLT # TAILCALL
3129
%x = call double @llvm.tan.f64(double %a)
3230
ret double %x
3331
}
3432

35-
define fp128 @use_tanfp128(fp128 %a) {
33+
; x86_fp80 case. The argument, result and intrinsic overload must all be
; x86_fp80; using double here would only repeat the f64 test above and leave
; the 80-bit lowering untested. The value is passed on the stack, so the call
; to the tanl libcall cannot be a tail call.
; NOTE(review): assertions written by hand from the expected x86-64 lowering;
; regenerate with utils/update_llc_test_checks.py before committing.
define x86_fp80 @use_tanf80(x86_fp80 %a) nounwind {
; CHECK-LABEL: use_tanf80:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt (%rsp)
; CHECK-NEXT: callq tanl@PLT
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
  %x = call x86_fp80 @llvm.tan.f80(x86_fp80 %a)
  ret x86_fp80 %x
}
40+
41+
; fp128 case: the assertions below expect the llvm.tan.f128 call to be emitted
; as a tail call (jmp) to the tanf128 libcall.
define fp128 @use_tanfp128(fp128 %a) nounwind {
3642
; CHECK-LABEL: use_tanfp128:
3743
; CHECK: # %bb.0:
3844
; CHECK-NEXT: jmp tanf128@PLT # TAILCALL
3945
%x = call fp128 @llvm.tan.f128(fp128 %a)
4046
ret fp128 %x
4147
}
4248

43-
define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) {
49+
define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) nounwind {
4450
; CHECK-LABEL: use_tanppc_fp128:
4551
; CHECK: # %bb.0:
4652
; CHECK-NEXT: pushq %rax
47-
; CHECK-NEXT: .cfi_def_cfa_offset 16
4853
; CHECK-NEXT: callq tanl@PLT
4954
; CHECK-NEXT: popq %rax
50-
; CHECK-NEXT: .cfi_def_cfa_offset 8
5155
; CHECK-NEXT: retq
5256
%x = call ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %a)
5357
ret ppc_fp128 %x

llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ declare float @cosf(float) #0
1515
declare double @llvm.cos.f64(double) #0
1616
declare float @llvm.cos.f32(float) #0
1717

18+
declare double @tan(double) #0
19+
declare float @tanf(float) #0
20+
declare double @llvm.tan.f64(double) #0
21+
declare float @llvm.tan.f32(float) #0
22+
1823
declare double @pow(double, double) #0
1924
declare float @powf(float, float) #0
2025
declare double @llvm.pow.f64(double, double) #0
@@ -264,6 +269,114 @@ for.end:
264269
ret void
265270
}
266271

272+
; Loop-vectorizer input for the scalar libm call @tan (double).
; The loop counts %iv from 0 and exits once %iv.next equals 1000; each
; iteration converts the truncated index to double, calls @tan and stores the
; result to %varray[%iv].
; Expected output: a <4 x double> call to @amd_vrd4_tan under the default
; prefix, and an <8 x double> call to @amd_vrd8_tan under the AVX512-VF8
; prefix (the RUN lines that select these prefixes are outside this hunk).
define void @tan_f64(ptr nocapture %varray) {
273+
; CHECK-LABEL: @tan_f64(
274+
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
275+
; CHECK: ret void
276+
;
277+
; CHECK-AVX512-VF8-LABEL: @tan_f64(
278+
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
279+
; CHECK-AVX512-VF8: ret void
280+
;
281+
entry:
282+
br label %for.body
283+
284+
for.body:
285+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
286+
%tmp = trunc i64 %iv to i32
287+
%conv = sitofp i32 %tmp to double
288+
%call = tail call double @tan(double %conv)
289+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
290+
store double %call, ptr %arrayidx, align 4
291+
%iv.next = add nuw nsw i64 %iv, 1
292+
%exitcond = icmp eq i64 %iv.next, 1000
293+
br i1 %exitcond, label %for.end, label %for.body
294+
295+
for.end:
296+
ret void
297+
}
298+
299+
; Loop-vectorizer input for the scalar libm call @tanf (float).
; The loop counts %iv from 0 and exits once %iv.next equals 1000; each
; iteration converts the truncated index to float, calls @tanf and stores the
; result to %varray[%iv].
; Expected output: a <4 x float> call to @amd_vrs4_tanf under the default
; prefix, and a <16 x float> call to @amd_vrs16_tanf under the AVX512-VF16
; prefix (the RUN lines that select these prefixes are outside this hunk).
define void @tan_f32(ptr nocapture %varray) {
300+
; CHECK-LABEL: @tan_f32(
301+
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
302+
; CHECK: ret void
303+
;
304+
; CHECK-AVX512-VF16-LABEL: @tan_f32(
305+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
306+
; CHECK-AVX512-VF16: ret void
307+
;
308+
entry:
309+
br label %for.body
310+
311+
for.body:
312+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
313+
%tmp = trunc i64 %iv to i32
314+
%conv = sitofp i32 %tmp to float
315+
%call = tail call float @tanf(float %conv)
316+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
317+
store float %call, ptr %arrayidx, align 4
318+
%iv.next = add nuw nsw i64 %iv, 1
319+
%exitcond = icmp eq i64 %iv.next, 1000
320+
br i1 %exitcond, label %for.end, label %for.body
321+
322+
for.end:
323+
ret void
324+
}
325+
326+
; Same loop as the @tan libcall test, but the scalar call is the intrinsic
; @llvm.tan.f64. The loop counts %iv from 0 and exits once %iv.next equals
; 1000, storing each result to %varray[%iv].
; Expected output: a <4 x double> call to @amd_vrd4_tan under the default
; prefix, and an <8 x double> call to @amd_vrd8_tan under the AVX512-VF8
; prefix (the RUN lines that select these prefixes are outside this hunk).
define void @tan_f64_intrinsic(ptr nocapture %varray) {
327+
; CHECK-LABEL: @tan_f64_intrinsic(
328+
; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
329+
; CHECK: ret void
330+
;
331+
; CHECK-AVX512-VF8-LABEL: @tan_f64_intrinsic(
332+
; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
333+
; CHECK-AVX512-VF8: ret void
334+
;
335+
entry:
336+
br label %for.body
337+
338+
for.body:
339+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
340+
%tmp = trunc i64 %iv to i32
341+
%conv = sitofp i32 %tmp to double
342+
%call = tail call double @llvm.tan.f64(double %conv)
343+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
344+
store double %call, ptr %arrayidx, align 4
345+
%iv.next = add nuw nsw i64 %iv, 1
346+
%exitcond = icmp eq i64 %iv.next, 1000
347+
br i1 %exitcond, label %for.end, label %for.body
348+
349+
for.end:
350+
ret void
351+
}
352+
353+
; Same loop as the @tanf libcall test, but the scalar call is the intrinsic
; @llvm.tan.f32. The loop counts %iv from 0 and exits once %iv.next equals
; 1000, storing each result to %varray[%iv].
; Expected output: a <4 x float> call to @amd_vrs4_tanf under the default
; prefix, and a <16 x float> call to @amd_vrs16_tanf under the AVX512-VF16
; prefix (the RUN lines that select these prefixes are outside this hunk).
define void @tan_f32_intrinsic(ptr nocapture %varray) {
354+
; CHECK-LABEL: @tan_f32_intrinsic(
355+
; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
356+
; CHECK: ret void
357+
;
358+
; CHECK-AVX512-VF16-LABEL: @tan_f32_intrinsic(
359+
; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
360+
; CHECK-AVX512-VF16: ret void
361+
;
362+
entry:
363+
br label %for.body
364+
365+
for.body:
366+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
367+
%tmp = trunc i64 %iv to i32
368+
%conv = sitofp i32 %tmp to float
369+
%call = tail call float @llvm.tan.f32(float %conv)
370+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
371+
store float %call, ptr %arrayidx, align 4
372+
%iv.next = add nuw nsw i64 %iv, 1
373+
%exitcond = icmp eq i64 %iv.next, 1000
374+
br i1 %exitcond, label %for.end, label %for.body
375+
376+
for.end:
377+
ret void
378+
}
379+
267380
define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
268381
; CHECK-LABEL: @pow_f64(
269382
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])

0 commit comments

Comments
 (0)