Skip to content

Revert "[SME] Add intrinsics for FCVT(wid.) and FCVTL" #93196

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -2265,10 +2265,6 @@ let TargetGuard = "sme2" in {
def SVCVT_S32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "i", MergeNone, "aarch64_sve_fcvtzs_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
}

let TargetGuard = "sme-f16f16" in {
def SVCVT_F32_X2 : SInst<"svcvt_{d}[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvt_widen_x2", [ IsStreaming],[]>;
}

//
// Multi-vector floating-point convert from single-precision to interleaved half-precision/BFloat16
//
Expand All @@ -2277,13 +2273,6 @@ let TargetGuard = "sme2" in {
def SVCVTN_BF16_X2 : SInst<"svcvtn_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvtn_x2", [IsOverloadNone, IsStreaming],[]>;
}

//
//Multi-vector floating-point convert from half-precision to deinterleaved single-precision.
//
let TargetGuard = "sme-f16f16" in {
def SVCVTL_F32_X2 : SInst<"svcvtl_f32[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvtl_widen_x2", [ IsStreaming],[]>;
}

//
// Multi-vector saturating extract narrow
//
Expand Down
22 changes: 0 additions & 22 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
Original file line number Diff line number Diff line change
Expand Up @@ -497,25 +497,3 @@ svuint8_t test_qcvt_u8_s32_x4(svint32x4_t zn) __arm_streaming {
svuint16_t test_qcvt_u16_s64_x4(svint64x4_t zn) __arm_streaming {
return SVE_ACLE_FUNC(svqcvt_u16,_s64_x4,,)(zn);
}

// CHECK-LABEL: @test_cvt_f32_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
// CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z15test_cvt_f32_x2u13__SVFloat16_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]]
//
__attribute__((target("sme-f16f16"))) svfloat32x2_t test_cvt_f32_x2(svfloat16_t zn) __arm_streaming {
return SVE_ACLE_FUNC(svcvt_f32,_f16_x2,,)(zn);
}
40 changes: 0 additions & 40 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c

This file was deleted.

14 changes: 1 addition & 13 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -3121,11 +3121,6 @@ let TargetPrefix = "aarch64" in {
: DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty],
[llvm_nxv4f32_ty, llvm_nxv4f32_ty],
[IntrNoMem]>;

class SME2_CVT_WIDENING_VG2_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[LLVMSubdivide2VectorType<0>], [IntrNoMem]>;


class SME2_CVT_VG4_SINGLE_Intrinsic
: DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>],
Expand Down Expand Up @@ -3417,13 +3412,6 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_suvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
def int_aarch64_sme_usvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;


//
//Multi-vector floating-point convert from half-precision to deinterleaved single-precision.
//

def int_aarch64_sve_fcvtl_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic;

//
// Multi-vector floating-point CVT from single-precision to interleaved half-precision/BFloat16
//
Expand All @@ -3443,7 +3431,7 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sve_fcvtzu_x4 : SME2_CVT_X4_Intrinsic;
def int_aarch64_sve_scvtf_x4 : SME2_CVT_X4_Intrinsic;
def int_aarch64_sve_ucvtf_x4 : SME2_CVT_X4_Intrinsic;
def int_aarch64_sve_fcvt_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic;

//
// Multi-vector saturating extract narrow
//
Expand Down
6 changes: 0 additions & 6 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5717,12 +5717,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_sve_ucvtf_x4:
SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
return;
case Intrinsic::aarch64_sve_fcvt_widen_x2:
SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
return;
case Intrinsic::aarch64_sve_fcvtl_widen_x2:
SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
return;
case Intrinsic::aarch64_sve_sclamp_single_x2:
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
Node->getValueType(0),
Expand Down
11 changes: 1 addition & 10 deletions llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme-f16f16 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s

;
; FCVT
Expand Down Expand Up @@ -139,15 +139,6 @@ define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale
ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res
}

define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_widen_x2_f16(<vscale x 8 x half> %zn0) {
; CHECK-LABEL: multi_vector_cvt_widen_x2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvt { z0.s, z1.s }, z0.h
; CHECK-NEXT: ret
%res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> %zn0)
ret {<vscale x 4 x float>, <vscale x 4 x float>} %res
}

declare <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float>, <vscale x 4 x float>)
declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>)
Expand Down
11 changes: 0 additions & 11 deletions llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll

This file was deleted.

Loading