Skip to content

Commit fd7d65a

Browse files
tlively authored and alexcrichton committed
[WebAssembly] Codegen for f64x2.convert_low_i32x4_{s,u}
Add a custom DAG combine and ISD opcode for detecting patterns like (uint_to_fp (extract_subvector ...)) before the extract_subvector is expanded to ensure that they will ultimately lower to f64x2.convert_low_i32x4_{s,u} instructions. Since these instructions are no longer prototypes and can now be produced via standard IR, this commit also removes the target intrinsics and builtins that had been used to prototype the instructions. Differential Revision: https://reviews.llvm.org/D100425
1 parent 78fe029 commit fd7d65a

File tree

9 files changed

+74
-63
lines changed

9 files changed

+74
-63
lines changed

clang/include/clang/Basic/BuiltinsWebAssembly.def

-2
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,6 @@ TARGET_BUILTIN(__builtin_wasm_extend_high_s_i32x4_i64x2, "V2LLiV4i", "nc", "simd
196196
TARGET_BUILTIN(__builtin_wasm_extend_low_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128")
197197
TARGET_BUILTIN(__builtin_wasm_extend_high_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128")
198198

199-
TARGET_BUILTIN(__builtin_wasm_convert_low_s_i32x4_f64x2, "V2dV4i", "nc", "simd128")
200-
TARGET_BUILTIN(__builtin_wasm_convert_low_u_i32x4_f64x2, "V2dV4Ui", "nc", "simd128")
201199
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
202200
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
203201
TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128")

clang/lib/CodeGen/CGBuiltin.cpp

-15
Original file line numberDiff line numberDiff line change
@@ -17181,21 +17181,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
1718117181
Function *Callee = CGM.getIntrinsic(IntNo);
1718217182
return Builder.CreateCall(Callee, Vec);
1718317183
}
17184-
case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2:
17185-
case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: {
17186-
Value *Vec = EmitScalarExpr(E->getArg(0));
17187-
unsigned IntNo;
17188-
switch (BuiltinID) {
17189-
case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2:
17190-
IntNo = Intrinsic::wasm_convert_low_signed;
17191-
break;
17192-
case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2:
17193-
IntNo = Intrinsic::wasm_convert_low_unsigned;
17194-
break;
17195-
}
17196-
Function *Callee = CGM.getIntrinsic(IntNo);
17197-
return Builder.CreateCall(Callee, Vec);
17198-
}
1719917184
case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4:
1720017185
case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: {
1720117186
Value *Vec = EmitScalarExpr(E->getArg(0));

clang/test/CodeGen/builtins-wasm.c

-12
Original file line numberDiff line numberDiff line change
@@ -914,18 +914,6 @@ u64x2 extend_high_u_i32x4_i64x2(u32x4 x) {
914914
// WEBASSEMBLY: ret
915915
}
916916

917-
f64x2 convert_low_s_i32x4_f64x2(i32x4 x) {
918-
return __builtin_wasm_convert_low_s_i32x4_f64x2(x);
919-
// WEBASSEMBLY: call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %x)
920-
// WEBASSEMBLY: ret
921-
}
922-
923-
f64x2 convert_low_u_i32x4_f64x2(u32x4 x) {
924-
return __builtin_wasm_convert_low_u_i32x4_f64x2(x);
925-
// WEBASSEMBLY: call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %x)
926-
// WEBASSEMBLY: ret
927-
}
928-
929917
i32x4 trunc_sat_zero_s_f64x2_i32x4(f64x2 x) {
930918
return __builtin_wasm_trunc_sat_zero_s_f64x2_i32x4(x);
931919
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double> %x)

llvm/include/llvm/IR/IntrinsicsWebAssembly.td

-6
Original file line numberDiff line numberDiff line change
@@ -275,12 +275,6 @@ def int_wasm_extadd_pairwise_unsigned :
275275
[IntrNoMem, IntrSpeculatable]>;
276276

277277
// TODO: Remove these, if possible, once they are merged to the spec.
278-
def int_wasm_convert_low_signed :
279-
Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
280-
[IntrNoMem, IntrSpeculatable]>;
281-
def int_wasm_convert_low_unsigned :
282-
Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
283-
[IntrNoMem, IntrSpeculatable]>;
284278
def int_wasm_trunc_sat_zero_signed :
285279
Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty],
286280
[IntrNoMem, IntrSpeculatable]>;

llvm/lib/Target/WebAssembly/WebAssemblyISD.def

+2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ HANDLE_NODETYPE(EXTEND_LOW_S)
3333
HANDLE_NODETYPE(EXTEND_LOW_U)
3434
HANDLE_NODETYPE(EXTEND_HIGH_S)
3535
HANDLE_NODETYPE(EXTEND_HIGH_U)
36+
HANDLE_NODETYPE(CONVERT_LOW_S)
37+
HANDLE_NODETYPE(CONVERT_LOW_U)
3638
HANDLE_NODETYPE(THROW)
3739
HANDLE_NODETYPE(CATCH)
3840
HANDLE_NODETYPE(MEMORY_COPY)

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

+41
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
130130
setTargetDAGCombine(ISD::SIGN_EXTEND);
131131
setTargetDAGCombine(ISD::ZERO_EXTEND);
132132

133+
// Combine {s,u}int_to_fp of extract_subvector nodes into conversion ops
134+
setTargetDAGCombine(ISD::SINT_TO_FP);
135+
setTargetDAGCombine(ISD::UINT_TO_FP);
136+
133137
// Support saturating add for i8x16 and i16x8
134138
for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
135139
for (auto T : {MVT::v16i8, MVT::v8i16})
@@ -2016,6 +2020,40 @@ performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
20162020
return DAG.getNode(Op, SDLoc(N), ResVT, Source);
20172021
}
20182022

2023+
static SDValue
2024+
performVectorConvertLowCombine(SDNode *N,
2025+
TargetLowering::DAGCombinerInfo &DCI) {
2026+
auto &DAG = DCI.DAG;
2027+
assert(N->getOpcode() == ISD::SINT_TO_FP ||
2028+
N->getOpcode() == ISD::UINT_TO_FP);
2029+
2030+
// Combine ({s,u}int_to_fp (extract_subvector ... 0)) to an
2031+
// f64x2.convert_low_i32x4_{s,u} SDNode.
2032+
auto Extract = N->getOperand(0);
2033+
if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2034+
return SDValue();
2035+
auto Source = Extract.getOperand(0);
2036+
auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2037+
if (IndexNode == nullptr)
2038+
return SDValue();
2039+
auto Index = IndexNode->getZExtValue();
2040+
2041+
// The result must be v2f64 and the extracted subvector must be v2i32.
2042+
EVT ResVT = N->getValueType(0);
2043+
if (ResVT != MVT::v2f64 || Extract.getValueType() != MVT::v2i32)
2044+
return SDValue();
2045+
2046+
// The extracted vector must be the low half.
2047+
if (Index != 0)
2048+
return SDValue();
2049+
2050+
unsigned Op = N->getOpcode() == ISD::SINT_TO_FP
2051+
? WebAssemblyISD::CONVERT_LOW_S
2052+
: WebAssemblyISD::CONVERT_LOW_U;
2053+
2054+
return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2055+
}
2056+
20192057
SDValue
20202058
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
20212059
DAGCombinerInfo &DCI) const {
@@ -2027,5 +2065,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
20272065
case ISD::SIGN_EXTEND:
20282066
case ISD::ZERO_EXTEND:
20292067
return performVectorExtendCombine(N, DCI);
2068+
case ISD::SINT_TO_FP:
2069+
case ISD::UINT_TO_FP:
2070+
return performVectorConvertLowCombine(N, DCI);
20302071
}
20312072
}

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

+9-8
Original file line numberDiff line numberDiff line change
@@ -1104,16 +1104,21 @@ multiclass SIMDConvert<Vec vec, Vec arg, SDNode op, string name,
11041104
defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>;
11051105
defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>;
11061106

1107-
// Integer to floating point: convert
1108-
defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>;
1109-
defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>;
1110-
11111107
// Lower llvm.wasm.trunc.sat.* to saturating instructions
11121108
def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
11131109
(fp_to_sint_I32x4 $src)>;
11141110
def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
11151111
(fp_to_uint_I32x4 $src)>;
11161112

1113+
// Integer to floating point: convert
1114+
def convert_low_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
1115+
def convert_low_s : SDNode<"WebAssemblyISD::CONVERT_LOW_S", convert_low_t>;
1116+
def convert_low_u : SDNode<"WebAssemblyISD::CONVERT_LOW_U", convert_low_t>;
1117+
defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>;
1118+
defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>;
1119+
defm "" : SIMDConvert<F64x2, I32x4, convert_low_s, "convert_low_i32x4_s", 0xfe>;
1120+
defm "" : SIMDConvert<F64x2, I32x4, convert_low_u, "convert_low_i32x4_u", 0xff>;
1121+
11171122
// Extending operations
11181123
def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
11191124
def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>;
@@ -1268,10 +1273,6 @@ defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_sat_zero_signed,
12681273
"trunc_sat_zero_f64x2_s", 0xfc>;
12691274
defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_sat_zero_unsigned,
12701275
"trunc_sat_zero_f64x2_u", 0xfd>;
1271-
defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_signed,
1272-
"convert_low_i32x4_s", 0xfe>;
1273-
defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_unsigned,
1274-
"convert_low_i32x4_u", 0xff>;
12751276

12761277
//===----------------------------------------------------------------------===//
12771278
// Saturating Rounding Q-Format Multiplication

llvm/test/CodeGen/WebAssembly/simd-conversions.ll

+22
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,25 @@ define <2 x i64> @trunc_sat_u_v2i64(<2 x double> %x) {
8181
%a = fptoui <2 x double> %x to <2 x i64>
8282
ret <2 x i64> %a
8383
}
84+
85+
; CHECK-LABEL: convert_low_s_v2f64:
86+
; NO-SIMD128-NOT: f64x2
87+
; SIMD128-NEXT: .functype convert_low_s_v2f64 (v128) -> (v128){{$}}
88+
; SIMD128-NEXT: f64x2.convert_low_i32x4_s $push[[R:[0-9]+]]=, $0
89+
; SIMD128-NEXT: return $pop[[R]]
90+
define <2 x double> @convert_low_s_v2f64(<4 x i32> %x) {
91+
%v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
92+
%a = sitofp <2 x i32> %v to <2 x double>
93+
ret <2 x double> %a
94+
}
95+
96+
; CHECK-LABEL: convert_low_u_v2f64:
97+
; NO-SIMD128-NOT: f64x2
98+
; SIMD128-NEXT: .functype convert_low_u_v2f64 (v128) -> (v128){{$}}
99+
; SIMD128-NEXT: f64x2.convert_low_i32x4_u $push[[R:[0-9]+]]=, $0
100+
; SIMD128-NEXT: return $pop[[R]]
101+
define <2 x double> @convert_low_u_v2f64(<4 x i32> %x) {
102+
%v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
103+
%a = uitofp <2 x i32> %v to <2 x double>
104+
ret <2 x double> %a
105+
}

llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

-20
Original file line numberDiff line numberDiff line change
@@ -843,26 +843,6 @@ define <2 x double> @nearest_v2f64(<2 x double> %a) {
843843
ret <2 x double> %v
844844
}
845845

846-
; CHECK-LABEL: convert_low_signed_v2f64:
847-
; CHECK-NEXT: .functype convert_low_signed_v2f64 (v128) -> (v128){{$}}
848-
; CHECK-NEXT: f64x2.convert_low_i32x4_s $push[[R:[0-9]+]]=, $0{{$}}
849-
; CHECK-NEXT: return $pop[[R]]{{$}}
850-
declare <2 x double> @llvm.wasm.convert.low.signed(<4 x i32>)
851-
define <2 x double> @convert_low_signed_v2f64(<4 x i32> %a) {
852-
%v = call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %a)
853-
ret <2 x double> %v
854-
}
855-
856-
; CHECK-LABEL: convert_low_unsigned_v2f64:
857-
; CHECK-NEXT: .functype convert_low_unsigned_v2f64 (v128) -> (v128){{$}}
858-
; CHECK-NEXT: f64x2.convert_low_i32x4_u $push[[R:[0-9]+]]=, $0{{$}}
859-
; CHECK-NEXT: return $pop[[R]]{{$}}
860-
declare <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32>)
861-
define <2 x double> @convert_low_unsigned_v2f64(<4 x i32> %a) {
862-
%v = call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %a)
863-
ret <2 x double> %v
864-
}
865-
866846
; CHECK-LABEL: promote_low_v2f64:
867847
; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}}
868848
; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}}

0 commit comments

Comments
 (0)