Skip to content

Commit 60bce6e

Browse files
authored
[WebAssembly] Implement all f16x8 binary instructions. (#93360)
This reuses most of the code that was created for the f32x4 and f64x2 binary instructions and tries to follow how they were implemented.

- add/sub/mul/div: use the regular LLVM IR instructions.
- min/max: use the minimum/maximum intrinsics, and also have builtins.
- pmin/pmax: use the wasm.pmin/wasm.pmax intrinsics, and also have builtins.

Specified at: https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md
1 parent 722a5fc commit 60bce6e

File tree

7 files changed

+163
-9
lines changed

7 files changed

+163
-9
lines changed

clang/include/clang/Basic/BuiltinsWebAssembly.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128")
135135
TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128")
136136
TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128")
137137
TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128")
138+
TARGET_BUILTIN(__builtin_wasm_min_f16x8, "V8hV8hV8h", "nc", "half-precision")
139+
TARGET_BUILTIN(__builtin_wasm_max_f16x8, "V8hV8hV8h", "nc", "half-precision")
140+
TARGET_BUILTIN(__builtin_wasm_pmin_f16x8, "V8hV8hV8h", "nc", "half-precision")
141+
TARGET_BUILTIN(__builtin_wasm_pmax_f16x8, "V8hV8hV8h", "nc", "half-precision")
138142

139143
TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128")
140144
TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20806,6 +20806,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2080620806
}
2080720807
case WebAssembly::BI__builtin_wasm_min_f32:
2080820808
case WebAssembly::BI__builtin_wasm_min_f64:
20809+
case WebAssembly::BI__builtin_wasm_min_f16x8:
2080920810
case WebAssembly::BI__builtin_wasm_min_f32x4:
2081020811
case WebAssembly::BI__builtin_wasm_min_f64x2: {
2081120812
Value *LHS = EmitScalarExpr(E->getArg(0));
@@ -20816,6 +20817,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2081620817
}
2081720818
case WebAssembly::BI__builtin_wasm_max_f32:
2081820819
case WebAssembly::BI__builtin_wasm_max_f64:
20820+
case WebAssembly::BI__builtin_wasm_max_f16x8:
2081920821
case WebAssembly::BI__builtin_wasm_max_f32x4:
2082020822
case WebAssembly::BI__builtin_wasm_max_f64x2: {
2082120823
Value *LHS = EmitScalarExpr(E->getArg(0));
@@ -20824,6 +20826,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2082420826
CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
2082520827
return Builder.CreateCall(Callee, {LHS, RHS});
2082620828
}
20829+
case WebAssembly::BI__builtin_wasm_pmin_f16x8:
2082720830
case WebAssembly::BI__builtin_wasm_pmin_f32x4:
2082820831
case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
2082920832
Value *LHS = EmitScalarExpr(E->getArg(0));
@@ -20832,6 +20835,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2083220835
CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
2083320836
return Builder.CreateCall(Callee, {LHS, RHS});
2083420837
}
20838+
case WebAssembly::BI__builtin_wasm_pmax_f16x8:
2083520839
case WebAssembly::BI__builtin_wasm_pmax_f32x4:
2083620840
case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
2083720841
Value *LHS = EmitScalarExpr(E->getArg(0));

clang/test/CodeGen/builtins-wasm.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,30 @@ float extract_lane_f16x8(f16x8 a, int i) {
825825
// WEBASSEMBLY-NEXT: ret float %0
826826
return __builtin_wasm_extract_lane_f16x8(a, i);
827827
}
828+
829+
f16x8 min_f16x8(f16x8 a, f16x8 b) {
830+
// WEBASSEMBLY: %0 = tail call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b)
831+
// WEBASSEMBLY-NEXT: ret <8 x half> %0
832+
return __builtin_wasm_min_f16x8(a, b);
833+
}
834+
835+
f16x8 max_f16x8(f16x8 a, f16x8 b) {
836+
// WEBASSEMBLY: %0 = tail call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b)
837+
// WEBASSEMBLY-NEXT: ret <8 x half> %0
838+
return __builtin_wasm_max_f16x8(a, b);
839+
}
840+
841+
f16x8 pmin_f16x8(f16x8 a, f16x8 b) {
842+
// WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.pmin.v8f16(<8 x half> %a, <8 x half> %b)
843+
// WEBASSEMBLY-NEXT: ret <8 x half> %0
844+
return __builtin_wasm_pmin_f16x8(a, b);
845+
}
846+
847+
f16x8 pmax_f16x8(f16x8 a, f16x8 b) {
848+
// WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.pmax.v8f16(<8 x half> %a, <8 x half> %b)
849+
// WEBASSEMBLY-NEXT: ret <8 x half> %0
850+
return __builtin_wasm_pmax_f16x8(a, b);
851+
}
828852
__externref_t externref_null() {
829853
return __builtin_wasm_ref_null_extern();
830854
// WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern()

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
145145
setTruncStoreAction(T, MVT::f16, Expand);
146146
}
147147

148+
if (Subtarget->hasHalfPrecision()) {
149+
setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
150+
setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
151+
}
152+
148153
// Expand unavailable integer operations.
149154
for (auto Op :
150155
{ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,33 +16,34 @@
1616
multiclass ABSTRACT_SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
1717
list<dag> pattern_r, string asmstr_r,
1818
string asmstr_s, bits<32> simdop,
19-
Predicate simd_level> {
19+
list<Predicate> reqs> {
2020
defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
2121
!if(!ge(simdop, 0x100),
2222
!or(0xfd0000, !and(0xffff, simdop)),
2323
!or(0xfd00, !and(0xff, simdop)))>,
24-
Requires<[simd_level]>;
24+
Requires<reqs>;
2525
}
2626

2727
multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
2828
list<dag> pattern_r, string asmstr_r = "",
29-
string asmstr_s = "", bits<32> simdop = -1> {
29+
string asmstr_s = "", bits<32> simdop = -1,
30+
list<Predicate> reqs = []> {
3031
defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
31-
asmstr_s, simdop, HasSIMD128>;
32+
asmstr_s, simdop, !listconcat([HasSIMD128], reqs)>;
3233
}
3334

3435
multiclass RELAXED_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
3536
list<dag> pattern_r, string asmstr_r = "",
3637
string asmstr_s = "", bits<32> simdop = -1> {
3738
defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
38-
asmstr_s, simdop, HasRelaxedSIMD>;
39+
asmstr_s, simdop, [HasRelaxedSIMD]>;
3940
}
4041

4142
multiclass HALF_PRECISION_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
4243
list<dag> pattern_r, string asmstr_r = "",
4344
string asmstr_s = "", bits<32> simdop = -1> {
4445
defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
45-
asmstr_s, simdop, HasHalfPrecision>;
46+
asmstr_s, simdop, [HasHalfPrecision]>;
4647
}
4748

4849

@@ -152,6 +153,19 @@ def F64x2 : Vec {
152153
let prefix = "f64x2";
153154
}
154155

156+
def F16x8 : Vec {
157+
let vt = v8f16;
158+
let int_vt = v8i16;
159+
let lane_vt = f32;
160+
let lane_rc = F32;
161+
let lane_bits = 16;
162+
let lane_idx = LaneIdx8;
163+
let lane_load = int_wasm_loadf16_f32;
164+
let splat = PatFrag<(ops node:$x), (v8f16 (splat_vector (f16 $x)))>;
165+
let prefix = "f16x8";
166+
}
167+
168+
// TODO: Include F16x8 here when half precision is better supported.
155169
defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2];
156170
defvar IntVecs = [I8x16, I16x8, I32x4, I64x2];
157171

@@ -781,13 +795,19 @@ def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
781795
// Bitwise operations
782796
//===----------------------------------------------------------------------===//
783797

784-
multiclass SIMDBinary<Vec vec, SDPatternOperator node, string name, bits<32> simdop> {
798+
multiclass SIMDBinary<Vec vec, SDPatternOperator node, string name,
799+
bits<32> simdop, list<Predicate> reqs = []> {
785800
defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
786801
(outs), (ins),
787802
[(set (vec.vt V128:$dst),
788803
(node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))],
789804
vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
790-
vec.prefix#"."#name, simdop>;
805+
vec.prefix#"."#name, simdop, reqs>;
806+
}
807+
808+
multiclass HalfPrecisionBinary<Vec vec, SDPatternOperator node, string name,
809+
bits<32> simdop> {
810+
defm "" : SIMDBinary<vec, node, name, simdop, [HasHalfPrecision]>;
791811
}
792812

793813
multiclass SIMDBitwise<SDPatternOperator node, string name, bits<32> simdop,
@@ -1199,6 +1219,7 @@ def : Pat<(v2f64 (froundeven (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>;
11991219
multiclass SIMDBinaryFP<SDPatternOperator node, string name, bits<32> baseInst> {
12001220
defm "" : SIMDBinary<F32x4, node, name, baseInst>;
12011221
defm "" : SIMDBinary<F64x2, node, name, !add(baseInst, 12)>;
1222+
defm "" : HalfPrecisionBinary<F16x8, node, name, !add(baseInst, 80)>;
12021223
}
12031224

12041225
// Addition: add
@@ -1242,7 +1263,7 @@ defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
12421263
// Also match the pmin/pmax cases where the operands are int vectors (but the
12431264
// comparison is still a floating point comparison). This can happen when using
12441265
// the wasm_simd128.h intrinsics because v128_t is an integer vector.
1245-
foreach vec = [F32x4, F64x2] in {
1266+
foreach vec = [F32x4, F64x2, F16x8] in {
12461267
defvar pmin = !cast<NI>("PMIN_"#vec);
12471268
defvar pmax = !cast<NI>("PMAX_"#vec);
12481269
def : Pat<(vec.int_vt (vselect
@@ -1266,6 +1287,10 @@ def : Pat<(v2f64 (int_wasm_pmin (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
12661287
(PMIN_F64x2 V128:$lhs, V128:$rhs)>;
12671288
def : Pat<(v2f64 (int_wasm_pmax (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
12681289
(PMAX_F64x2 V128:$lhs, V128:$rhs)>;
1290+
def : Pat<(v8f16 (int_wasm_pmin (v8f16 V128:$lhs), (v8f16 V128:$rhs))),
1291+
(PMIN_F16x8 V128:$lhs, V128:$rhs)>;
1292+
def : Pat<(v8f16 (int_wasm_pmax (v8f16 V128:$lhs), (v8f16 V128:$rhs))),
1293+
(PMAX_F16x8 V128:$lhs, V128:$rhs)>;
12691294

12701295
//===----------------------------------------------------------------------===//
12711296
// Conversions

llvm/test/CodeGen/WebAssembly/half-precision.ll

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,71 @@ define float @extract_lane_v8f16(<8 x half> %v) {
3535
%r = call float @llvm.wasm.extract.lane.f16x8(<8 x half> %v, i32 1)
3636
ret float %r
3737
}
38+
39+
; CHECK-LABEL: add_v8f16:
40+
; CHECK: f16x8.add $push0=, $0, $1
41+
; CHECK-NEXT: return $pop0
42+
define <8 x half> @add_v8f16(<8 x half> %a, <8 x half> %b) {
43+
%r = fadd <8 x half> %a, %b
44+
ret <8 x half> %r
45+
}
46+
47+
; CHECK-LABEL: sub_v8f16:
48+
; CHECK: f16x8.sub $push0=, $0, $1
49+
; CHECK-NEXT: return $pop0
50+
define <8 x half> @sub_v8f16(<8 x half> %a, <8 x half> %b) {
51+
%r = fsub <8 x half> %a, %b
52+
ret <8 x half> %r
53+
}
54+
55+
; CHECK-LABEL: mul_v8f16:
56+
; CHECK: f16x8.mul $push0=, $0, $1
57+
; CHECK-NEXT: return $pop0
58+
define <8 x half> @mul_v8f16(<8 x half> %a, <8 x half> %b) {
59+
%r = fmul <8 x half> %a, %b
60+
ret <8 x half> %r
61+
}
62+
63+
; CHECK-LABEL: div_v8f16:
64+
; CHECK: f16x8.div $push0=, $0, $1
65+
; CHECK-NEXT: return $pop0
66+
define <8 x half> @div_v8f16(<8 x half> %a, <8 x half> %b) {
67+
%r = fdiv <8 x half> %a, %b
68+
ret <8 x half> %r
69+
}
70+
71+
; CHECK-LABEL: min_intrinsic_v8f16:
72+
; CHECK: f16x8.min $push0=, $0, $1
73+
; CHECK-NEXT: return $pop0
74+
declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
75+
define <8 x half> @min_intrinsic_v8f16(<8 x half> %x, <8 x half> %y) {
76+
%a = call <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y)
77+
ret <8 x half> %a
78+
}
79+
80+
; CHECK-LABEL: max_intrinsic_v8f16:
81+
; CHECK: f16x8.max $push0=, $0, $1
82+
; CHECK-NEXT: return $pop0
83+
declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
84+
define <8 x half> @max_intrinsic_v8f16(<8 x half> %x, <8 x half> %y) {
85+
%a = call <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y)
86+
ret <8 x half> %a
87+
}
88+
89+
; CHECK-LABEL: pmin_intrinsic_v8f16:
90+
; CHECK: f16x8.pmin $push0=, $0, $1
91+
; CHECK-NEXT: return $pop0
92+
declare <8 x half> @llvm.wasm.pmin.v8f16(<8 x half>, <8 x half>)
93+
define <8 x half> @pmin_intrinsic_v8f16(<8 x half> %a, <8 x half> %b) {
94+
%v = call <8 x half> @llvm.wasm.pmin.v8f16(<8 x half> %a, <8 x half> %b)
95+
ret <8 x half> %v
96+
}
97+
98+
; CHECK-LABEL: pmax_intrinsic_v8f16:
99+
; CHECK: f16x8.pmax $push0=, $0, $1
100+
; CHECK-NEXT: return $pop0
101+
declare <8 x half> @llvm.wasm.pmax.v8f16(<8 x half>, <8 x half>)
102+
define <8 x half> @pmax_intrinsic_v8f16(<8 x half> %a, <8 x half> %b) {
103+
%v = call <8 x half> @llvm.wasm.pmax.v8f16(<8 x half> %a, <8 x half> %b)
104+
ret <8 x half> %v
105+
}

llvm/test/MC/WebAssembly/simd-encodings.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -851,4 +851,28 @@ main:
851851
# CHECK: f16x8.extract_lane 1 # encoding: [0xfd,0xa1,0x02,0x01]
852852
f16x8.extract_lane 1
853853

854+
# CHECK: f16x8.add # encoding: [0xfd,0xb4,0x02]
855+
f16x8.add
856+
857+
# CHECK: f16x8.sub # encoding: [0xfd,0xb5,0x02]
858+
f16x8.sub
859+
860+
# CHECK: f16x8.mul # encoding: [0xfd,0xb6,0x02]
861+
f16x8.mul
862+
863+
# CHECK: f16x8.div # encoding: [0xfd,0xb7,0x02]
864+
f16x8.div
865+
866+
# CHECK: f16x8.min # encoding: [0xfd,0xb8,0x02]
867+
f16x8.min
868+
869+
# CHECK: f16x8.max # encoding: [0xfd,0xb9,0x02]
870+
f16x8.max
871+
872+
# CHECK: f16x8.pmin # encoding: [0xfd,0xba,0x02]
873+
f16x8.pmin
874+
875+
# CHECK: f16x8.pmax # encoding: [0xfd,0xbb,0x02]
876+
f16x8.pmax
877+
854878
end_function

0 commit comments

Comments
 (0)