[WebAssembly] Codegen for pmin and pmax

tlively · tlively · commit 85157c007903 · 2021-07-23T14:49:21.000-07:00
Replace the clang builtins and LLVM intrinsics for {f32x4,f64x2}.{pmin,pmax} with standard codegen patterns. Since wasm_simd128.h uses an integer vector as the standard single vector type, the IR for the pmin and pmax intrinsic functions contains bitcasts that would not be there otherwise. Add extra codegen patterns that can still select the pmin and pmax instructions in the presence of these bitcasts. Differential Revision: https://reviews.llvm.org/D106612
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -144,12 +144,8 @@ TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128")
 
 TARGET_BUILTIN(__builtin_wasm_min_f32x4, "V4fV4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_max_f32x4, "V4fV4fV4f", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_pmin_f32x4, "V4fV4fV4f", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_pmax_f32x4, "V4fV4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128")
 
 TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17589,22 +17589,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
     return Builder.CreateCall(Callee, {LHS, RHS});
   }
-  case WebAssembly::BI__builtin_wasm_pmin_f32x4:
-  case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
-    Value *LHS = EmitScalarExpr(E->getArg(0));
-    Value *RHS = EmitScalarExpr(E->getArg(1));
-    Function *Callee =
-        CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
-    return Builder.CreateCall(Callee, {LHS, RHS});
-  }
-  case WebAssembly::BI__builtin_wasm_pmax_f32x4:
-  case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
-    Value *LHS = EmitScalarExpr(E->getArg(0));
-    Value *RHS = EmitScalarExpr(E->getArg(1));
-    Function *Callee =
-        CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
-    return Builder.CreateCall(Callee, {LHS, RHS});
-  }
   case WebAssembly::BI__builtin_wasm_ceil_f32x4:
   case WebAssembly::BI__builtin_wasm_floor_f32x4:
   case WebAssembly::BI__builtin_wasm_trunc_f32x4:
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
@@ -1150,12 +1150,14 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_max(v128_t __a,
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmin(v128_t __a,
                                                             v128_t __b) {
-  return (v128_t)__builtin_wasm_pmin_f32x4((__f32x4)__a, (__f32x4)__b);
+  __i32x4 __mask = (__i32x4)((__f32x4)__b < (__f32x4)__a);
+  return (v128_t)((((__i32x4)__b) & __mask) | (((__i32x4)__a) & ~__mask));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmax(v128_t __a,
                                                             v128_t __b) {
-  return (v128_t)__builtin_wasm_pmax_f32x4((__f32x4)__a, (__f32x4)__b);
+  __i32x4 __mask = (__i32x4)((__f32x4)__a < (__f32x4)__b);
+  return (v128_t)((((__i32x4)__b) & __mask) | (((__i32x4)__a) & ~__mask));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t __a) {
@@ -1218,12 +1220,14 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_max(v128_t __a,
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmin(v128_t __a,
                                                             v128_t __b) {
-  return (v128_t)__builtin_wasm_pmin_f64x2((__f64x2)__a, (__f64x2)__b);
+  __i64x2 __mask = (__i64x2)((__f64x2)__b < (__f64x2)__a);
+  return (v128_t)((((__i64x2)__b) & __mask) | (((__i64x2)__a) & ~__mask));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmax(v128_t __a,
                                                             v128_t __b) {
-  return (v128_t)__builtin_wasm_pmax_f64x2((__f64x2)__a, (__f64x2)__b);
+  __i64x2 __mask = (__i64x2)((__f64x2)__a < (__f64x2)__b);
+  return (v128_t)((((__i64x2)__b) & __mask) | (((__i64x2)__a) & ~__mask));
 }
 
 static __inline__ v128_t __DEFAULT_FN_ATTRS
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
@@ -590,20 +590,6 @@ f32x4 max_f32x4(f32x4 x, f32x4 y) {
   // WEBASSEMBLY-NEXT: ret
 }
 
-f32x4 pmin_f32x4(f32x4 x, f32x4 y) {
-  return __builtin_wasm_pmin_f32x4(x, y);
-  // WEBASSEMBLY: call <4 x float> @llvm.wasm.pmin.v4f32(
-  // WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y)
-  // WEBASSEMBLY-NEXT: ret
-}
-
-f32x4 pmax_f32x4(f32x4 x, f32x4 y) {
-  return __builtin_wasm_pmax_f32x4(x, y);
-  // WEBASSEMBLY: call <4 x float> @llvm.wasm.pmax.v4f32(
-  // WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y)
-  // WEBASSEMBLY-NEXT: ret
-}
-
 f64x2 min_f64x2(f64x2 x, f64x2 y) {
   return __builtin_wasm_min_f64x2(x, y);
   // WEBASSEMBLY: call <2 x double> @llvm.minimum.v2f64(
@@ -618,20 +604,6 @@ f64x2 max_f64x2(f64x2 x, f64x2 y) {
   // WEBASSEMBLY-NEXT: ret
 }
 
-f64x2 pmin_f64x2(f64x2 x, f64x2 y) {
-  return __builtin_wasm_pmin_f64x2(x, y);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.pmin.v2f64(
-  // WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y)
-  // WEBASSEMBLY-NEXT: ret
-}
-
-f64x2 pmax_f64x2(f64x2 x, f64x2 y) {
-  return __builtin_wasm_pmax_f64x2(x, y);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.pmax.v2f64(
-  // WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y)
-  // WEBASSEMBLY-NEXT: ret
-}
-
 f32x4 ceil_f32x4(f32x4 x) {
   return __builtin_wasm_ceil_f32x4(x);
   // WEBASSEMBLY: call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
@@ -2191,11 +2191,11 @@ v128_t test_f32x4_max(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_f32x4_pmin(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR6]]
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
-// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[CMP_I]], <4 x i32> [[B]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 //
 v128_t test_f32x4_pmin(v128_t a, v128_t b) {
   return wasm_f32x4_pmin(a, b);
@@ -2205,9 +2205,9 @@ v128_t test_f32x4_pmin(v128_t a, v128_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR6]]
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
-// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[CMP_I]], <4 x i32> [[B]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 //
 v128_t test_f32x4_pmax(v128_t a, v128_t b) {
   return wasm_f32x4_pmax(a, b);
@@ -2364,9 +2364,10 @@ v128_t test_f64x2_max(v128_t a, v128_t b) {
 
 // CHECK-LABEL: @test_f64x2_pmin(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[CMP_I]], <2 x double> [[TMP0]], <2 x double> [[TMP1]]
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@@ -2378,7 +2379,8 @@ v128_t test_f64x2_pmin(v128_t a, v128_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR6]]
+// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[CMP_I]], <2 x double> [[TMP1]], <2 x double> [[TMP0]]
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -162,16 +162,6 @@ def int_wasm_q15mulr_sat_signed :
             [llvm_v8i16_ty, llvm_v8i16_ty],
             [IntrNoMem, IntrSpeculatable]>;
 
-// TODO: Replace these intrinsics with normal ISel patterns
-def int_wasm_pmin :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_pmax :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-
 def int_wasm_extmul_low_signed :
   Intrinsic<[llvm_anyvector_ty],
             [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1122,10 +1122,32 @@ defm MIN : SIMDBinaryFP<fminimum, "min", 232>;
 defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;
 
 // Pseudo-minimum: pmin
-defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>;
+def pmin : PatFrag<(ops node:$lhs, node:$rhs),
+                   (vselect (setolt $rhs, $lhs), $rhs, $lhs)>;
+defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>;
 
 // Pseudo-maximum: pmax
-defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>;
+def pmax : PatFrag<(ops node:$lhs, node:$rhs),
+                   (vselect (setolt $lhs, $rhs), $rhs, $lhs)>;
+defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
+
+// Also match the pmin/pmax cases where the operands are int vectors (but the
+// comparison is still a floating point comparison). This can happen when using
+// the wasm_simd128.h intrinsics because v128_t is an integer vector.
+foreach vec = [F32x4, F64x2] in {
+defvar pmin = !cast<NI>("PMIN_"#vec);
+defvar pmax = !cast<NI>("PMAX_"#vec);
+def : Pat<(vec.int_vt (vselect
+            (setolt (vec.vt (bitconvert V128:$rhs)),
+                    (vec.vt (bitconvert V128:$lhs))),
+            V128:$rhs, V128:$lhs)),
+          (pmin $lhs, $rhs)>;
+def : Pat<(vec.int_vt (vselect
+            (setolt (vec.vt (bitconvert V128:$lhs)),
+                    (vec.vt (bitconvert V128:$rhs))),
+            V128:$rhs, V128:$lhs)),
+          (pmax $lhs, $rhs)>;
+}
 
 //===----------------------------------------------------------------------===//
 // Conversions
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -1409,6 +1409,54 @@ define <4 x float> @max_const_intrinsic_v4f32() {
   ret <4 x float> %a
 }
 
+; CHECK-LABEL: pmin_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .functype pmin_v4f32 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f32x4.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) {
+  %c = fcmp olt <4 x float> %y, %x
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; CHECK-LABEL: pmin_int_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .functype pmin_int_v4f32 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f32x4.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
+  %fx = bitcast <4 x i32> %x to <4 x float>
+  %fy = bitcast <4 x i32> %y to <4 x float>
+  %c = fcmp olt <4 x float> %fy, %fx
+  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %x
+  ret <4 x i32> %a
+}
+
+; CHECK-LABEL: pmax_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .functype pmax_v4f32 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f32x4.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) {
+  %c = fcmp olt <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; CHECK-LABEL: pmax_int_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .functype pmax_int_v4f32 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f32x4.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
+  %fx = bitcast <4 x i32> %x to <4 x float>
+  %fy = bitcast <4 x i32> %y to <4 x float>
+  %c = fcmp olt <4 x float> %fx, %fy
+  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %x
+  ret <4 x i32> %a
+}
+
 ; CHECK-LABEL: add_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}}
@@ -1585,6 +1633,54 @@ define <2 x double> @max_const_intrinsic_v2f64() {
   ret <2 x double> %a
 }
 
+; CHECK-LABEL: pmin_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype pmin_v2f64 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x double> @pmin_v2f64(<2 x double> %x, <2 x double> %y) {
+  %c = fcmp olt <2 x double> %y, %x
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: pmin_int_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype pmin_int_v2f64 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @pmin_int_v2f64(<2 x i64> %x, <2 x i64> %y) {
+  %fx = bitcast <2 x i64> %x to <2 x double>
+  %fy = bitcast <2 x i64> %y to <2 x double>
+  %c = fcmp olt <2 x double> %fy, %fx
+  %a = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %x
+  ret <2 x i64> %a
+}
+
+; CHECK-LABEL: pmax_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype pmax_v2f64 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x double> @pmax_v2f64(<2 x double> %x, <2 x double> %y) {
+  %c = fcmp olt <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: pmax_int_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype pmax_int_v2f64 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x i64> @pmax_int_v2f64(<2 x i64> %x, <2 x i64> %y) {
+  %fx = bitcast <2 x i64> %x to <2 x double>
+  %fy = bitcast <2 x i64> %y to <2 x double>
+  %c = fcmp olt <2 x double> %fx, %fy
+  %a = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %x
+  ret <2 x i64> %a
+}
+
 ; CHECK-LABEL: add_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -685,26 +685,6 @@ define <4 x float> @bitselect_v4f32(<4 x float> %v1, <4 x float> %v2, <4 x float
   ret <4 x float> %a
 }
 
-; CHECK-LABEL: pmin_v4f32:
-; CHECK-NEXT: .functype pmin_v4f32 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: f32x4.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x float> @llvm.wasm.pmin.v4f32(<4 x float>, <4 x float>)
-define <4 x float> @pmin_v4f32(<4 x float> %a, <4 x float> %b) {
-  %v = call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> %a, <4 x float> %b)
-  ret <4 x float> %v
-}
-
-; CHECK-LABEL: pmax_v4f32:
-; CHECK-NEXT: .functype pmax_v4f32 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: f32x4.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x float> @llvm.wasm.pmax.v4f32(<4 x float>, <4 x float>)
-define <4 x float> @pmax_v4f32(<4 x float> %a, <4 x float> %b) {
-  %v = call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> %a, <4 x float> %b)
-  ret <4 x float> %v
-}
-
 ; CHECK-LABEL: ceil_v4f32:
 ; CHECK-NEXT: .functype ceil_v4f32 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f32x4.ceil $push[[R:[0-9]+]]=, $0{{$}}
@@ -760,26 +740,6 @@ define <2 x double> @bitselect_v2f64(<2 x double> %v1, <2 x double> %v2, <2 x do
   ret <2 x double> %a
 }
 
-; CHECK-LABEL: pmin_v2f64:
-; CHECK-NEXT: .functype pmin_v2f64 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: f64x2.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.pmin.v2f64(<2 x double>, <2 x double>)
-define <2 x double> @pmin_v2f64(<2 x double> %a, <2 x double> %b) {
-  %v = call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> %a, <2 x double> %b)
-  ret <2 x double> %v
-}
-
-; CHECK-LABEL: pmax_v2f64:
-; CHECK-NEXT: .functype pmax_v2f64 (v128, v128) -> (v128){{$}}
-; CHECK-NEXT: f64x2.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.pmax.v2f64(<2 x double>, <2 x double>)
-define <2 x double> @pmax_v2f64(<2 x double> %a, <2 x double> %b) {
-  %v = call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> %a, <2 x double> %b)
-  ret <2 x double> %v
-}
-
 ; CHECK-LABEL: ceil_v2f64:
 ; CHECK-NEXT: .functype ceil_v2f64 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f64x2.ceil $push[[R:[0-9]+]]=, $0{{$}}